diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 19231aea15..9567b914e5 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -7362,13 +7362,30 @@ public:
 		set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) == fabs(get_vr(op.rb)))));
 	}
 
-	// Multiply and return zero if any of the arguments is in the xfloat range.
-	value_t mzero_if_xtended(value_t a, value_t b)
+	value_t fma32x4(value_t a, value_t b, value_t c) // a * b + c, except that c is returned as-is where |a| or |b| (as raw bits) exceeds 0x7f7fffff
 	{
 		// Compare absolute values with max positive float in normal range.
 		const auto aa = bitcast(fabs(a));
 		const auto ab = bitcast(fabs(b));
-		return eval(select(max(aa, ab) > 0x7f7fffff, fsplat(0.), a * b));
+		const auto sc = eval(max(aa, ab) > 0x7f7fffff); // true where either multiplicand is above that limit
+
+		if (m_use_fma) // emit a single fma intrinsic call on the unconverted operands
+		{
+			value_t r;
+			r.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a.value, b.value, c.value});
+			r.value = m_ir->CreateSelect(sc.value, c.value, r.value); // flagged: the product counts as zero, leaving just c
+			return r;
+		}
+
+		// Otherwise (m_use_fma not set): convert to doubles, fmuladd there, then truncate the result back
+		const auto xa = m_ir->CreateFPExt(a.value, get_type());
+		const auto xb = m_ir->CreateFPExt(b.value, get_type());
+		const auto xc = m_ir->CreateFPExt(c.value, get_type());
+		const auto xr = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fmuladd), {xa, xb, xc});
+		value_t r;
+		r.value = m_ir->CreateFPTrunc(xr, get_type());
+		r.value = m_ir->CreateSelect(sc.value, c.value, r.value); // same override as in the fma path above
+		return r;
 	}
 
 	void FNMS(spu_opcode_t op)
@@ -7377,7 +7394,7 @@ public:
 		if (g_cfg.core.spu_accurate_xfloat)
 			set_vr(op.rt4, -fmuladd(get_vr(op.ra), get_vr(op.rb), eval(-get_vr(op.rc))));
 		else if (g_cfg.core.spu_approx_xfloat)
-			set_vr(op.rt4, get_vr(op.rc) - mzero_if_xtended(get_vr(op.ra), get_vr(op.rb)));
+			set_vr(op.rt4, -fma32x4(get_vr(op.ra), get_vr(op.rb), eval(-get_vr(op.rc)))); // -(a * b - c), i.e. c - a * b as in the plain branch below
 		else
 			set_vr(op.rt4, get_vr(op.rc) - get_vr(op.ra) * get_vr(op.rb));
 	}
@@ -7388,7 +7405,7 @@ public:
 		if (g_cfg.core.spu_accurate_xfloat)
 			set_vr(op.rt4, fmuladd(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc)));
 		else if (g_cfg.core.spu_approx_xfloat)
-			set_vr(op.rt4, mzero_if_xtended(get_vr(op.ra), get_vr(op.rb)) + get_vr(op.rc));
+			set_vr(op.rt4, fma32x4(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc)));
 		else
 			set_vr(op.rt4, get_vr(op.ra) * get_vr(op.rb) + get_vr(op.rc));
 	}
@@ -7399,7 +7416,7 @@ public:
 		if (g_cfg.core.spu_accurate_xfloat)
 			set_vr(op.rt4, fmuladd(get_vr(op.ra), get_vr(op.rb), eval(-get_vr(op.rc))));
 		else if (g_cfg.core.spu_approx_xfloat)
-			set_vr(op.rt4, mzero_if_xtended(get_vr(op.ra), get_vr(op.rb)) - get_vr(op.rc));
+			set_vr(op.rt4, fma32x4(get_vr(op.ra), get_vr(op.rb), eval(-get_vr(op.rc)))); // a * b + (-c), i.e. a * b - c as in the plain branch below
 		else
 			set_vr(op.rt4, get_vr(op.ra) * get_vr(op.rb) - get_vr(op.rc));
 	}