// Patch summary (preamble text placed before the first `diff --git` header):
//
// Splits the PPU interpreter handler VNMSUBFP into separate "fast" and
// "precise" variants.
//
// rpcs3/Emu/Cell/PPUInterpreter.cpp
//   * The existing ppu_interpreter::VNMSUBFP definition is re-homed as
//     ppu_interpreter_fast::VNMSUBFP; its body is not modified by this patch.
//     The visible part computes (va * vc) - vb with _mm_mul_ps/_mm_sub_ps and
//     builds a vector with -0.0f in every lane. The statement that stores the
//     result lies between the two hunks and is not shown here.
//   * A new ppu_interpreter_precise::VNMSUBFP is added:
//       m = -0.0f in every lane (only the sign bit set)
//       b = vb XOR m            (vb with its sign bit flipped)
//       r = fma32f(a, c, b) XOR m
//       ppu.vr[op.rd] = vec_handle_nan(r, a, b, c)
//     Presumably v128::fma32f(a, c, b) is a fused a * c + b, which would make
//     r == -(va * vc - vb) with a single rounding -- TODO confirm against the
//     v128 helper.
//     NOTE(review): the destination is indexed with op.rd, while VNOR in the
//     surrounding context writes ppu.vr[op.vd] -- confirm both name the same
//     opcode field, or switch to op.vd for consistency.
//     NOTE(review): vec_handle_nan receives the sign-flipped `b`, not the
//     original vb operand -- confirm this is the intended NaN propagation.
//
// rpcs3/Emu/Cell/PPUInterpreter.h
//   * Removes the VNMSUBFP declaration from struct ppu_interpreter.
//   * Adds a VNMSUBFP declaration to both ppu_interpreter_precise and
//     ppu_interpreter_fast, next to VMADDFP.
//
// NOTE(review): the line breaks of this patch appear to have been collapsed
// into spaces; the hunk line counts cannot be checked against it as-is.
diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 9330830d63..5f8b79219d 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -1458,7 +1458,7 @@ bool ppu_interpreter::VMULOUH(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_fast::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op) { const auto a = _mm_sub_ps(_mm_mul_ps(ppu.vr[op.va].vf, ppu.vr[op.vc].vf), ppu.vr[op.vb].vf); const auto b = _mm_set1_ps(-0.0f); @@ -1467,6 +1467,17 @@ bool ppu_interpreter::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op) return true; } +bool ppu_interpreter_precise::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op) +{ + const auto m = _mm_set1_ps(-0.0f); + const auto a = ppu.vr[op.va]; + const auto c = ppu.vr[op.vc]; + const auto b = v128::fromF(_mm_xor_ps(ppu.vr[op.vb].vf, m)); + const auto r = v128::fromF(_mm_xor_ps(v128::fma32f(a, c, b).vf, m)); + ppu.vr[op.rd] = vec_handle_nan(r, a, b, c); + return true; +} + bool ppu_interpreter::VNOR(ppu_thread& ppu, ppu_opcode_t op) { ppu.vr[op.vd] = ~(ppu.vr[op.va] | ppu.vr[op.vb]); diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 39e96c92eb..77b91e78cf 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -73,7 +73,6 @@ struct ppu_interpreter static bool VMULOSH(ppu_thread&, ppu_opcode_t); static bool VMULOUB(ppu_thread&, ppu_opcode_t); static bool VMULOUH(ppu_thread&, ppu_opcode_t); - static bool VNMSUBFP(ppu_thread&, ppu_opcode_t); static bool VNOR(ppu_thread&, ppu_opcode_t); static bool VOR(ppu_thread&, ppu_opcode_t); static bool VPERM(ppu_thread&, ppu_opcode_t); @@ -373,6 +372,7 @@ struct ppu_interpreter_precise final : ppu_interpreter static bool VCTSXS(ppu_thread&, ppu_opcode_t); static bool VCTUXS(ppu_thread&, ppu_opcode_t); static bool VMADDFP(ppu_thread&, ppu_opcode_t); + static bool VNMSUBFP(ppu_thread&, ppu_opcode_t); static 
bool FDIVS(ppu_thread&, ppu_opcode_t); static bool FSUBS(ppu_thread&, ppu_opcode_t); @@ -430,6 +430,7 @@ struct ppu_interpreter_fast final : ppu_interpreter static bool VCTSXS(ppu_thread&, ppu_opcode_t); static bool VCTUXS(ppu_thread&, ppu_opcode_t); static bool VMADDFP(ppu_thread&, ppu_opcode_t); + static bool VNMSUBFP(ppu_thread&, ppu_opcode_t); static bool FDIVS(ppu_thread&, ppu_opcode_t); static bool FSUBS(ppu_thread&, ppu_opcode_t);