diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 800596e1f9..ce807524cd 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -5434,23 +5434,16 @@ public: return eval(sext(bitcast(a) > bitcast(b))); } - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed) - { - const auto ai = eval(bitcast(a)); - const auto bi = eval(bitcast(b)); + const auto ai = eval(bitcast(a)); + const auto bi = eval(bitcast(b)); - if (!safe_nonzero_compare.any()) - { - return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); - } - else - { - return eval(sext(select((ai & bi) >= 0, ai > bi, ai < bi))); - } + if (!safe_nonzero_compare.any()) + { + return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); } else { - return eval(sext(fcmp_ord(a > b))); + return eval(sext(select((ai & bi) >= 0, ai > bi, ai < bi))); } }); @@ -5631,17 +5624,17 @@ public: if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed) { // FM(a, re_accurate(div)) - if (const auto [ok_re_acc, div] = match_expr(b, re_accurate(match())); ok_re_acc) + if (const auto [ok_re_acc, div, one] = match_expr(b, re_accurate(match(), match())); ok_re_acc) { - erase_stores(b); + erase_stores(one, b); set_vr(op.rt, a / div); return; } // FM(re_accurate(div), b) - if (const auto [ok_re_acc, div] = match_expr(a, re_accurate(match())); ok_re_acc) + if (const auto [ok_re_acc, div, one] = match_expr(a, re_accurate(match(), match())); ok_re_acc) { - erase_stores(a); + erase_stores(one, a); set_vr(op.rt, b / div); return; } @@ -5955,14 +5948,7 @@ public: const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed) - { - return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c); - } - else - { - return fma32x4(eval(-a), b, c); - } + return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c); }); set_vr(op.rt4, fnms(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc))); @@ -5974,10 +5960,10 @@ public: return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c)}}.set_order_equality_hint(1, 1, 0); } - template - static llvm_calli re_accurate(T&& a) + template + static llvm_calli re_accurate(T&& a, U&& b) { - return {"spu_re_acc", {std::forward(a)}}; + return {"spu_re_acc", {std::forward(a), std::forward(b)}}; } void FMA(spu_opcode_t op) @@ -6013,7 +5999,19 @@ public: register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) { const auto div = value(ci->getOperand(0)); - return fsplat(1.0f) / div; + const auto the_one = value(ci->getOperand(1)); + + const auto div_result = the_one / div; + + // from ps3 hardware testing: Inf => NaN and NaN => Zero + const auto result_and = bitcast(div_result) & 0x7fffffffu; + const auto result_cmp_inf = sext(result_and == splat(0x7F800000u)); + const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u)); + + const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); + const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); + + return bitcast((bitcast(div_result) & and_mask) | or_mask); }); const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); @@ -6113,8 +6111,8 @@ public: { if (auto [ok_re] = match_expr(b, spu_re(div)); ok_re) { - erase_stores(b); - set_vr(op.rt4, re_accurate(div)); + erase_stores(a, b, c); + set_vr(op.rt4, re_accurate(div, fsplat(float_value))); return true; } } @@ -6124,30 +6122,30 @@ public: { if (auto [ok_re] = match_expr(b, spu_re(div)); ok_re) { - erase_stores(b); - set_vr(op.rt4, re_accurate(div)); + erase_stores(a, b, c); + set_vr(op.rt4, re_accurate(div, fsplat(float_value))); return true; } } // FMA(spu_re(div), FNMS(div, spu_re(div), float_value), spu_re(div)) - if (auto [ok_fnms, div] = match_expr(a, fnms(MT, a, fsplat(float_value))); ok_fnms && op.ra == op.rc) + if (auto [ok_fnms, div] = match_expr(b, fnms(MT, a, fsplat(float_value))); ok_fnms && op.ra == op.rc) { if (auto [ok_re] = match_expr(a, spu_re(div)); ok_re) { - erase_stores(a); - set_vr(op.rt4, re_accurate(div)); + erase_stores(a, b, c); + set_vr(op.rt4, re_accurate(div, fsplat(float_value))); return true; } } // FMA(spu_re(div), FNMS(spu_re(div), div, float_value), spu_re(div)) - if (auto [ok_fnms, div] = match_expr(a, fnms(a, MT, fsplat(float_value))); ok_fnms && op.ra == op.rc) + if (auto [ok_fnms, div] = match_expr(b, fnms(a, MT, fsplat(float_value))); ok_fnms && op.ra == op.rc) { if (auto [ok_re] = match_expr(a, spu_re(div)); ok_re) { - erase_stores(a); - set_vr(op.rt4, re_accurate(div)); + erase_stores(a, b, c); + set_vr(op.rt4, re_accurate(div, fsplat(float_value))); return true; } }