From d33955c2908a8bd0c20d3361300ad170c8da773e Mon Sep 17 00:00:00 2001
From: RipleyTom
Date: Tue, 23 Jan 2024 05:13:31 +0100
Subject: [PATCH] Asmjit FI

Implement FI (Floating Interpolate) in the ASMJIT SPU recompiler instead of
only copying rb to rt, and make the LLVM recompiler evaluate fi(a, b) for
accurate xfloat instead of copying rb.

The ASMJIT sequence keeps the sign/exponent of rb and replaces the fraction:
  base = (rb & 0x007ffc00) << 9
  ymul = (rb & 0x3ff) * (ra & 0x7ffff)
  bnew = ((base - ymul) >> 9) + (ymul <= base ? 1 << 23 : 0)
  rt   = (rb & 0xff800000) | (bits(float(bnew)) & ~0xff800000)

Notes:
- base may have bit 31 set, so "ymul <= base" must be an unsigned compare.
- bnew has to be converted to float (cvtdq2ps) before the fraction bits are
  taken, as the LLVM intrinsic does with fpcast(bnew); otherwise the bit 23
  correction is simply masked away.
- pmulld/pmaxud require SSE4.1.
---
 rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 41 ++++++++++++++++++++++++++
 rpcs3/Emu/Cell/SPULLVMRecompiler.cpp   | 29 ++++++------------
 2 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
index 51b0d6fb54..453e5d15e2 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@@ -3983,7 +3983,48 @@ void spu_recompiler::CEQB(spu_opcode_t op)
 void spu_recompiler::FI(spu_opcode_t op)
 {
 	// Floating Interpolate
+	// rt = sign/exponent of rb | fraction of float(((base - ymul) >> 9) + hidden bit), where
+	// base = (rb & 0x007ffc00) << 9 and ymul = (rb & 0x3ff) * (ra & 0x7ffff)
+	const XmmLink& va = XmmGet(op.ra, XmmType::Float);
 	const XmmLink& vb = XmmGet(op.rb, XmmType::Float);
+	const XmmLink& vb_base = XmmAlloc();
+	const XmmLink& ymul = XmmAlloc();
+	const XmmLink& temp_reg = XmmAlloc();
+
+	c->movdqa(vb_base, vb);
+	c->movdqa(ymul, vb);
+	c->movdqa(temp_reg, va);
+
+	// Base fraction moved to the top of the lane (the mask becomes 0xfff80000, bit 31 can be set)
+	c->pand(vb_base, XmmConst(v128::from32p(0x007ffc00u)));
+	c->pslld(vb_base, 9);
+
+	// ymul = low 10 bits of rb * low 19 bits of ra (fits in 29 bits)
+	c->pand(temp_reg, XmmConst(v128::from32p(0x7ffff))); // va_fraction
+	c->pand(ymul, XmmConst(v128::from32p(0x3ff)));
+	c->pmulld(ymul, temp_reg);
+
+	// temp_reg = (base - ymul) >> 9, logical shift
+	c->movdqa(temp_reg, vb_base);
+	c->psubd(temp_reg, ymul);
+	c->psrld(temp_reg, 9);
+
+	// Add bit 23 when ymul <= base (unsigned). pcmpgtd compares signed and would treat
+	// a base with bit 31 set as negative, so test max_u(ymul, base) == base instead
+	c->pmaxud(ymul, vb_base);
+	c->pcmpeqd(ymul, vb_base);
+	c->pand(ymul, XmmConst(v128::from32p(1 << 23)));
+	c->paddd(temp_reg, ymul);
+
+	// Convert the integer to float before taking the fraction bits (same as fpcast(bnew)
+	// in the LLVM intrinsic); masking the raw integer would discard the bit 23 correction
+	c->cvtdq2ps(temp_reg, temp_reg);
+
+	// Inject old sign and exponent
+	c->pand(vb, XmmConst(v128::from32p(0xff800000u)));
+	c->pand(temp_reg, XmmConst(v128::from32p(~0xff800000u)));
+	c->por(vb, temp_reg);
+
 	c->movaps(SPU_OFF_128(gpr, op.rt), vb);
 }
 
diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
index ccbeaaeceb..a60baa6962 100644
--- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
@@ -6073,24 +6073,6 @@ public:
 
 	void FI(spu_opcode_t op)
 	{
-		// TODO
-		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
-		{
-			set_vr(op.rt, get_vr(op.rb));
-			// const auto [a, b] = get_vrs(op.ra, op.rb);
-
-			// const auto mask_se = splat(0xfff0000000000000ull);
-			// const auto mask_bf = splat(0x000fff8000000000ull);
-			// const auto mask_sf = splat(0x0000007fe0000000ull);
-			// const auto mask_yf = splat(0x0000ffffe0000000ull);
-
-			// const auto base = bitcast((bitcast(b) & mask_bf) | 0x3ff0000000000000ull);
-			// const auto step = fpcast(bitcast(b) & mask_sf) * fsplat(std::exp2(-13.f));
-			// const auto yval = fpcast(bitcast(a) & mask_yf) * fsplat(std::exp2(-19.f));
-			// set_vr(op.rt, bitcast((bitcast(b) & mask_se) | (bitcast(base - step * yval) & ~mask_se)));
-			return;
-		}
-
 		register_intrinsic("spu_fi", [&](llvm::CallInst* ci)
 		{
 			const auto a = bitcast(value(ci->getOperand(0)));
@@ -6102,6 +6084,15 @@ public:
 			return bitcast((b & 0xff800000u) | (bitcast(fpcast(bnew)) & ~0xff800000u)); // Inject old sign and exponent
 		});
 
+		const auto [a, b] = get_vrs(op.ra, op.rb);
+
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
+		{
+			const auto r = eval(fi(a, b));
+			set_vr(op.rt, r);
+			return;
+		}
+
 		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 		{
 			register_intrinsic("spu_re", [&](llvm::CallInst* ci)
@@ -6136,8 +6127,6 @@ public:
 			});
 		}
 
-		const auto [a, b] = get_vrs(op.ra, op.rb);
-
 		if (const auto [ok, mb] = match_expr(b, frest(match())); ok && mb.eq(a))
 		{
 			erase_stores(b);