diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index a9f7e2309b..0ac68ce4f9 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -3801,16 +3801,32 @@ void spu_recompiler::CGX(spu_opcode_t op) //nf void spu_recompiler::BGX(spu_opcode_t op) //nf { - for (u32 i = 0; i < 4; i++) // unrolled loop + const XmmLink& vt = XmmGet(op.rt, XmmType::Int); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& temp = XmmAlloc(); + const XmmLink& sign = XmmAlloc(); + + c->pslld(vt, 31); + + if (utils::has_avx()) { - c->bt(SPU_OFF_32(gpr, op.rt, &v128::_u32, i), 0); - c->cmc(); - c->mov(*addr, SPU_OFF_32(gpr, op.rb, &v128::_u32, i)); - c->sbb(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, i)); - c->setnc(addr->r8()); - c->movzx(*addr, addr->r8()); - c->mov(SPU_OFF_32(gpr, op.rt, &v128::_u32, i), *addr); + c->vpcmpeqd(temp, vb, va); } + else + { + c->movdqa(temp, vb); + c->pcmpeqd(temp, va); + } + + c->pand(vt, temp); + c->movdqa(sign, XmmConst(_mm_set1_epi32(-0x80000000))); + c->pxor(va, sign); + c->pxor(vb, sign); + c->pcmpgtd(vb, va); + c->por(vt, vb); + c->psrld(vt, 31); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::MPYHHA(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index c1c7e5a962..1c92e1b888 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -6612,7 +6612,7 @@ public: { const auto [a, b] = get_vrs(op.ra, op.rb); const auto c = get_vr(op.rt) << 31; - set_vr(op.rt, zext((a <= b) & ~((a == b) & (c >= 0)))); + set_vr(op.rt, noncast(sext(b > a) | (sext(a == b) & c)) >> 31); } void MPYHHA(spu_opcode_t op)