Patch summary (preamble text; not part of any hunk):
  * CPUTranslator.h: adds a select(c, a, b) helper that builds an IR select
    via m_ir->CreateSelect on the evaluated operands and returns it as T.
  * SPUASMJITRecompiler.cpp, SHUFB: the AVX branch now carries out the whole
    sequence itself and picks between the two shuffled sources with vpblendvb,
    using a mask produced by vpslld(vt, vc, 3). The non-AVX branch builds its
    mask with pslld + pcmpeqb/pcmpgtb and keeps the pand/pandn/por select.
    The has_512 (vpternlogd) and has_xop (vpcmov) select variants are removed.
  * SPURecompiler.cpp: replaces merge(sext((c & 0x10) == 0), a, b) with
    select(bitcast(cr << 3) >= 0, a, b).
NOTE(review): `template`, `value_t::...`, `bitcast(` and `sext(` appear without
template argument lists; presumably they were stripped when this patch text
was extracted. Confirm against the upstream commit before applying.
NOTE(review): line breaks were restored to match the hunk header counts;
leading indentation was not recoverable and is reconstructed with tabs, so
apply with whitespace tolerance or verify against the target files.

diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
index 94efd18a8e..1803e598e3 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.h
+++ b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -972,6 +972,17 @@ public:
 		return a << (b & mask) | a >> ((0 - b) & mask);
 	}
 
+	// Select (c ? a : b)
+	template
+	auto select(T2 c, T a, T b)
+	{
+		static_assert(value_t::esize == 1, "select: expected bool type (first argument)");
+		static_assert(value_t::is_vector == value_t::is_vector, "select: incompatible arguments (vectors)");
+		T result;
+		result.value = m_ir->CreateSelect(c.eval(m_ir), a.eval(m_ir), b.eval(m_ir));
+		return result;
+	}
+
 	template
 	auto insert(T v, u64 i, E e)
 	{
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
index 891862999b..540f5c391a 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@@ -4701,11 +4701,19 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
 	const XmmLink& v5 = XmmAlloc();
 	c->movdqa(vm, XmmConst(_mm_set1_epi8(0xc0)));
 
-	// Test for (110xxxxx) and (11xxxxxx) bit values
 	if (utils::has_avx())
 	{
 		c->vpand(v5, vc, XmmConst(_mm_set1_epi8(0xe0)));
-		c->vpand(vt, vc, vm);
+		c->vpxor(vc, vc, XmmConst(_mm_set1_epi8(0xf)));
+		c->vpshufb(va, va, vc);
+		c->vpslld(vt, vc, 3);
+		c->vpcmpeqb(v5, v5, vm);
+		c->vpshufb(vb, vb, vc);
+		c->vpand(vc, vc, vm);
+		c->vpblendvb(vb, va, vb, vt);
+		c->vpcmpeqb(vt, vc, vm);
+		c->vpavgb(vt, vt, v5);
+		c->vpor(vt, vt, vb);
 	}
 	else
 	{
@@ -4713,35 +4721,21 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
 		c->pand(v5, XmmConst(_mm_set1_epi8(0xe0)));
 		c->movdqa(vt, vc);
 		c->pand(vt, vm);
-	}
-
-	c->pxor(vc, XmmConst(_mm_set1_epi8(0xf)));
-	c->pshufb(va, vc);
-	c->pshufb(vb, vc);
-	c->pand(vc, XmmConst(_mm_set1_epi8(0x10)));
-	c->pcmpeqb(v5, vm); // If true, result should become 0xFF
-	c->pcmpeqb(vt, vm); // If true, result should become either 0xFF or 0x80
-	c->pavgb(vt, v5); // Generate result constant: AVG(0xff, 0x00) == 0x80
-	c->pxor(vm, vm);
-	c->pcmpeqb(vc, vm);
-
-	// Select result value from va or vb
-	if (utils::has_512())
-	{
-		c->vpternlogd(vc, va, vb, 0xca /* A?B:C */);
-	}
-	else if (utils::has_xop())
-	{
-		c->vpcmov(vc, va, vb, vc);
-	}
-	else
-	{
+		c->pxor(vc, XmmConst(_mm_set1_epi8(0xf)));
+		c->pshufb(va, vc);
+		c->pshufb(vb, vc);
+		c->pslld(vc, 3);
+		c->pcmpeqb(v5, vm); // If true, result should become 0xFF
+		c->pcmpeqb(vt, vm); // If true, result should become either 0xFF or 0x80
+		c->pcmpeqb(vm, vm);
+		c->pcmpgtb(vc, vm);
 		c->pand(va, vc);
 		c->pandn(vc, vb);
-		c->por(vc, va);
+		c->por(vc, va); // Select result value from va or vb
+		c->pavgb(vt, v5); // Generate result constant: AVG(0xff, 0x00) == 0x80
+		c->por(vt, vc);
 	}
 
-	c->por(vt, vc);
 	c->movdqa(SPU_OFF_128(gpr, op.rt4), vt);
 }
 
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index dc59bd450b..844acabd10 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -2513,7 +2513,7 @@ public:
 		const auto cr = c ^ 0xf;
 		const auto a = pshufb(get_vr(op.ra), cr);
 		const auto b = pshufb(get_vr(op.rb), cr);
-		set_vr(op.rt4, merge(sext((c & 0x10) == 0), a, b) | x);
+		set_vr(op.rt4, select(bitcast(cr << 3) >= 0, a, b) | x);
 	}
 
 	void MPYA(spu_opcode_t op)