From b9b8ed3ae6d75a9624a54a53cf4563c33f17c980 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Mon, 18 Nov 2024 10:52:29 -0500 Subject: [PATCH] SPU LLVM: Recognize ROTYQBYI and ROTQBI rotation pattern --- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 81 +++++++++++++++++++++------- 1 file changed, 61 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index fcc21c09ae..9387527c8f 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -4666,35 +4666,44 @@ public: return zshuffle(std::forward(a), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } + template + static llvm_calli rotqbybi(T&& a, U&& b) + { + return {"spu_rotqbybi", {std::forward(a), std::forward(b)}}; + } + void ROTQBYBI(spu_opcode_t op) { - const auto a = get_vr(op.ra); - - // Data with swapped endian from a load instruction - if (auto [ok, as] = match_expr(a, byteswap(match())); ok) + register_intrinsic("spu_rotqbybi", [&](llvm::CallInst* ci) { - const auto sc = build(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - const auto sh = sc + (splat_scalar(get_vr(op.rb)) >> 3); + const auto a = value(ci->getOperand(0)); + const auto b = value(ci->getOperand(1)); + + // Data with swapped endian from a load instruction + if (auto [ok, as] = match_expr(a, byteswap(match())); ok) + { + const auto sc = build(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + const auto sh = sc + (splat_scalar(b) >> 3); + + if (m_use_avx512_icl) + { + return eval(vpermb(as, sh)); + } + + return eval(pshufb(as, (sh & 0xf))); + } + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = sc - (splat_scalar(b) >> 3); if (m_use_avx512_icl) { - set_vr(op.rt, vpermb(as, sh)); - return; + return eval(vpermb(a, sh)); } - set_vr(op.rt, pshufb(as, (sh & 0xf))); - return; - } - const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const auto sh = sc - (splat_scalar(get_vr(op.rb)) >> 3); + return eval(pshufb(a, (sh & 0xf))); + }); - if (m_use_avx512_icl) - { - set_vr(op.rt, vpermb(a, sh)); - return; - } - - set_vr(op.rt, pshufb(a, (sh & 0xf))); + set_vr(op.rt, rotqbybi(get_vr(op.ra), get_vr(op.rb))); } void ROTQMBYBI(spu_opcode_t op) @@ -4813,6 +4822,38 @@ public: void ROTQBI(spu_opcode_t op) { const auto a = get_vr(op.ra); + const auto ax = get_vr(op.ra); + const auto bx = get_vr(op.rb); + + // Combined bit and bytes shift + if (auto [ok, v0, v1] = match_expr(ax, rotqbybi(match(), match())); ok && v1.eq(bx)) + { + const auto b32 = get_vr(op.rb); + // Is the rotate less than 31 bits? + if (auto k = get_known_bits(b32); !!(k.Zero & 0x60)) + { + const auto b = splat_scalar(get_vr(op.rb)); + set_vr(op.rt, fshl(bitcast(v0), zshuffle(bitcast(v0), 3, 0, 1, 2), b)); + return; + } + + // Inverted shift count + if (auto [ok1, v10, v11] = match_expr(b32, match() - match()); ok1) + { + if (auto [ok2, data] = get_const_vector(v10.value, m_pos); ok2) + { + if (data == v128::from32p(0x80)) + { + if (auto k = get_known_bits(v11); !!(k.Zero & 0x60)) + { + set_vr(op.rt, fshr(zshuffle(bitcast(v0), 1, 2, 3, 0), bitcast(v0), splat_scalar(bitcast(v11)))); + return; + } + } + } + } + } + const auto b = splat_scalar(get_vr(op.rb) & 0x7); set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); }