From 64616f1408b85d00c0167d62d0163c7c3ad517d0 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Tue, 7 Jun 2022 22:32:41 -0400 Subject: [PATCH] SPU LLVM: Microfixes - Avoid vpermb path in shufb when op.ra == op.rb - Reverse indices with (c ^ 0xf) rather than (~c) in vpermb path; vpternlogd is a 3-input operation and requires needless mov instructions to avoid destroying inputs --- rpcs3/Emu/Cell/SPURecompiler.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 13cb3c432d..421583bdf5 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7695,7 +7695,7 @@ public: if (auto [ok, bs] = match_expr(b, byteswap(match())); ok) { // Undo endian swapping, and rely on pshufb/vperm2b to re-reverse endianness - if (m_use_avx512_icl) + if (m_use_avx512_icl && (op.ra != op.rb)) { if (perm_only) { @@ -7757,19 +7757,19 @@ public: } } - if (m_use_avx512_icl) + if (m_use_avx512_icl && (op.ra != op.rb)) { if (perm_only) { - set_vr(op.rt4, vperm2b256to128(b, a, eval(~c))); + set_vr(op.rt4, vperm2b256to128(a, b, eval(c ^ 0xf))); return; } const auto m = gf2p8affineqb(c, build(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f); const auto mm = select(noncast(m) >= 0, splat(0), m); - const auto cr = eval(~c); - const auto ab = vperm2b256to128(b, a, cr); - set_vr(op.rt4, select(noncast(cr) >= 0, mm, ab)); + const auto cr = eval(c ^ 0xf); + const auto ab = vperm2b256to128(a, b, cr); + set_vr(op.rt4, select(noncast(c) >= 0, ab, mm)); return; }