SPU LLVM: Don't use vperm2b256to128 outside of CPUTranslator.h
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ubuntu-24.04 gcc (push) Waiting to run
Build RPCS3 / RPCS3 Linux ubuntu-24.04-arm clang (push) Waiting to run
Build RPCS3 / RPCS3 Linux ubuntu-24.04 clang (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run

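- The vperm2b function already uses vperm2b256to128 internally when that is the ideal choice, so call sites should call vperm2b instead of vperm2b256to128 directly.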
This commit is contained in:
Malcolm Jestadt 2025-03-24 07:56:17 -04:00 committed by Elad
parent faef63e8a7
commit c52920755a

View file

@@ -5870,13 +5870,13 @@ public:
{
if (perm_only)
{
set_vr(op.rt4, vperm2b256to128(as, b, c));
set_vr(op.rt4, vperm2b(as, b, c));
return;
}
const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
const auto ab = vperm2b256to128(as, b, c);
const auto ab = vperm2b(as, b, c);
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
return;
}
@@ -5920,13 +5920,13 @@ public:
{
if (perm_only)
{
set_vr(op.rt4, vperm2b256to128(a, b, eval(c ^ 0xf)));
set_vr(op.rt4, vperm2b(a, b, eval(c ^ 0xf)));
return;
}
const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
const auto ab = vperm2b256to128(a, b, eval(c ^ 0xf));
const auto ab = vperm2b(a, b, eval(c ^ 0xf));
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
return;
}
@@ -5938,13 +5938,13 @@ public:
{
if (perm_only)
{
set_vr(op.rt4, vperm2b256to128(b, a, eval(c ^ 0x1f)));
set_vr(op.rt4, vperm2b(b, a, eval(c ^ 0x1f)));
return;
}
const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
const auto ab = vperm2b256to128(b, a, eval(c ^ 0x1f));
const auto ab = vperm2b(b, a, eval(c ^ 0x1f));
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
return;
}