SPU LLVM: Final fixup for icelake shufb paths

- The problems were caused by the constant mask for gf2p8affineqb being passed as the first argument instead of the second argument.
This commit is contained in:
Malcolm Jestadt 2021-04-20 01:32:16 -04:00 committed by Ivan
parent 3e33f064bf
commit 53f13a9721

View file

@ -7424,6 +7424,15 @@ public:
const auto as = byteswap(a);
const auto bs = byteswap(b);
if (m_use_avx512_icl && (op.ra != op.rb))
{
const auto m = gf2p8affineqb(c, build<u8[16]>(0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04), 0x7f);
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
const auto ab = vperm2b(as, bs, c);
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
return;
}
const auto x = avg(noncast<u8[16]>(sext<s8[16]>((c & 0xc0) == 0xc0)), noncast<u8[16]>(sext<s8[16]>((c & 0xe0) == 0xc0)));
const auto ax = pshufb(as, c);
const auto bx = pshufb(bs, c);
@ -7465,8 +7474,8 @@ public:
if (m_use_avx512_icl && (op.ra != op.rb || m_interp_magn))
{
const auto m = gf2p8affineqb(build<u8[16]>(0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04), c, 0x7f);
const auto mm = select(noncast<s8[16]>(c << 1) >= 0, splat<u8[16]>(0), m);
const auto m = gf2p8affineqb(c, build<u8[16]>(0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04), 0x7f);
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
const auto cr = eval(~c);
const auto ab = vperm2b(b, a, cr);
set_vr(op.rt4, select(noncast<s8[16]>(cr) >= 0, mm, ab));