mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-19 19:15:26 +00:00
SPU LLVM: Add icelake optimized paths for SHUFB (#8712)
This commit is contained in:
parent
8cdfe5952a
commit
9e4f43f4d1
2 changed files with 37 additions and 2 deletions
|
@ -2785,6 +2785,21 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
value_t<u8[16]> gf2p8affineqb(T1 a, T2 b, u8 c)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto data1 = b.eval(m_ir);
|
||||
|
||||
const auto immediate = (llvm_const_int<u8>{c});
|
||||
const auto imm8 = immediate.eval(m_ir);
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_vgf2p8affineqb_128), {data0, data1, imm8});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
value_t<u8[16]> vperm2b(T1 a, T2 b, T3 c)
|
||||
{
|
||||
|
|
|
@ -7277,10 +7277,20 @@ public:
|
|||
{
|
||||
if (auto [ok, v1] = match_expr(b, byteswap(match<u8[16]>())); ok)
|
||||
{
|
||||
// Undo endian swapping, and rely on pshufb to re-reverse endianness
|
||||
const auto x = avg(noncast<u8[16]>(sext<s8[16]>((c & 0xc0) == 0xc0)), noncast<u8[16]>(sext<s8[16]>((c & 0xe0) == 0xc0)));
|
||||
// Undo endian swapping, and rely on pshufb/vperm2b to re-reverse endianness
|
||||
const auto as = byteswap(a);
|
||||
const auto bs = byteswap(b);
|
||||
|
||||
if (m_use_avx512_icl && (op.ra != op.rb || m_interp_magn))
|
||||
{
|
||||
const auto m = gf2p8affineqb(build<u8[16]>(0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04), c, 0x7f);
|
||||
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
|
||||
const auto ab = vperm2b(as, bs, c);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto x = avg(noncast<u8[16]>(sext<s8[16]>((c & 0xc0) == 0xc0)), noncast<u8[16]>(sext<s8[16]>((c & 0xe0) == 0xc0)));
|
||||
const auto ax = pshufb(as, c);
|
||||
const auto bx = pshufb(bs, c);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx) | x);
|
||||
|
@ -7319,6 +7329,16 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
if (m_use_avx512_icl && (op.ra != op.rb || m_interp_magn))
|
||||
{
|
||||
const auto m = gf2p8affineqb(build<u8[16]>(0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04), c, 0x7f);
|
||||
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
|
||||
const auto cr = eval(~c);
|
||||
const auto ab = vperm2b(b, a, cr);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(cr) >= 0, mm, ab));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto x = avg(noncast<u8[16]>(sext<s8[16]>((c & 0xc0) == 0xc0)), noncast<u8[16]>(sext<s8[16]>((c & 0xe0) == 0xc0)));
|
||||
const auto cr = eval(c ^ 0xf);
|
||||
const auto ax = pshufb(a, cr);
|
||||
|
|
Loading…
Add table
Reference in a new issue