SPU/PPU LLVM: Fix FMA signed zeroes handling

This commit is contained in:
Eladash 2020-08-05 18:03:21 +03:00 committed by Ani
commit 7e11855330
2 changed files with 40 additions and 15 deletions

View file

@ -948,7 +948,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
// Optimization: Emit only a floating multiply if the addend is zero // Optimization: Emit only a floating multiply if the addend is zero
if (auto [ok, data] = get_const_vector(b.value, m_addr, 2000); ok) if (auto [ok, data] = get_const_vector(b.value, m_addr, 2000); ok)
{ {
if (data == v128{}) if (data == v128::from32p(1u << 31))
{ {
set_vr(op.vd, vec_handle_result(a * c)); set_vr(op.vd, vec_handle_result(a * c));
ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));

View file

@ -7483,13 +7483,14 @@ public:
} }
// Checks for positive and negative zero, or Denormal (treated as zero) // Checks for positive and negative zero, or Denormal (treated as zero)
bool is_spu_float_zero(v128 a) // If sign is +-1 check equality against all sign bits
bool is_spu_float_zero(v128 a, int sign = 0)
{ {
for (u32 i = 0; i < 4; i++) for (u32 i = 0; i < 4; i++)
{ {
const u32 exponent = a._u32[i] & 0x7f800000u; const u32 exponent = a._u32[i] & 0x7f800000u;
if (exponent) if (exponent || (sign && (sign >= 0) != (a._s32[i] >= 0)))
{ {
// Normalized number // Normalized number
return false; return false;
@ -7727,28 +7728,52 @@ public:
// This is odd since SPU code could just use the FM instruction, but it seems common enough // This is odd since SPU code could just use the FM instruction, but it seems common enough
if (auto [ok, data] = get_const_vector(c.value, m_pos, 4000); ok) if (auto [ok, data] = get_const_vector(c.value, m_pos, 4000); ok)
{ {
if (is_spu_float_zero(data)) if (is_spu_float_zero(data, -1))
{ {
r = eval(a * b); r = eval(a * b);
return r; return r;
} }
} }
if (auto [ok, data] = get_const_vector(b.value, m_pos, 4000); ok) if ([&]()
{ {
if (is_spu_float_zero(data)) if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok)
{ {
// Just return the added value if either a or b is 0 if (!is_spu_float_zero(data, +1))
return c; {
} return false;
} }
if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok) if (auto [ok0, data0] = get_const_vector(b.value, m_pos, 4000); ok0)
{ {
if (is_spu_float_zero(data)) if (is_spu_float_zero(data0, +1))
{ {
return c; return true;
}
}
} }
if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok)
{
if (!is_spu_float_zero(data, -1))
{
return false;
}
if (auto [ok0, data0] = get_const_vector(b.value, m_pos, 4000); ok0)
{
if (is_spu_float_zero(data0, -1))
{
return true;
}
}
}
return false;
}())
{
// Just return the added value if both a and b are +0 or -0 (+0 and -0 aren't allowed alone)
return c;
} }
if (m_use_fma) if (m_use_fma)