mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-08-11 18:50:55 +00:00
SPU/PPU LLVM: Fix FMA signed zeroes handling
This commit is contained in:
parent
f188589685
commit
7e11855330
2 changed files with 40 additions and 15 deletions
|
@ -948,7 +948,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
|
||||||
// Optimization: Emit only a floating multiply if the addend is zero
|
// Optimization: Emit only a floating multiply if the addend is zero
|
||||||
if (auto [ok, data] = get_const_vector(b.value, m_addr, 2000); ok)
|
if (auto [ok, data] = get_const_vector(b.value, m_addr, 2000); ok)
|
||||||
{
|
{
|
||||||
if (data == v128{})
|
if (data == v128::from32p(1u << 31))
|
||||||
{
|
{
|
||||||
set_vr(op.vd, vec_handle_result(a * c));
|
set_vr(op.vd, vec_handle_result(a * c));
|
||||||
ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
||||||
|
|
|
@ -7483,13 +7483,14 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks for positive and negative zero, or Denormal (treated as zero)
|
// Checks for positive and negative zero, or Denormal (treated as zero)
|
||||||
bool is_spu_float_zero(v128 a)
|
// If sign is +1 or -1, additionally require the sign bit of every element to match that sign
|
||||||
|
bool is_spu_float_zero(v128 a, int sign = 0)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < 4; i++)
|
for (u32 i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
const u32 exponent = a._u32[i] & 0x7f800000u;
|
const u32 exponent = a._u32[i] & 0x7f800000u;
|
||||||
|
|
||||||
if (exponent)
|
if (exponent || (sign && (sign >= 0) != (a._s32[i] >= 0)))
|
||||||
{
|
{
|
||||||
// Normalized number
|
// Normalized number
|
||||||
return false;
|
return false;
|
||||||
|
@ -7727,28 +7728,52 @@ public:
|
||||||
// This is odd since SPU code could just use the FM instruction, but it seems common enough
|
// This is odd since SPU code could just use the FM instruction, but it seems common enough
|
||||||
if (auto [ok, data] = get_const_vector(c.value, m_pos, 4000); ok)
|
if (auto [ok, data] = get_const_vector(c.value, m_pos, 4000); ok)
|
||||||
{
|
{
|
||||||
if (is_spu_float_zero(data))
|
if (is_spu_float_zero(data, -1))
|
||||||
{
|
{
|
||||||
r = eval(a * b);
|
r = eval(a * b);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto [ok, data] = get_const_vector(b.value, m_pos, 4000); ok)
|
if ([&]()
|
||||||
{
|
{
|
||||||
if (is_spu_float_zero(data))
|
if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok)
|
||||||
{
|
{
|
||||||
// Just return the added value if either a or b is 0
|
if (!is_spu_float_zero(data, +1))
|
||||||
return c;
|
{
|
||||||
}
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok)
|
if (auto [ok0, data0] = get_const_vector(b.value, m_pos, 4000); ok0)
|
||||||
{
|
{
|
||||||
if (is_spu_float_zero(data))
|
if (is_spu_float_zero(data0, +1))
|
||||||
{
|
{
|
||||||
return c;
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok)
|
||||||
|
{
|
||||||
|
if (!is_spu_float_zero(data, -1))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto [ok0, data0] = get_const_vector(b.value, m_pos, 4000); ok0)
|
||||||
|
{
|
||||||
|
if (is_spu_float_zero(data0, -1))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}())
|
||||||
|
{
|
||||||
|
// Just return the added value if both a and b are +0 or both are -0 (a single +0 or -0 operand alone is not enough)
|
||||||
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_use_fma)
|
if (m_use_fma)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue