SPU Analyzer: Fix for value propagation

Elad 2025-03-08 10:19:35 +02:00
parent bfb9dfea7e
commit e4b9a21acc
2 changed files with 53 additions and 54 deletions

View file

@@ -2915,7 +2915,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 // Result bounds
 u32 lsa = entry_point;
-u32 limit = 0x40000;
+u32 limit = SPU_LS_SIZE;
 if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
 {
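
Most of the hunks in this file swap the magic number 0x40000 for the named constant SPU_LS_SIZE. A minimal sketch of the assumption behind the rename (the definition below is illustrative and not part of this commit): the SPU local storage is 256 KiB, so both spellings select the same value and the substitution is behaviour-preserving.

    #include <cstdint>

    using u32 = std::uint32_t;

    // Assumed definition: the SPU local storage is 256 KiB.
    constexpr u32 SPU_LS_SIZE = 0x40000; // 262144 bytes

    // If the assumption holds, every "0x40000 -> SPU_LS_SIZE" hunk is purely cosmetic.
    static_assert(SPU_LS_SIZE == 0x40000, "rename must not change behaviour");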
@@ -3132,13 +3132,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 break;
 }
-if (target >= lsa && target < 0x40000)
+if (target >= lsa && target < SPU_LS_SIZE)
 {
 // Possible jump table entry (absolute)
 jt_abs.push_back(target);
 }
-if (target + start >= lsa && target + start < 0x40000)
+if (target + start >= lsa && target + start < SPU_LS_SIZE)
 {
 // Possible jump table entry (relative)
 jt_rel.push_back(target + start);
@@ -3607,7 +3607,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 }
-while (lsa > 0 || limit < 0x40000)
+while (lsa > 0 || limit < SPU_LS_SIZE)
 {
 const u32 initial_size = ::size32(result.data);
@@ -4001,7 +4001,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 bool need_repeat = false;
 u32 start = 0;
-u32 limit = 0x40000;
+u32 limit = SPU_LS_SIZE;
 // Walk block list in ascending order
 for (auto& block : m_bbs)
@@ -4012,7 +4012,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 const auto upper = m_funcs.upper_bound(addr);
 start = addr;
-limit = upper == m_funcs.end() ? 0x40000 : upper->first;
+limit = upper == m_funcs.end() ? SPU_LS_SIZE : upper->first;
 }
 // Find targets that exceed [start; limit) range and make new functions from them
@@ -4082,7 +4082,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 const u32 _old = ::at32(m_bbs, pred).chunk;
-if (_old < 0x40000 && _old != _new)
+if (_old < SPU_LS_SIZE && _old != _new)
 {
 // If block has multiple 'entry' points, it becomes an entry point itself
 new_entries.push_back(addr);
@@ -4107,7 +4107,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 const u32 value = m_entry_info[target / 4] ? target : block.chunk;
-if (u32& tval = tb.chunk; tval < 0x40000)
+if (u32& tval = tb.chunk; tval < SPU_LS_SIZE)
 {
 // TODO: fix condition
 if (tval != value && !m_entry_info[target / 4])
@@ -4139,7 +4139,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 for (auto& bb : m_bbs)
 {
 // Reset chunk info
-bb.second.chunk = 0x40000;
+bb.second.chunk = SPU_LS_SIZE;
 }
 }
@@ -4201,7 +4201,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 for (u32 i = 0; i < s_reg_max; i++)
 {
 if (block.reg_origin[i] == 0x80000000)
-block.reg_origin[i] = 0x40000;
+block.reg_origin[i] = SPU_LS_SIZE;
 }
 }
@@ -4210,7 +4210,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 for (u32 i = 0; i < s_reg_max; i++)
 {
 if (block.reg_origin_abs[i] == 0x80000000)
-block.reg_origin_abs[i] = 0x40000;
+block.reg_origin_abs[i] = SPU_LS_SIZE;
 else if (block.reg_origin_abs[i] + 1 == 0)
 block.reg_origin_abs[i] = -2;
 }
@@ -4235,14 +4235,16 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 if (tb.reg_origin[i] == 0x80000000)
 {
+must_repeat |= !tb.targets.empty() && tb.reg_origin[i] != expected;
 tb.reg_origin[i] = expected;
 }
 else if (tb.reg_origin[i] != expected)
 {
-// Set -1 if multiple origins merged (requires PHI node)
-tb.reg_origin[i] = -1;
-must_repeat |= !tb.targets.empty() && tb.reg_origin[i] != umax;
+must_repeat |= !tb.targets.empty();
+// Set umax if multiple origins merged (requires PHI node)
+tb.reg_origin[i] = umax;
 }
 }
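
One reading of this hunk (an inference from the removed lines, not something the commit states): the old code overwrote the origin with -1 before testing it against umax, so the test was always false and no further propagation pass was ever requested; the added lines raise must_repeat before the overwrite, and also when a previously unvisited origin receives its first value. The stand-alone sketch below uses illustrative names to show the order-of-operations problem in the removed code.

    #include <cassert>
    #include <cstdint>

    int main()
    {
        using u32 = std::uint32_t;
        constexpr u32 umax = 0xFFFF'FFFFu; // assumption: umax compares equal to u32(-1)

        u32 reg_origin = 0x1234;      // illustrative previously recorded origin
        bool must_repeat = false;
        const bool has_targets = true;

        // Ordering taken from the removed lines: assign first, test afterwards.
        reg_origin = u32(-1);
        must_repeat |= has_targets && reg_origin != umax; // never true once reg_origin is -1

        assert(!must_repeat); // another propagation pass is never requested
        return 0;
    }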
@@ -4256,9 +4258,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 else if (tb.reg_origin_abs[i] != expected)
 {
-if (tb.reg_origin_abs[i] == 0x40000 || expected + 2 == 0 || expected == 0x40000)
+if (tb.reg_origin_abs[i] == SPU_LS_SIZE || expected + 2 == 0 || expected == SPU_LS_SIZE)
 {
-// Set -2: sticky value indicating possible external reg origin (0x40000)
+// Set -2: sticky value indicating possible external reg origin (SPU_LS_SIZE)
 tb.reg_origin_abs[i] = -2;
 must_repeat |= !tb.targets.empty();
@@ -4288,9 +4290,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 // Reset values for the next attempt (keep negative values)
 for (u32 i = 0; i < s_reg_max; i++)
 {
-if (block.reg_origin[i] <= 0x40000)
+if (block.reg_origin[i] <= SPU_LS_SIZE)
 block.reg_origin[i] = 0x80000000;
-if (block.reg_origin_abs[i] <= 0x40000)
+if (block.reg_origin_abs[i] <= SPU_LS_SIZE)
 block.reg_origin_abs[i] = 0x80000000;
 }
 }
@@ -4323,21 +4325,21 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 bb.reg_val32[i] = src.reg_val32[i];
 }
-if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == 0x40000 || orig + 2 == 0))
+if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == SPU_LS_SIZE || orig + 2 == 0))
 {
 // Destroy offset if external reg value is used
 func.reg_save_off[i] = -1;
 }
 }
-if (u32 orig = bb.reg_origin_abs[s_reg_sp]; orig < 0x40000)
+if (u32 orig = bb.reg_origin_abs[s_reg_sp]; orig < SPU_LS_SIZE)
 {
 auto& prologue = ::at32(m_bbs, orig);
 // Copy stack offset (from the assumed prologue)
 bb.stack_sub = prologue.stack_sub;
 }
-else if (orig > 0x40000)
+else if (orig > SPU_LS_SIZE)
 {
 // Unpredictable stack
 bb.stack_sub = 0x80000000;
@@ -4510,7 +4512,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 // Check for funny values.
-if (bb.stack_sub >= 0x40000 || bb.stack_sub % 16)
+if (bb.stack_sub >= SPU_LS_SIZE || bb.stack_sub % 16)
 {
 bb.stack_sub = 0x80000000;
 }
@@ -4659,7 +4661,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 if (is_ok && bb.terminator == term_type::ret)
 {
 // Check $LR (alternative return registers are currently not supported)
-if (u32 lr_orig = bb.reg_mod[s_reg_lr] ? addr : bb.reg_origin_abs[s_reg_lr]; lr_orig < 0x40000)
+if (u32 lr_orig = bb.reg_mod[s_reg_lr] ? addr : bb.reg_origin_abs[s_reg_lr]; lr_orig < SPU_LS_SIZE)
 {
 auto& src = ::at32(m_bbs, lr_orig);
@@ -4674,7 +4676,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 is_ok = false;
 }
 }
-else if (lr_orig > 0x40000)
+else if (lr_orig > SPU_LS_SIZE)
 {
 spu_log.todo("Function 0x%05x: [0x%05x] $LR unpredictable (src=0x%x)", f.first, addr, lr_orig);
 is_ok = false;
@@ -4683,7 +4685,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 // Check $80..$127 (should be restored or unmodified)
 for (u32 i = s_reg_80; is_ok && i <= s_reg_127; i++)
 {
-if (u32 orig = bb.reg_mod[i] ? addr : bb.reg_origin_abs[i]; orig < 0x40000)
+if (u32 orig = bb.reg_mod[i] ? addr : bb.reg_origin_abs[i]; orig < SPU_LS_SIZE)
 {
 auto& src = ::at32(m_bbs, orig);
@@ -4693,7 +4695,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 is_ok = false;
 }
 }
-else if (orig > 0x40000)
+else if (orig > SPU_LS_SIZE)
 {
 spu_log.todo("Function 0x%05x: [0x%05x] $%u unpredictable (src=0x%x)", f.first, addr, i, orig);
 is_ok = false;
@@ -5229,7 +5231,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 auto& block = infos[bpc];
-if (pos == entry_point || (g_cfg.core.spu_block_size != spu_block_size_type::safe && (m_ret_info[bpc / 4] || m_entry_info[bpc / 4])))
+if (bpc == entry_point || (g_cfg.core.spu_block_size != spu_block_size_type::safe && (m_ret_info[bpc / 4] || m_entry_info[bpc / 4])))
 {
 // Do not allow value passthrough
 for (reg_state_t& f : block->start_reg_state)
@@ -7269,6 +7271,24 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
 for (auto& bb : m_bbs)
 {
+if (m_block_info[bb.first / 4])
+{
+fmt::append(out, "A: [0x%05x] %s\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? "Chunk" : "Entry") : "Block");
+fmt::append(out, "\t F: 0x%05x\n", bb.second.func);
+for (u32 pred : bb.second.preds)
+{
+fmt::append(out, "\t <- 0x%05x\n", pred);
+}
+}
+else
+{
+fmt::append(out, "A: [0x%05x] ?\n", bb.first);
+}
+out += "\n";
 for (u32 pos = bb.first, end = bb.first + bb.second.size * 4; pos < end; pos += 4)
 {
 dis_asm.disasm(pos);
@@ -7285,24 +7305,11 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
-if (m_block_info[bb.first / 4])
-{
-fmt::append(out, "A: [0x%05x] %s\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? "Chunk" : "Entry") : "Block");
-fmt::append(out, "\tF: 0x%05x\n", bb.second.func);
-for (u32 pred : bb.second.preds)
-{
-fmt::append(out, "\t<- 0x%05x\n", pred);
-}
 for (u32 target : bb.second.targets)
 {
 fmt::append(out, "\t-> 0x%05x%s\n", target, m_bbs.count(target) ? "" : " (null)");
 }
-}
-else
-{
-fmt::append(out, "A: [0x%05x] ?\n", bb.first);
-}
 out += '\n';
 }
@@ -8143,23 +8150,15 @@ std::array<reg_state_t, s_reg_max>& block_reg_info::evaluate_start_state(const s
 }
 }
-if (qi == 0)
-{
-// TODO: First block is always resolved here, but this logic can be improved to detect more cases of opportunistic resolving
-is_all_resolved = true;
-}
 auto& res_state = is_all_resolved ? cur_node->start_reg_state : temp;
-for (usz bi = 0; bi < it->state_prev.size(); bi++)
+if (!is_all_resolved)
 {
-if (it->state_prev[bi].disconnected)
-{
-// Loop state, even if not ignored for a million times the result would still be the same
-// So ignore it
-continue;
-}
+res_state = reg_state_t::make_unknown<s_reg_max>(it->block_pc);
 }
+for (usz bi = 0; is_all_resolved && bi < it->state_prev.size(); bi++)
+{
 std::array<reg_state_t, s_reg_max>* arg_state{};
 const auto& node = ::at32(map, it->state_prev[bi].block_pc);

View file

@@ -2054,7 +2054,7 @@ public:
 _phi->addIncoming(value, &m_function->getEntryBlock());
 }
 }
-else if (src < 0x40000)
+else if (src < SPU_LS_SIZE)
 {
 // Passthrough register value
 const auto bfound = m_blocks.find(src);
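
Read together, the analyser hunks distinguish a real register origin (the pc of the defining block, below SPU_LS_SIZE) from a few sentinel encodings. The helpers below are a hedged summary of that scheme as it can be read off this diff; the names are illustrative and not RPCS3 API.

    #include <cstdint>

    using u32 = std::uint32_t;

    constexpr u32 SPU_LS_SIZE = 0x40000; // assumed: 256 KiB SPU local storage

    // Illustrative helpers describing how reg_origin / reg_origin_abs values
    // appear to be encoded in the hunks above (SPU_LS_SIZE itself also shows up
    // as a "no real origin" placeholder when chunk/origin info is reset).
    constexpr bool is_real_origin(u32 v) { return v < SPU_LS_SIZE; }  // pc of the defining block
    constexpr bool is_unvisited(u32 v)   { return v == 0x80000000; }  // reset marker before a pass
    constexpr bool is_merged_phi(u32 v)  { return v == u32(-1); }     // multiple origins merged (umax)
    constexpr bool is_external(u32 v)    { return v == u32(-2); }     // sticky "possible external origin"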