diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp
index b466267a14..dedc31599e 100644
--- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp
@@ -2915,7 +2915,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 	// Result bounds
 	u32 lsa = entry_point;
-	u32 limit = 0x40000;
+	u32 limit = SPU_LS_SIZE;
 
 	if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
 	{
@@ -3132,13 +3132,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 				break;
 			}
 
-			if (target >= lsa && target < 0x40000)
+			if (target >= lsa && target < SPU_LS_SIZE)
 			{
 				// Possible jump table entry (absolute)
 				jt_abs.push_back(target);
 			}
 
-			if (target + start >= lsa && target + start < 0x40000)
+			if (target + start >= lsa && target + start < SPU_LS_SIZE)
 			{
 				// Possible jump table entry (relative)
 				jt_rel.push_back(target + start);
@@ -3607,7 +3607,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		}
 	}
 
-	while (lsa > 0 || limit < 0x40000)
+	while (lsa > 0 || limit < SPU_LS_SIZE)
 	{
 		const u32 initial_size = ::size32(result.data);
@@ -4001,7 +4001,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		bool need_repeat = false;
 
 		u32 start = 0;
-		u32 limit = 0x40000;
+		u32 limit = SPU_LS_SIZE;
 
 		// Walk block list in ascending order
 		for (auto& block : m_bbs)
@@ -4012,7 +4012,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 			{
 				const auto upper = m_funcs.upper_bound(addr);
 				start = addr;
-				limit = upper == m_funcs.end() ? 0x40000 : upper->first;
+				limit = upper == m_funcs.end() ? SPU_LS_SIZE : upper->first;
 			}
 
 			// Find targets that exceed [start; limit) range and make new functions from them
@@ -4082,7 +4082,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 				{
 					const u32 _old = ::at32(m_bbs, pred).chunk;
 
-					if (_old < 0x40000 && _old != _new)
+					if (_old < SPU_LS_SIZE && _old != _new)
 					{
 						// If block has multiple 'entry' points, it becomes an entry point itself
 						new_entries.push_back(addr);
@@ -4107,7 +4107,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 					const u32 value = m_entry_info[target / 4] ? target : block.chunk;
 
-					if (u32& tval = tb.chunk; tval < 0x40000)
+					if (u32& tval = tb.chunk; tval < SPU_LS_SIZE)
 					{
 						// TODO: fix condition
 						if (tval != value && !m_entry_info[target / 4])
@@ -4139,7 +4139,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		for (auto& bb : m_bbs)
 		{
 			// Reset chunk info
-			bb.second.chunk = 0x40000;
+			bb.second.chunk = SPU_LS_SIZE;
 		}
 	}
@@ -4201,7 +4201,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		for (u32 i = 0; i < s_reg_max; i++)
 		{
 			if (block.reg_origin[i] == 0x80000000)
-				block.reg_origin[i] = 0x40000;
+				block.reg_origin[i] = SPU_LS_SIZE;
 		}
 	}
@@ -4210,7 +4210,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		for (u32 i = 0; i < s_reg_max; i++)
 		{
 			if (block.reg_origin_abs[i] == 0x80000000)
-				block.reg_origin_abs[i] = 0x40000;
+				block.reg_origin_abs[i] = SPU_LS_SIZE;
 			else if (block.reg_origin_abs[i] + 1 == 0)
 				block.reg_origin_abs[i] = -2;
 		}
@@ -4235,14 +4235,16 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 				if (tb.reg_origin[i] == 0x80000000)
 				{
+					must_repeat |= !tb.targets.empty() && tb.reg_origin[i] != expected;
+
 					tb.reg_origin[i] = expected;
 				}
 				else if (tb.reg_origin[i] != expected)
 				{
-					// Set -1 if multiple origins merged (requires PHI node)
-					tb.reg_origin[i] = -1;
+					must_repeat |= !tb.targets.empty() && tb.reg_origin[i] != umax;
 
-					must_repeat |= !tb.targets.empty();
+					// Set umax if multiple origins merged (requires PHI node)
+					tb.reg_origin[i] = umax;
 				}
 			}
@@ -4256,9 +4258,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 				}
 				else if (tb.reg_origin_abs[i] != expected)
 				{
-					if (tb.reg_origin_abs[i] == 0x40000 || expected + 2 == 0 || expected == 0x40000)
+					if (tb.reg_origin_abs[i] == SPU_LS_SIZE || expected + 2 == 0 || expected == SPU_LS_SIZE)
 					{
-						// Set -2: sticky value indicating possible external reg origin (0x40000)
+						// Set -2: sticky value indicating possible external reg origin (SPU_LS_SIZE)
 						tb.reg_origin_abs[i] = -2;
 
 						must_repeat |= !tb.targets.empty();
@@ -4288,9 +4290,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 			// Reset values for the next attempt (keep negative values)
 			for (u32 i = 0; i < s_reg_max; i++)
 			{
-				if (block.reg_origin[i] <= 0x40000)
+				if (block.reg_origin[i] <= SPU_LS_SIZE)
 					block.reg_origin[i] = 0x80000000;
-				if (block.reg_origin_abs[i] <= 0x40000)
+				if (block.reg_origin_abs[i] <= SPU_LS_SIZE)
 					block.reg_origin_abs[i] = 0x80000000;
 			}
 		}
@@ -4323,21 +4325,21 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 					bb.reg_val32[i] = src.reg_val32[i];
 				}
 
-				if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == 0x40000 || orig + 2 == 0))
+				if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == SPU_LS_SIZE || orig + 2 == 0))
 				{
 					// Destroy offset if external reg value is used
 					func.reg_save_off[i] = -1;
 				}
 			}
 
-			if (u32 orig = bb.reg_origin_abs[s_reg_sp]; orig < 0x40000)
+			if (u32 orig = bb.reg_origin_abs[s_reg_sp]; orig < SPU_LS_SIZE)
 			{
 				auto& prologue = ::at32(m_bbs, orig);
 
 				// Copy stack offset (from the assumed prologue)
 				bb.stack_sub = prologue.stack_sub;
 			}
-			else if (orig > 0x40000)
+			else if (orig > SPU_LS_SIZE)
 			{
 				// Unpredictable stack
 				bb.stack_sub = 0x80000000;
@@ -4510,7 +4512,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		}
 
 		// Check for funny values.
-		if (bb.stack_sub >= 0x40000 || bb.stack_sub % 16)
+		if (bb.stack_sub >= SPU_LS_SIZE || bb.stack_sub % 16)
 		{
 			bb.stack_sub = 0x80000000;
 		}
@@ -4659,7 +4661,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		if (is_ok && bb.terminator == term_type::ret)
 		{
 			// Check $LR (alternative return registers are currently not supported)
-			if (u32 lr_orig = bb.reg_mod[s_reg_lr] ? addr : bb.reg_origin_abs[s_reg_lr]; lr_orig < 0x40000)
+			if (u32 lr_orig = bb.reg_mod[s_reg_lr] ? addr : bb.reg_origin_abs[s_reg_lr]; lr_orig < SPU_LS_SIZE)
 			{
 				auto& src = ::at32(m_bbs, lr_orig);
 
@@ -4674,7 +4676,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 					is_ok = false;
 				}
 			}
-			else if (lr_orig > 0x40000)
+			else if (lr_orig > SPU_LS_SIZE)
 			{
 				spu_log.todo("Function 0x%05x: [0x%05x] $LR unpredictable (src=0x%x)", f.first, addr, lr_orig);
 				is_ok = false;
@@ -4683,7 +4685,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 			// Check $80..$127 (should be restored or unmodified)
 			for (u32 i = s_reg_80; is_ok && i <= s_reg_127; i++)
 			{
-				if (u32 orig = bb.reg_mod[i] ? addr : bb.reg_origin_abs[i]; orig < 0x40000)
+				if (u32 orig = bb.reg_mod[i] ? addr : bb.reg_origin_abs[i]; orig < SPU_LS_SIZE)
 				{
 					auto& src = ::at32(m_bbs, orig);
 
@@ -4693,7 +4695,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 						is_ok = false;
 					}
 				}
-				else if (orig > 0x40000)
+				else if (orig > SPU_LS_SIZE)
 				{
 					spu_log.todo("Function 0x%05x: [0x%05x] $%u unpredictable (src=0x%x)", f.first, addr, i, orig);
 					is_ok = false;
@@ -5229,7 +5231,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 		auto& block = infos[bpc];
 
-		if (pos == entry_point || (g_cfg.core.spu_block_size != spu_block_size_type::safe && (m_ret_info[bpc / 4] || m_entry_info[bpc / 4])))
+		if (bpc == entry_point || (g_cfg.core.spu_block_size != spu_block_size_type::safe && (m_ret_info[bpc / 4] || m_entry_info[bpc / 4])))
 		{
 			// Do not allow value passthrough
 			for (reg_state_t& f : block->start_reg_state)
@@ -7269,6 +7271,24 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
 	for (auto& bb : m_bbs)
 	{
+		if (m_block_info[bb.first / 4])
+		{
+			fmt::append(out, "A: [0x%05x] %s\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? "Chunk" : "Entry") : "Block");
+
+			fmt::append(out, "\t F: 0x%05x\n", bb.second.func);
+
+			for (u32 pred : bb.second.preds)
+			{
+				fmt::append(out, "\t <- 0x%05x\n", pred);
+			}
+		}
+		else
+		{
+			fmt::append(out, "A: [0x%05x] ?\n", bb.first);
+		}
+
+		out += "\n";
+
 		for (u32 pos = bb.first, end = bb.first + bb.second.size * 4; pos < end; pos += 4)
 		{
 			dis_asm.disasm(pos);
@@ -7285,24 +7305,11 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
 		if (m_block_info[bb.first / 4])
 		{
-			fmt::append(out, "A: [0x%05x] %s\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? "Chunk" : "Entry") : "Block");
-
-			fmt::append(out, "\tF: 0x%05x\n", bb.second.func);
-
-			for (u32 pred : bb.second.preds)
-			{
-				fmt::append(out, "\t<- 0x%05x\n", pred);
-			}
-
 			for (u32 target : bb.second.targets)
 			{
 				fmt::append(out, "\t-> 0x%05x%s\n", target, m_bbs.count(target) ? "" : " (null)");
 			}
 		}
-		else
-		{
-			fmt::append(out, "A: [0x%05x] ?\n", bb.first);
-		}
 
 		out += '\n';
 	}
@@ -8143,23 +8150,15 @@ std::array& block_reg_info::evaluate_start_state(const s
 				}
 			}
 
-			if (qi == 0)
-			{
-				// TODO: First block is always resolved here, but this logic can be improved to detect more cases of opportunistic resolving
-				is_all_resolved = true;
-			}
-
 			auto& res_state = is_all_resolved ? cur_node->start_reg_state : temp;
 
-			for (usz bi = 0; bi < it->state_prev.size(); bi++)
+			if (!is_all_resolved)
 			{
-				if (it->state_prev[bi].disconnected)
-				{
-					// Loop state, even if not ignored for a million times the result would still be the same
-					// So ignore it
-					continue;
-				}
+				res_state = reg_state_t::make_unknown(it->block_pc);
+			}
 
+			for (usz bi = 0; is_all_resolved && bi < it->state_prev.size(); bi++)
+			{
 				std::array* arg_state{};
 
 				const auto& node = ::at32(map, it->state_prev[bi].block_pc);
diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
index 469d52d43c..cfa047f2ed 100644
--- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
@@ -2054,7 +2054,7 @@ public:
 				_phi->addIncoming(value, &m_function->getEntryBlock());
 			}
 		}
-		else if (src < 0x40000)
+		else if (src < SPU_LS_SIZE)
 		{
 			// Passthrough register value
 			const auto bfound = m_blocks.find(src);