diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index d358085a54..d41d7992d3 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -730,6 +730,11 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv) if (found != instr_labels.end()) { + if (m_preds.count(pos)) + { + c->align(kAlignCode, 16); + } + c->bind(found->second); } @@ -1118,11 +1123,22 @@ static void check_state_ret(SPUThread& _spu, void*, u8*) static void check_state(SPUThread* _spu, spu_function_t _ret) { - if (_spu->check_state()) + if (test(_spu->state) && _spu->check_state()) { _ret = &check_state_ret; } + if (g_cfg.core.spu_block_size != spu_block_size_type::safe) + { + // Get stack pointer, try to use native return address (check SPU return address) + const auto x = _spu->stack_mirror[(_spu->gpr[1]._u32[3] & 0x3fff0) >> 4]; + + if (x._u32[2] == _spu->pc) + { + _ret = reinterpret_cast<spu_function_t>(x._u64[0]); + } + } + _ret(*_spu, _spu->_ptr<u8>(0), nullptr); } @@ -1172,11 +1188,11 @@ void spu_recompiler::branch_fixed(u32 target) c->jmp(x86::rax); } -void spu_recompiler::branch_indirect(spu_opcode_t op, bool local) +void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) { using namespace asmjit; - if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !local) + if (g_cfg.core.spu_block_size != spu_block_size_type::giga && !jt) { // Simply external call (return or indirect call) c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher))); @@ -1238,12 +1254,59 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool local) c->mov(SPU_OFF_32(pc), *addr); c->cmp(SPU_OFF_32(state), 0); c->jnz(label_check); + + if (g_cfg.core.spu_block_size != spu_block_size_type::safe && ret) + { + // Get stack pointer, try to use native return address (check SPU return address) + c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); + c->and_(qw1->r32(), 0x3fff0); + 
c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror))); + c->cmp(x86::dword_ptr(*qw1, 8), *addr); + c->cmove(x86::r10, x86::qword_ptr(*qw1)); + } + c->jmp(x86::r10); c->bind(label_check); c->mov(*ls, x86::r10); c->jmp(imm_ptr(&check_state)); } +void spu_recompiler::branch_set_link(u32 target) +{ + using namespace asmjit; + + if (g_cfg.core.spu_block_size != spu_block_size_type::safe) + { + // Find instruction at target + const auto local = instr_labels.find(target); + + if (local != instr_labels.end() && local->second.isValid()) + { + Label ret = c->newLabel(); + + // Get stack pointer, write native and SPU return addresses into the stack mirror + c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); + c->and_(qw1->r32(), 0x3fff0); + c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror))); + c->lea(x86::r10, x86::qword_ptr(ret)); + c->mov(x86::qword_ptr(*qw1, 0), x86::r10); + c->mov(x86::qword_ptr(*qw1, 8), target); + + after.emplace_back([=, target = local->second] + { + // Clear return info after use + c->align(kAlignCode, 16); + c->bind(ret); + c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); + c->and_(qw1->r32(), 0x3fff0); + c->pcmpeqd(x86::xmm0, x86::xmm0); + c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)), x86::xmm0); + c->jmp(target); + }); + } + } +} + void spu_recompiler::fall(spu_opcode_t op) { auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret) @@ -2768,9 +2831,17 @@ void spu_recompiler::STQX(spu_opcode_t op) void spu_recompiler::BI(spu_opcode_t op) { + const auto found = m_targets.find(m_pos); + const auto is_jt = found == m_targets.end() || found->second.size() != 1 || found->second.front() != -1; + + if (found == m_targets.end() || found->second.empty()) + { + LOG_ERROR(SPU, "[0x%x] BI: no targets", m_pos); + } + c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); - branch_indirect(op, 
m_targets.find(m_pos) != m_targets.end()); + branch_indirect(op, is_jt, !is_jt); m_pos = -1; } @@ -2781,7 +2852,8 @@ void spu_recompiler::BISL(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - branch_indirect(op, m_targets.find(m_pos) != m_targets.end()); + branch_set_link(m_pos + 4); + branch_indirect(op, true, false); m_pos = -1; } @@ -4282,6 +4354,7 @@ void spu_recompiler::BRASL(spu_opcode_t op) if (target != m_pos + 4) { + branch_set_link(m_pos + 4); branch_fixed(target); m_pos = -1; } @@ -4319,6 +4392,7 @@ void spu_recompiler::BRSL(spu_opcode_t op) if (target != m_pos + 4) { + branch_set_link(m_pos + 4); branch_fixed(target); m_pos = -1; } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index a2c77a5e75..6388cb157c 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -103,7 +103,8 @@ private: asmjit::X86Mem XmmConst(__m128i data); void branch_fixed(u32 target); - void branch_indirect(spu_opcode_t op, bool local = false); + void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true); + void branch_set_link(u32 target); void fall(spu_opcode_t op); void save_rcx(); void load_rcx(); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 47968c3a7f..8768a1fd65 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -90,6 +90,12 @@ void spu_cache::initialize() return; } + if (g_cfg.core.spu_decoder == spu_decoder_type::llvm) + { + // Force Safe mode + g_cfg.core.spu_block_size.from_default(); + } + // SPU cache file (version + block size type) const std::string loc = _main->cache + u8"spu-ยง" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v3.dat"; @@ -384,7 +390,7 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) continue; } - if (g_cfg.core.spu_block_size != 
spu_block_size_type::giga) + if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { // Stop on special instructions (TODO) m_targets[pos].push_back(-1); @@ -437,8 +443,9 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) add_block(target); } - if (type == spu_itype::BISL && target >= lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga) + if (type == spu_itype::BISL && g_cfg.core.spu_block_size != spu_block_size_type::safe) { + m_targets[pos].push_back(pos + 4); add_block(pos + 4); } } @@ -548,7 +555,7 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) if (type == spu_itype::BI || type == spu_itype::BISL) { - if (type == spu_itype::BI || g_cfg.core.spu_block_size != spu_block_size_type::giga) + if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe) { if (m_targets[pos].empty()) { @@ -557,6 +564,7 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) } else { + m_targets[pos].push_back(pos + 4); add_block(pos + 4); } } @@ -587,8 +595,9 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) m_targets[pos].push_back(target); - if (target >= lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga) + if (g_cfg.core.spu_block_size != spu_block_size_type::safe) { + m_targets[pos].push_back(pos + 4); add_block(pos + 4); } @@ -803,11 +812,11 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) } } - while (g_cfg.core.spu_block_size == spu_block_size_type::safe) + while (g_cfg.core.spu_block_size != spu_block_size_type::giga) { const u32 initial_size = result.size(); - // Check unreachable blocks in safe mode (TODO) + // Check unreachable blocks in safe and mega modes (TODO) u32 limit = lsa + result.size() * 4 - 4; for (auto& pair : m_preds) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 069af53ca3..514ca91f4d 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -637,8 +637,17 @@ 
SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group) jit = spu_recompiler_base::make_llvm_recompiler(); } - // Initialize lookup table - jit_dispatcher.fill(&spu_recompiler_base::dispatch); + if (g_cfg.core.spu_decoder != spu_decoder_type::fast && g_cfg.core.spu_decoder != spu_decoder_type::precise) + { + // Initialize lookup table + jit_dispatcher.fill(&spu_recompiler_base::dispatch); + + if (g_cfg.core.spu_block_size != spu_block_size_type::safe) + { + // Initialize stack mirror + std::memset(stack_mirror.data(), 0xff, sizeof(stack_mirror)); + } + } } void SPUThread::push_snr(u32 number, u32 value) diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 061adee884..baee513c58 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -588,6 +588,8 @@ public: std::array<spu_function_t, 0x10000> jit_dispatcher; // Dispatch table for indirect calls + std::array<v128, 0x4000> stack_mirror; // Return address information + void push_snr(u32 number, u32 value); void do_dma_transfer(const spu_mfc_cmd& args); bool do_dma_check(const spu_mfc_cmd& args);