mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
SPU ASMJIT: Implement Mega block mode (experimental)
Disable extra modes for SPU LLVM for now. In Mega mode, SPU Analyser tries to determine complete functions. Recompiler tries to speed up returns via 'stack mirror'.
This commit is contained in:
parent
11bdb4102a
commit
12eee6a19e
5 changed files with 109 additions and 14 deletions
|
@ -730,6 +730,11 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
|||
|
||||
if (found != instr_labels.end())
|
||||
{
|
||||
if (m_preds.count(pos))
|
||||
{
|
||||
c->align(kAlignCode, 16);
|
||||
}
|
||||
|
||||
c->bind(found->second);
|
||||
}
|
||||
|
||||
|
@ -1118,11 +1123,22 @@ static void check_state_ret(SPUThread& _spu, void*, u8*)
|
|||
|
||||
// Continuation used by generated code for an indirect branch when the thread state word is
// non-zero (branch_indirect jumps here from its state-check label after storing the new pc).
// _spu: SPU thread being executed.
// _ret: function the caller selected as the branch destination; may be replaced below.
static void check_state(SPUThread* _spu, spu_function_t _ret)
{
	// Post-patch condition only: the pre-patch `if (_spu->check_state())` line it replaced
	// must not be kept alongside it, otherwise check_state() would be evaluated twice.
	if (test(_spu->state) && _spu->check_state())
	{
		_ret = &check_state_ret;
	}

	if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
	{
		// Get stack pointer, try to use native return address (check SPU return address)
		const auto x = _spu->stack_mirror[(_spu->gpr[1]._u32[3] & 0x3fff0) >> 4];

		// NOTE(review): a matching mirror entry also overrides check_state_ret selected
		// above - confirm this is intended when check_state() requested a stop.
		if (x._u32[2] == _spu->pc)
		{
			_ret = reinterpret_cast<spu_function_t>(x._u64[0]);
		}
	}

	// Continue at the chosen function
	_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
|
||||
|
||||
|
@ -1172,11 +1188,11 @@ void spu_recompiler::branch_fixed(u32 target)
|
|||
c->jmp(x86::rax);
|
||||
}
|
||||
|
||||
void spu_recompiler::branch_indirect(spu_opcode_t op, bool local)
|
||||
void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !local)
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga && !jt)
|
||||
{
|
||||
// Simply external call (return or indirect call)
|
||||
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
|
@ -1238,12 +1254,59 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool local)
|
|||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->cmp(SPU_OFF_32(state), 0);
|
||||
c->jnz(label_check);
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe && ret)
|
||||
{
|
||||
// Get stack pointer, try to use native return address (check SPU return address)
|
||||
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
|
||||
c->and_(qw1->r32(), 0x3fff0);
|
||||
c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)));
|
||||
c->cmp(x86::dword_ptr(*qw1, 8), *addr);
|
||||
c->cmove(x86::r10, x86::qword_ptr(*qw1));
|
||||
}
|
||||
|
||||
c->jmp(x86::r10);
|
||||
c->bind(label_check);
|
||||
c->mov(*ls, x86::r10);
|
||||
c->jmp(imm_ptr(&check_state));
|
||||
}
|
||||
|
||||
// Emits code that records return information for a call in the thread's stack mirror:
// the native address of a small return stub plus the SPU return address `target`.
// Nothing is emitted in safe mode or when `target` has no valid label in this function.
void spu_recompiler::branch_set_link(u32 target)
{
	using namespace asmjit;

	// Nothing to record in safe mode
	if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
	{
		return;
	}

	// Find instruction at target
	const auto found = instr_labels.find(target);

	if (found == instr_labels.end() || !found->second.isValid())
	{
		return;
	}

	// Label of the return stub emitted later through `after`
	Label stub = c->newLabel();

	// Get stack pointer, write native and SPU return addresses into the stack mirror
	c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
	c->and_(qw1->r32(), 0x3fff0);
	c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)));
	c->lea(x86::r10, x86::qword_ptr(stub));
	c->mov(x86::qword_ptr(*qw1, 0), x86::r10); // native return address
	c->mov(x86::qword_ptr(*qw1, 8), target); // SPU return address (compared as a dword on return)

	after.emplace_back([=, target = found->second]
	{
		// Clear return info after use
		c->align(kAlignCode, 16);
		c->bind(stub);
		c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
		c->and_(qw1->r32(), 0x3fff0);

		// All-ones vector overwrites the whole 16-byte mirror entry
		c->pcmpeqd(x86::xmm0, x86::xmm0);
		c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)), x86::xmm0);

		// Continue at the instruction following the call
		c->jmp(target);
	});
}
|
||||
|
||||
void spu_recompiler::fall(spu_opcode_t op)
|
||||
{
|
||||
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret)
|
||||
|
@ -2768,9 +2831,17 @@ void spu_recompiler::STQX(spu_opcode_t op)
|
|||
|
||||
// BI: indirect branch to the address held in the preferred word of register ra.
// The branch is emitted as a return (ret = true) only when the analyser recorded exactly
// one target for this instruction and that target is -1; in every other case, including
// when no targets were recorded at all, it is emitted as a jump table (jt = true).
void spu_recompiler::BI(spu_opcode_t op)
{
	const auto found = m_targets.find(m_pos);
	const auto is_jt = found == m_targets.end() || found->second.size() != 1 || found->second.front() != -1;

	if (found == m_targets.end() || found->second.empty())
	{
		LOG_ERROR(SPU, "[0x%x] BI: no targets", m_pos);
	}

	// Load the branch address and mask it to a 4-byte aligned local storage address
	c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
	c->and_(*addr, 0x3fffc);

	// Post-patch call only: the pre-patch two-argument call it replaced must not be kept
	// alongside it, otherwise the branch sequence would be emitted twice.
	branch_indirect(op, is_jt, !is_jt);
	m_pos = -1;
}
|
||||
|
||||
|
@ -2781,7 +2852,8 @@ void spu_recompiler::BISL(spu_opcode_t op)
|
|||
const XmmLink& vr = XmmAlloc();
|
||||
c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0)));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
|
||||
branch_set_link(m_pos + 4);
|
||||
branch_indirect(op, true, false);
|
||||
m_pos = -1;
|
||||
}
|
||||
|
||||
|
@ -4282,6 +4354,7 @@ void spu_recompiler::BRASL(spu_opcode_t op)
|
|||
|
||||
if (target != m_pos + 4)
|
||||
{
|
||||
branch_set_link(m_pos + 4);
|
||||
branch_fixed(target);
|
||||
m_pos = -1;
|
||||
}
|
||||
|
@ -4319,6 +4392,7 @@ void spu_recompiler::BRSL(spu_opcode_t op)
|
|||
|
||||
if (target != m_pos + 4)
|
||||
{
|
||||
branch_set_link(m_pos + 4);
|
||||
branch_fixed(target);
|
||||
m_pos = -1;
|
||||
}
|
||||
|
|
|
@ -103,7 +103,8 @@ private:
|
|||
asmjit::X86Mem XmmConst(__m128i data);
|
||||
|
||||
void branch_fixed(u32 target);
|
||||
void branch_indirect(spu_opcode_t op, bool local = false);
|
||||
void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true);
|
||||
void branch_set_link(u32 target);
|
||||
void fall(spu_opcode_t op);
|
||||
void save_rcx();
|
||||
void load_rcx();
|
||||
|
|
|
@ -90,6 +90,12 @@ void spu_cache::initialize()
|
|||
return;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||
{
|
||||
// Force Safe mode
|
||||
g_cfg.core.spu_block_size.from_default();
|
||||
}
|
||||
|
||||
// SPU cache file (version + block size type)
|
||||
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v3.dat";
|
||||
|
||||
|
@ -384,7 +390,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
{
|
||||
// Stop on special instructions (TODO)
|
||||
m_targets[pos].push_back(-1);
|
||||
|
@ -437,8 +443,9 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||
add_block(target);
|
||||
}
|
||||
|
||||
if (type == spu_itype::BISL && target >= lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
if (type == spu_itype::BISL && g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
m_targets[pos].push_back(pos + 4);
|
||||
add_block(pos + 4);
|
||||
}
|
||||
}
|
||||
|
@ -548,7 +555,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||
|
||||
if (type == spu_itype::BI || type == spu_itype::BISL)
|
||||
{
|
||||
if (type == spu_itype::BI || g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
{
|
||||
if (m_targets[pos].empty())
|
||||
{
|
||||
|
@ -557,6 +564,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||
}
|
||||
else
|
||||
{
|
||||
m_targets[pos].push_back(pos + 4);
|
||||
add_block(pos + 4);
|
||||
}
|
||||
}
|
||||
|
@ -587,8 +595,9 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||
|
||||
m_targets[pos].push_back(target);
|
||||
|
||||
if (target >= lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
m_targets[pos].push_back(pos + 4);
|
||||
add_block(pos + 4);
|
||||
}
|
||||
|
||||
|
@ -803,11 +812,11 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||
}
|
||||
}
|
||||
|
||||
while (g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
while (g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
{
|
||||
const u32 initial_size = result.size();
|
||||
|
||||
// Check unreachable blocks in safe mode (TODO)
|
||||
// Check unreachable blocks in safe and mega modes (TODO)
|
||||
u32 limit = lsa + result.size() * 4 - 4;
|
||||
|
||||
for (auto& pair : m_preds)
|
||||
|
|
|
@ -637,8 +637,17 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
|
|||
jit = spu_recompiler_base::make_llvm_recompiler();
|
||||
}
|
||||
|
||||
// Initialize lookup table
|
||||
jit_dispatcher.fill(&spu_recompiler_base::dispatch);
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::fast && g_cfg.core.spu_decoder != spu_decoder_type::precise)
|
||||
{
|
||||
// Initialize lookup table
|
||||
jit_dispatcher.fill(&spu_recompiler_base::dispatch);
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
// Initialize stack mirror
|
||||
std::memset(stack_mirror.data(), 0xff, sizeof(stack_mirror));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SPUThread::push_snr(u32 number, u32 value)
|
||||
|
|
|
@ -588,6 +588,8 @@ public:
|
|||
|
||||
std::array<spu_function_t, 0x10000> jit_dispatcher; // Dispatch table for indirect calls
|
||||
|
||||
std::array<v128, 0x4000> stack_mirror; // Return address information
|
||||
|
||||
void push_snr(u32 number, u32 value);
|
||||
void do_dma_transfer(const spu_mfc_cmd& args);
|
||||
bool do_dma_check(const spu_mfc_cmd& args);
|
||||
|
|
Loading…
Add table
Reference in a new issue