mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
SPU ASMJIT v2.0
Use X86Assembler and blocks
This commit is contained in:
parent
477522210e
commit
8ca33bcb94
12 changed files with 1363 additions and 1187 deletions
|
@ -13,6 +13,12 @@ bool utils::has_ssse3()
|
|||
return g_value;
|
||||
}
|
||||
|
||||
bool utils::has_sse41()
|
||||
{
|
||||
static const bool g_value = get_cpuid(0, 0)[0] >= 0x1 && get_cpuid(1, 0)[2] & 0x80000;
|
||||
return g_value;
|
||||
}
|
||||
|
||||
bool utils::has_avx()
|
||||
{
|
||||
static const bool g_value = get_cpuid(0, 0)[0] >= 0x1 && get_cpuid(1, 0)[2] & 0x10000000 && (get_cpuid(1, 0)[2] & 0x0C000000) == 0x0C000000 && (get_xgetbv(0) & 0x6) == 0x6;
|
||||
|
|
|
@ -29,6 +29,8 @@ namespace utils
|
|||
|
||||
bool has_ssse3();
|
||||
|
||||
bool has_sse41();
|
||||
|
||||
bool has_avx();
|
||||
|
||||
bool has_avx2();
|
||||
|
|
|
@ -42,10 +42,10 @@ protected:
|
|||
public:
|
||||
std::string last_opcode;
|
||||
u32 dump_pc;
|
||||
u8* offset;
|
||||
const u8* offset;
|
||||
|
||||
protected:
|
||||
CPUDisAsm(CPUDisAsmMode mode)
|
||||
CPUDisAsm(CPUDisAsmMode mode)
|
||||
: m_mode(mode)
|
||||
, offset(0)
|
||||
{
|
||||
|
|
|
@ -37,7 +37,7 @@ void RawSPUThread::on_init(const std::shared_ptr<void>& _this)
|
|||
}
|
||||
|
||||
RawSPUThread::RawSPUThread(const std::string& name)
|
||||
: SPUThread(name)
|
||||
: SPUThread(name, 0, nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,56 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#include "Utilities/mutex.h"
|
||||
#include "SPURecompiler.h"
|
||||
|
||||
namespace asmjit
|
||||
#include <functional>
|
||||
|
||||
#define ASMJIT_STATIC
|
||||
#define ASMJIT_DEBUG
|
||||
|
||||
#include "asmjit.h"
|
||||
|
||||
// SPU ASMJIT Runtime object (global)
|
||||
class spu_runtime
|
||||
{
|
||||
struct JitRuntime;
|
||||
struct CodeHolder;
|
||||
struct X86Compiler;
|
||||
struct X86Gp;
|
||||
struct X86Xmm;
|
||||
struct X86Mem;
|
||||
struct Label;
|
||||
}
|
||||
shared_mutex m_mutex;
|
||||
|
||||
asmjit::JitRuntime m_jitrt;
|
||||
|
||||
// All functions
|
||||
std::map<std::vector<u32>, spu_function_t> m_map;
|
||||
|
||||
// TODO
|
||||
std::array<atomic_t<spu_function_t>, 0x10000> m_dispatcher;
|
||||
|
||||
friend class spu_recompiler;
|
||||
|
||||
public:
|
||||
spu_runtime();
|
||||
};
|
||||
|
||||
// SPU ASMJIT Recompiler
|
||||
class spu_recompiler : public spu_recompiler_base
|
||||
{
|
||||
const std::shared_ptr<asmjit::JitRuntime> m_jit;
|
||||
const std::shared_ptr<asmjit::JitRuntime> m_rt;
|
||||
|
||||
std::shared_ptr<spu_runtime> m_spurt;
|
||||
|
||||
public:
|
||||
spu_recompiler();
|
||||
spu_recompiler(class SPUThread& spu);
|
||||
|
||||
virtual void compile(spu_function_t& f) override;
|
||||
virtual spu_function_t compile(const std::vector<u32>& func) override;
|
||||
|
||||
private:
|
||||
// emitter:
|
||||
asmjit::X86Compiler* c;
|
||||
asmjit::CodeHolder* codeHolder;
|
||||
asmjit::X86Assembler* c;
|
||||
|
||||
// input:
|
||||
asmjit::X86Gp* cpu;
|
||||
asmjit::X86Gp* ls;
|
||||
// arguments:
|
||||
const asmjit::X86Gp* cpu;
|
||||
const asmjit::X86Gp* ls;
|
||||
const asmjit::X86Gp* qw0;
|
||||
const asmjit::X86Gp* qw1;
|
||||
|
||||
// temporary:
|
||||
asmjit::X86Gp* addr;
|
||||
asmjit::X86Gp* qw0;
|
||||
asmjit::X86Gp* qw1;
|
||||
asmjit::X86Gp* qw2;
|
||||
asmjit::X86Gp* qw3;
|
||||
std::array<asmjit::X86Xmm*, 6> vec;
|
||||
const asmjit::X86Gp* addr;
|
||||
std::array<const asmjit::X86Xmm*, 6> vec;
|
||||
|
||||
// labels:
|
||||
asmjit::Label* labels; // array[0x10000]
|
||||
asmjit::Label* jt; // jump table resolver (uses *addr)
|
||||
asmjit::Label* end; // function end (return *addr)
|
||||
// workload for the end of function:
|
||||
std::vector<std::function<void()>> after;
|
||||
std::vector<std::function<void()>> consts;
|
||||
|
||||
// All emitted 128-bit consts
|
||||
std::map<std::pair<u64, u64>, asmjit::Label> xmm_consts;
|
||||
|
||||
class XmmLink
|
||||
{
|
||||
asmjit::X86Xmm* m_var;
|
||||
const asmjit::X86Xmm* m_var;
|
||||
|
||||
public:
|
||||
XmmLink(asmjit::X86Xmm*& xmm_var)
|
||||
XmmLink(const asmjit::X86Xmm*& xmm_var)
|
||||
: m_var(xmm_var)
|
||||
{
|
||||
xmm_var = nullptr;
|
||||
|
@ -58,7 +75,7 @@ private:
|
|||
|
||||
XmmLink(XmmLink&&) = default; // MoveConstructible + delete copy constructor and copy/move operators
|
||||
|
||||
operator asmjit::X86Xmm&() const
|
||||
operator const asmjit::X86Xmm&() const
|
||||
{
|
||||
return *m_var;
|
||||
}
|
||||
|
@ -78,10 +95,15 @@ private:
|
|||
asmjit::X86Mem XmmConst(__m128 data);
|
||||
asmjit::X86Mem XmmConst(__m128i data);
|
||||
|
||||
void branch_fixed(u32 target);
|
||||
void branch_indirect(spu_opcode_t op);
|
||||
asmjit::Label halt(u32 pos);
|
||||
void fall(spu_opcode_t op);
|
||||
void save_rcx();
|
||||
void load_rcx();
|
||||
|
||||
public:
|
||||
void CheckInterruptStatus(spu_opcode_t op);
|
||||
void InterpreterCall(spu_opcode_t op);
|
||||
void FunctionCall();
|
||||
void UNK(spu_opcode_t op);
|
||||
|
||||
void STOP(spu_opcode_t op);
|
||||
void LNOP(spu_opcode_t op);
|
||||
|
@ -282,6 +304,4 @@ public:
|
|||
void FNMS(spu_opcode_t op);
|
||||
void FMA(spu_opcode_t op);
|
||||
void FMS(spu_opcode_t op);
|
||||
|
||||
void UNK(spu_opcode_t op);
|
||||
};
|
||||
|
|
|
@ -1,39 +1,12 @@
|
|||
#include "stdafx.h"
|
||||
#include "Emu/Memory/vm.h"
|
||||
#include "SPUAnalyser.h"
|
||||
#include "SPURecompiler.h"
|
||||
#include "SPUOpcodes.h"
|
||||
|
||||
const spu_decoder<spu_itype> s_spu_itype;
|
||||
|
||||
spu_function_t* SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size)
|
||||
{
|
||||
for (auto found = m_db.equal_range(key); found.first != found.second; found.first++)
|
||||
{
|
||||
const auto& func = found.first->second;
|
||||
|
||||
// Compare binary data explicitly (TODO: optimize)
|
||||
if (LIKELY(func->size <= max_size) && std::memcmp(func->data.data(), data, func->size) == 0)
|
||||
{
|
||||
return func.get();
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
SPUDatabase::SPUDatabase()
|
||||
{
|
||||
// TODO: load existing database associated with currently running executable
|
||||
|
||||
LOG_SUCCESS(SPU, "SPU Database initialized...");
|
||||
}
|
||||
|
||||
SPUDatabase::~SPUDatabase()
|
||||
{
|
||||
// TODO: serialize database
|
||||
}
|
||||
|
||||
spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_limit)
|
||||
std::shared_ptr<spu_function> spu_analyse(const be_t<u32>* ls, u32 entry, u32 max_limit)
|
||||
{
|
||||
// Check arguments (bounds and alignment)
|
||||
if (max_limit > 0x40000 || entry >= max_limit || entry % 4 || max_limit % 4)
|
||||
|
@ -47,23 +20,23 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
const u32 block_sz = max_limit - entry;
|
||||
|
||||
{
|
||||
reader_lock lock(m_mutex);
|
||||
//reader_lock lock(m_mutex);
|
||||
|
||||
// Try to find existing function in the database
|
||||
if (auto func = find(base, key, block_sz))
|
||||
{
|
||||
return func;
|
||||
}
|
||||
// if (auto func = find(base, key, block_sz))
|
||||
// {
|
||||
// return func;
|
||||
// }
|
||||
}
|
||||
|
||||
{
|
||||
writer_lock lock(m_mutex);
|
||||
//writer_lock lock(m_mutex);
|
||||
|
||||
// Double-check
|
||||
if (auto func = find(base, key, block_sz))
|
||||
{
|
||||
return func;
|
||||
}
|
||||
// if (auto func = find(base, key, block_sz))
|
||||
// {
|
||||
// return func;
|
||||
// }
|
||||
}
|
||||
|
||||
// Initialize block entries with the function entry point
|
||||
|
@ -89,14 +62,14 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
const auto type = s_spu_itype.decode(op.opcode);
|
||||
|
||||
{
|
||||
reader_lock lock(m_mutex);
|
||||
//reader_lock lock(m_mutex);
|
||||
|
||||
// Find existing function
|
||||
if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
|
||||
{
|
||||
limit = pos;
|
||||
break;
|
||||
}
|
||||
// if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
|
||||
// {
|
||||
// limit = pos;
|
||||
// break;
|
||||
// }
|
||||
}
|
||||
|
||||
// Additional analysis at the beginning of the block
|
||||
|
@ -156,7 +129,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
|
||||
// Fix pos value
|
||||
start = pos; pos = pos - 4;
|
||||
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -179,10 +152,10 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
|
||||
// if upcoming instruction is not BI, reset the pigeonhole optimization
|
||||
// todo: can constant propagation somewhere get rid of this check?
|
||||
if ((type != BI))
|
||||
if ((type != spu_itype::BI))
|
||||
ila_r2_addr = 0; // reset
|
||||
|
||||
if (type == BI || type == IRET) // Branch Indirect
|
||||
|
||||
if (type == spu_itype::BI || type == spu_itype::IRET) // Branch Indirect
|
||||
{
|
||||
blocks.emplace(start);
|
||||
start = pos + 4;
|
||||
|
@ -190,9 +163,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
if (op.ra == 2 && ila_r2_addr > entry)
|
||||
blocks.emplace(ila_r2_addr);
|
||||
}
|
||||
else if (type == BR || type == BRA) // Branch Relative/Absolute
|
||||
else if (type == spu_itype::BR || type == spu_itype::BRA) // Branch Relative/Absolute
|
||||
{
|
||||
const u32 target = spu_branch_target(type == BR ? pos : 0, op.i16);
|
||||
const u32 target = spu_branch_target(type == spu_itype::BR ? pos : 0, op.i16);
|
||||
|
||||
// Add adjacent function because it always could be
|
||||
adjacent.emplace(target);
|
||||
|
@ -205,9 +178,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
blocks.emplace(start);
|
||||
start = pos + 4;
|
||||
}
|
||||
else if (type == BRSL || type == BRASL) // Branch Relative/Absolute and Set Link
|
||||
else if (type == spu_itype::BRSL || type == spu_itype::BRASL) // Branch Relative/Absolute and Set Link
|
||||
{
|
||||
const u32 target = spu_branch_target(type == BRSL ? pos : 0, op.i16);
|
||||
const u32 target = spu_branch_target(type == spu_itype::BRSL ? pos : 0, op.i16);
|
||||
|
||||
if (target == pos + 4)
|
||||
{
|
||||
|
@ -228,11 +201,11 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Function call without $LR", pos);
|
||||
}
|
||||
}
|
||||
else if (type == BISL || type == BISLED) // Branch Indirect and Set Link
|
||||
else if (type == spu_itype::BISL || type == spu_itype::BISLED) // Branch Indirect and Set Link
|
||||
{
|
||||
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Indirect function call without $LR", pos);
|
||||
}
|
||||
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero (Half)word
|
||||
else if (type == spu_itype::BRNZ || type == spu_itype::BRZ || type == spu_itype::BRHNZ || type == spu_itype::BRHZ) // Branch Relative if (Not) Zero (Half)word
|
||||
{
|
||||
const u32 target = spu_branch_target(pos, op.i16);
|
||||
|
||||
|
@ -244,7 +217,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
blocks.emplace(target);
|
||||
}
|
||||
}
|
||||
else if (type == LNOP || type == NOP) {
|
||||
else if (type == spu_itype::LNOP || type == spu_itype::NOP) {
|
||||
// there's a chance that there are some random lnops/nops after the end of a function
|
||||
// haven't found a definite pattern, but it is an easy optimization to check for: just push start down if lnop is tagged as a start
|
||||
// todo: remove the last added start pos as it's probably unnecessary
|
||||
|
@ -262,7 +235,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
// Analyse stack pointer access
|
||||
else if (rt == 1)
|
||||
{
|
||||
if (type == ILA && pos < ila_sp_pos)
|
||||
if (type == spu_itype::ILA && pos < ila_sp_pos)
|
||||
{
|
||||
// set minimal ila $SP,* instruction position
|
||||
ila_sp_pos = pos;
|
||||
|
@ -272,7 +245,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
// ila r2, addr
|
||||
// bi r2
|
||||
else if (rt == 2) {
|
||||
if (type == ILA)
|
||||
if (type == spu_itype::ILA)
|
||||
ila_r2_addr = spu_branch_target(op.i18);
|
||||
}
|
||||
}
|
||||
|
@ -285,9 +258,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
|
||||
const auto type = s_spu_itype.decode(op.opcode);
|
||||
|
||||
if (type == BRSL || type == BRASL) // Branch Relative/Absolute and Set Link
|
||||
if (type == spu_itype::BRSL || type == spu_itype::BRASL) // Branch Relative/Absolute and Set Link
|
||||
{
|
||||
const u32 target = spu_branch_target(type == BRSL ? pos : 0, op.i16);
|
||||
const u32 target = spu_branch_target(type == spu_itype::BRSL ? pos : 0, op.i16);
|
||||
|
||||
if (target != pos + 4 && target > entry && limit > target)
|
||||
{
|
||||
|
@ -308,7 +281,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
}
|
||||
|
||||
// Prepare new function (set addr and size)
|
||||
auto func = std::make_shared<spu_function_t>(entry, limit - entry);
|
||||
auto func = std::make_shared<spu_function>(entry, limit - entry);
|
||||
|
||||
// Copy function contents
|
||||
func->data = { ls + entry / 4, ls + limit / 4 };
|
||||
|
@ -346,13 +319,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
|||
// Lock here just before we write to the db
|
||||
// It is unlikely that the second check will pass anyway, so we delay this step since compiling functions is very fast
|
||||
{
|
||||
writer_lock lock(m_mutex);
|
||||
//writer_lock lock(m_mutex);
|
||||
|
||||
// Add function to the database
|
||||
m_db.emplace(key, func);
|
||||
//m_db.emplace(key, func);
|
||||
}
|
||||
|
||||
LOG_NOTICE(SPU, "Function detected [0x%05x-0x%05x] (size=0x%x)", func->addr, func->addr + func->size, func->size);
|
||||
|
||||
return func.get();
|
||||
return func;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include "Utilities/mutex.h"
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
// SPU Instruction Type
|
||||
|
@ -247,7 +246,7 @@ struct spu_itype
|
|||
class SPUThread;
|
||||
|
||||
// SPU basic function information structure
|
||||
struct spu_function_t
|
||||
struct spu_function
|
||||
{
|
||||
// Entry point (LS address)
|
||||
const u32 addr;
|
||||
|
@ -273,28 +272,9 @@ struct spu_function_t
|
|||
// Pointer to the compiled function
|
||||
u32(*compiled)(SPUThread* _spu, be_t<u32>* _ls) = nullptr;
|
||||
|
||||
spu_function_t(u32 addr, u32 size)
|
||||
spu_function(u32 addr, u32 size)
|
||||
: addr(addr)
|
||||
, size(size)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
// SPU Function Database (must be global or PS3 process-local)
|
||||
class SPUDatabase final : spu_itype
|
||||
{
|
||||
shared_mutex m_mutex;
|
||||
|
||||
// All registered functions (uses addr and first instruction as a key)
|
||||
std::unordered_multimap<u64, std::shared_ptr<spu_function_t>> m_db;
|
||||
|
||||
// For internal use
|
||||
spu_function_t* find(const be_t<u32>* data, u64 key, u32 max_size);
|
||||
|
||||
public:
|
||||
SPUDatabase();
|
||||
~SPUDatabase();
|
||||
|
||||
// Try to retrieve SPU function information
|
||||
spu_function_t* analyse(const be_t<u32>* ls, u32 entry, u32 limit = 0x40000);
|
||||
};
|
||||
|
|
|
@ -3,92 +3,183 @@
|
|||
#include "Emu/Memory/Memory.h"
|
||||
|
||||
#include "SPUThread.h"
|
||||
#include "SPUAnalyser.h"
|
||||
#include "SPURecompiler.h"
|
||||
#include "SPUASMJITRecompiler.h"
|
||||
#include <algorithm>
|
||||
|
||||
extern u64 get_system_time();
|
||||
|
||||
const spu_decoder<spu_itype> s_spu_itype;
|
||||
|
||||
// Binds the recompiler to its SPU thread and points all of the thread's JIT lookup
// structures at the dispatch() fallback.
spu_recompiler_base::spu_recompiler_base(SPUThread& spu)
	: m_spu(spu)
{
	// Every slot of the dispatch table starts out at the fallback
	for (auto& slot : spu.jit_dispatcher)
	{
		slot = &dispatch;
	}

	// The "empty" block (keyed by an empty vector) maps to the fallback as well
	const std::vector<u32> empty_block;
	spu.jit_map[empty_block] = &dispatch;
}
|
||||
|
||||
// Nothing to release here; the destructor is still defined out of line in this translation unit.
spu_recompiler_base::~spu_recompiler_base() = default;
|
||||
|
||||
void spu_recompiler_base::enter(SPUThread& spu)
|
||||
void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
|
||||
{
|
||||
if (spu.pc >= 0x40000 || spu.pc % 4)
|
||||
const auto result = spu.jit_map.emplace(block(spu, spu.pc), nullptr);
|
||||
|
||||
if (result.second || !result.first->second)
|
||||
{
|
||||
fmt::throw_exception("Invalid PC: 0x%05x", spu.pc);
|
||||
result.first->second = spu.jit->compile(result.first->first);
|
||||
}
|
||||
|
||||
// Get SPU LS pointer
|
||||
const auto _ls = vm::_ptr<u32>(spu.offset);
|
||||
spu.jit_dispatcher[spu.pc / 4] = result.first->second;
|
||||
}
|
||||
|
||||
// Search if cached data matches
|
||||
auto func = spu.compiled_cache[spu.pc / 4];
|
||||
void spu_recompiler_base::branch(SPUThread& spu, std::pair<const std::vector<u32>, spu_function_t>* pair, u8* rip)
|
||||
{
|
||||
spu.pc = pair->first[0];
|
||||
|
||||
// Check shared db if we dont have a match
|
||||
if (!func || !std::equal(func->data.begin(), func->data.end(), _ls + spu.pc / 4, [](const be_t<u32>& l, const be_t<u32>& r) { return *(u32*)(u8*)&l == *(u32*)(u8*)&r; }))
|
||||
if (!pair->second)
|
||||
{
|
||||
func = spu.spu_db->analyse(_ls, spu.pc);
|
||||
spu.compiled_cache[spu.pc / 4] = func;
|
||||
pair->second = spu.jit->compile(pair->first);
|
||||
}
|
||||
|
||||
// Reset callstack if necessary
|
||||
if ((func->does_reset_stack && spu.recursion_level) || spu.recursion_level >= 128)
|
||||
{
|
||||
spu.state += cpu_flag::ret;
|
||||
return;
|
||||
}
|
||||
spu.jit_dispatcher[spu.pc / 4] = pair->second;
|
||||
|
||||
// Compile if needed
|
||||
if (!func->compiled)
|
||||
// Overwrite jump to this function with jump to the compiled function
|
||||
const s64 rel = reinterpret_cast<u64>(pair->second) - reinterpret_cast<u64>(rip) - 5;
|
||||
|
||||
if (rel >= INT32_MIN && rel <= INT32_MAX)
|
||||
{
|
||||
if (!spu.spu_rec)
|
||||
const s64 rel8 = (rel + 5) - 2;
|
||||
|
||||
alignas(8) u8 bytes[8];
|
||||
|
||||
if (rel8 >= INT8_MIN && rel8 <= INT8_MAX)
|
||||
{
|
||||
spu.spu_rec = fxm::get_always<spu_recompiler>();
|
||||
bytes[0] = 0xeb; // jmp rel8
|
||||
bytes[1] = static_cast<s8>(rel8);
|
||||
std::memset(bytes + 2, 0x90, 5);
|
||||
bytes[7] = 0x48;
|
||||
}
|
||||
else
|
||||
{
|
||||
bytes[0] = 0xe9; // jmp rel32
|
||||
std::memcpy(bytes + 1, &rel, 4);
|
||||
std::memset(bytes + 5, 0x90, 2);
|
||||
bytes[7] = 0x48;
|
||||
}
|
||||
|
||||
spu.spu_rec->compile(*func);
|
||||
|
||||
if (!func->compiled) fmt::throw_exception("Compilation failed" HERE);
|
||||
#ifdef _MSC_VER
|
||||
*(volatile u64*)(rip) = *reinterpret_cast<u64*>(+bytes);
|
||||
#else
|
||||
__atomic_store_n(reinterpret_cast<u64*>(rip), *reinterpret_cast<u64*>(+bytes), __ATOMIC_RELAXED);
|
||||
#endif
|
||||
}
|
||||
|
||||
const u32 res = func->compiled(&spu, _ls);
|
||||
|
||||
if (const auto exception = spu.pending_exception)
|
||||
else
|
||||
{
|
||||
spu.pending_exception = nullptr;
|
||||
std::rethrow_exception(exception);
|
||||
}
|
||||
alignas(16) u8 bytes[16];
|
||||
|
||||
if (res & 0x1000000)
|
||||
{
|
||||
spu.halt();
|
||||
}
|
||||
bytes[0] = 0xff; // jmp [rip+2]
|
||||
bytes[1] = 0x25;
|
||||
bytes[2] = 0x02;
|
||||
bytes[3] = 0x00;
|
||||
bytes[4] = 0x00;
|
||||
bytes[5] = 0x00;
|
||||
bytes[6] = 0x48; // mov rax, imm64 (not executed)
|
||||
bytes[7] = 0xb8;
|
||||
std::memcpy(bytes + 8, &pair->second, 8);
|
||||
|
||||
if (res & 0x2000000)
|
||||
{
|
||||
}
|
||||
|
||||
if (res & 0x4000000)
|
||||
{
|
||||
if (res & 0x8000000)
|
||||
{
|
||||
fmt::throw_exception("Invalid interrupt status set (0x%x)" HERE, res);
|
||||
}
|
||||
|
||||
spu.set_interrupt_status(true);
|
||||
}
|
||||
else if (res & 0x8000000)
|
||||
{
|
||||
spu.set_interrupt_status(false);
|
||||
}
|
||||
|
||||
spu.pc = res & 0x3fffc;
|
||||
|
||||
if (spu.interrupts_enabled && (spu.ch_event_mask & spu.ch_event_stat & SPU_EVENT_INTR_IMPLEMENTED) > 0)
|
||||
{
|
||||
spu.interrupts_enabled = false;
|
||||
spu.srr0 = std::exchange(spu.pc, 0);
|
||||
reinterpret_cast<atomic_t<u128>*>(rip)->store(*reinterpret_cast<u128*>(+bytes));
|
||||
}
|
||||
}
|
||||
|
||||
// Gathers the block of instruction words that begins at `lsa`.
// Result layout: element 0 is `lsa` itself, followed by one se_storage<u32>::swap()-ed word
// per scanned instruction. The result is empty when the word at `lsa` is zero or when
// `lsa` is not below 0x40000.
std::vector<u32> spu_recompiler_base::block(SPUThread& spu, u32 lsa)
{
	std::vector<u32> result;

	for (u32 next = lsa; next < 0x40000;)
	{
		const u32 pos = next;
		const u32 data = spu._ref<u32>(pos);

		// A zero word right at the entry point produces the empty block
		if (pos == lsa && data == 0)
		{
			break;
		}

		// Address of the instruction that follows the current one
		next = pos + 4;

		// The block address always precedes the first stored word
		if (result.empty())
		{
			result.emplace_back(lsa);
		}

		result.emplace_back(se_storage<u32>::swap(data));

		// Decide whether scanning goes on past this instruction
		bool proceed = true;

		switch (s_spu_itype.decode(data))
		{
		case spu_itype::UNK:
		case spu_itype::STOP:
		case spu_itype::STOPD:
		case spu_itype::SYNC:
		case spu_itype::DSYNC:
		case spu_itype::DFCEQ:
		case spu_itype::DFCMEQ:
		case spu_itype::DFCGT:
		//case spu_itype::DFCMGT:
		case spu_itype::DFTSV:
		case spu_itype::BI:
		case spu_itype::IRET:
		case spu_itype::BISL:
		{
			// These instruction types always close the block
			proceed = false;
			break;
		}
		case spu_itype::BRA:
		case spu_itype::BRASL:
		{
			// Absolute branch: keep going only if it lands on the very next instruction
			proceed = spu_branch_target(0, spu_opcode_t{data}.i16) == next;
			break;
		}
		case spu_itype::BR:
		case spu_itype::BRSL:
		{
			// Relative branch: keep going only if it lands on the very next instruction
			proceed = spu_branch_target(pos, spu_opcode_t{data}.i16) == next;
			break;
		}
		case spu_itype::BRZ:
		case spu_itype::BRNZ:
		case spu_itype::BRHZ:
		case spu_itype::BRHNZ:
		{
			// Conditional branch: keep going unless the target lies before the next instruction
			proceed = spu_branch_target(pos, spu_opcode_t{data}.i16) >= next;
			break;
		}
		default:
		{
			// Any other instruction never ends the block
			break;
		}
		}

		if (!proceed)
		{
			break;
		}
	}

	return result;
}
|
||||
|
|
|
@ -1,25 +1,32 @@
|
|||
#pragma once
|
||||
|
||||
#include "SPUAnalyser.h"
|
||||
#include "SPUThread.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
// SPU Recompiler instance base (must be global or PS3 process-local)
|
||||
// SPU Recompiler instance base class
|
||||
class spu_recompiler_base
|
||||
{
|
||||
protected:
|
||||
std::mutex m_mutex; // must be locked in compile()
|
||||
SPUThread& m_spu;
|
||||
|
||||
const spu_function_t* m_func; // current function
|
||||
|
||||
u32 m_pos; // current position
|
||||
u32 m_pos;
|
||||
|
||||
public:
|
||||
spu_recompiler_base(SPUThread& spu);
|
||||
|
||||
virtual ~spu_recompiler_base();
|
||||
|
||||
// Compile specified function
|
||||
virtual void compile(spu_function_t& f) = 0;
|
||||
// Compile function
|
||||
virtual spu_function_t compile(const std::vector<u32>& func) = 0;
|
||||
|
||||
// Run
|
||||
static void enter(class SPUThread&);
|
||||
// Default dispatch function fallback (second pointer is unused)
|
||||
static void dispatch(SPUThread&, void*, u8*);
|
||||
|
||||
// Direct branch fallback for non-compiled destination
|
||||
static void branch(SPUThread&, std::pair<const std::vector<u32>, spu_function_t>*, u8* rip);
|
||||
|
||||
// Get the block at specified address
|
||||
static std::vector<u32> block(SPUThread&, u32 lsa);
|
||||
|
||||
// Create recompiler instance (ASMJIT)
|
||||
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler(SPUThread& spu);
|
||||
};
|
||||
|
|
|
@ -314,7 +314,7 @@ std::string SPUThread::dump() const
|
|||
|
||||
// Print some transaction statistics
|
||||
fmt::append(ret, "\nTX: %u; Fail: %u (0x%x)", tx_success, tx_failure, tx_status);
|
||||
fmt::append(ret, "\nRaddr: 0x%08x; R: 0x%x", raddr, raddr ? +vm::reservation_acquire(raddr, 128) : 0);
|
||||
fmt::append(ret, "\nBlocks: %u; Fail: %u", block_counter, block_failure);
|
||||
fmt::append(ret, "\nTag Mask: 0x%08x", ch_tag_mask);
|
||||
fmt::append(ret, "\nMFC Stall: 0x%08x", ch_stall_mask);
|
||||
fmt::append(ret, "\nMFC Queue Size: %u", mfc_size);
|
||||
|
@ -397,12 +397,6 @@ void SPUThread::cpu_task()
|
|||
{
|
||||
std::fesetround(FE_TOWARDZERO);
|
||||
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
|
||||
{
|
||||
if (!spu_db) spu_db = fxm::get_always<SPUDatabase>();
|
||||
return spu_recompiler_base::enter(*this);
|
||||
}
|
||||
|
||||
g_tls_log_prefix = []
|
||||
{
|
||||
const auto cpu = static_cast<SPUThread*>(get_current_cpu_thread());
|
||||
|
@ -410,6 +404,16 @@ void SPUThread::cpu_task()
|
|||
return fmt::format("%s [0x%05x]", cpu->get_name(), cpu->pc);
|
||||
};
|
||||
|
||||
if (jit)
|
||||
{
|
||||
while (LIKELY(!test(state) || !check_state()))
|
||||
{
|
||||
jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Select opcode table
|
||||
const auto& table = *(
|
||||
g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() :
|
||||
|
@ -502,15 +506,6 @@ SPUThread::~SPUThread()
|
|||
vm::dealloc_verbose_nothrow(offset);
|
||||
}
|
||||
|
||||
SPUThread::SPUThread(const std::string& name)
|
||||
: cpu_thread(idm::last_id())
|
||||
, m_name(name)
|
||||
, index(0)
|
||||
, offset(0)
|
||||
, group(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
|
||||
: cpu_thread(idm::last_id())
|
||||
, m_name(name)
|
||||
|
@ -518,6 +513,14 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
|
|||
, offset(0)
|
||||
, group(group)
|
||||
{
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
|
||||
{
|
||||
jit = spu_recompiler_base::make_asmjit_recompiler(*this);
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
void SPUThread::push_snr(u32 number, u32 value)
|
||||
|
|
|
@ -5,10 +5,17 @@
|
|||
#include "Emu/Cell/SPUInterpreter.h"
|
||||
#include "MFC.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
struct lv2_event_queue;
|
||||
struct lv2_spu_group;
|
||||
struct lv2_int_tag;
|
||||
|
||||
class SPUThread;
|
||||
|
||||
// JIT Block
|
||||
using spu_function_t = void(*)(SPUThread&, void*, u8*);
|
||||
|
||||
// SPU Channels
|
||||
enum : u32
|
||||
{
|
||||
|
@ -514,16 +521,14 @@ public:
|
|||
virtual ~SPUThread() override;
|
||||
void cpu_init();
|
||||
|
||||
protected:
|
||||
SPUThread(const std::string& name);
|
||||
|
||||
public:
|
||||
static const u32 id_base = 0x02000000; // TODO (used to determine thread type)
|
||||
static const u32 id_step = 1;
|
||||
static const u32 id_count = 2048;
|
||||
|
||||
SPUThread(const std::string& name, u32 index, lv2_spu_group* group);
|
||||
|
||||
u32 pc = 0;
|
||||
|
||||
// General-Purpose Registers
|
||||
std::array<v128, 128> gpr;
|
||||
SPU_FPSCR fpscr;
|
||||
|
@ -577,24 +582,26 @@ public:
|
|||
std::array<std::pair<u32, std::weak_ptr<lv2_event_queue>>, 32> spuq; // Event Queue Keys for SPU Thread
|
||||
std::weak_ptr<lv2_event_queue> spup[64]; // SPU Ports
|
||||
|
||||
u32 pc = 0; //
|
||||
const u32 index; // SPU index
|
||||
const u32 offset; // SPU LS offset
|
||||
lv2_spu_group* const group; // SPU Thread Group
|
||||
|
||||
const std::string m_name; // Thread name
|
||||
|
||||
std::exception_ptr pending_exception;
|
||||
|
||||
std::array<struct spu_function_t*, 65536> compiled_cache{};
|
||||
std::shared_ptr<class SPUDatabase> spu_db;
|
||||
std::shared_ptr<class spu_recompiler_base> spu_rec;
|
||||
u32 recursion_level = 0;
|
||||
|
||||
u64 tx_success = 0;
|
||||
u64 tx_failure = 0;
|
||||
uint tx_status = 0;
|
||||
|
||||
std::unique_ptr<class spu_recompiler_base> jit; // Recompiler instance
|
||||
|
||||
std::map<std::vector<u32>, spu_function_t> jit_map; // All compiled blocks (first u32 is addr)
|
||||
|
||||
u64 block_counter = 0;
|
||||
u64 block_recover = 0;
|
||||
u64 block_failure = 0;
|
||||
|
||||
std::array<spu_function_t, 0x10000> jit_dispatcher; // Dispatch table for indirect calls
|
||||
|
||||
void push_snr(u32 number, u32 value);
|
||||
void do_dma_transfer(const spu_mfc_cmd& args);
|
||||
bool do_dma_check(const spu_mfc_cmd& args);
|
||||
|
|
Loading…
Add table
Reference in a new issue