SPU ASMJIT v2.0

Use X86Assembler and blocks
Nekotekina 2018-04-09 17:45:37 +03:00
parent 477522210e
commit 8ca33bcb94
12 changed files with 1363 additions and 1187 deletions
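The switch the commit message refers to drops asmjit's X86Compiler (virtual registers plus a register allocator) in favour of the lower-level X86Assembler, which emits fixed registers and label-delimited blocks directly. A minimal sketch of that workflow, assuming the asmjit API of this era (CodeHolder::init(rt.getCodeInfo()), JitRuntime::add); newer asmjit releases renamed parts of this interface:

    #define ASMJIT_STATIC
    #include "asmjit.h"                       // as included by the recompiler header below

    using func_t = int (*)();

    int main()
    {
        asmjit::JitRuntime rt;                // owns the executable memory
        asmjit::CodeHolder code;
        code.init(rt.getCodeInfo());

        asmjit::X86Assembler a(&code);        // raw assembler: no register allocation pass
        asmjit::Label skip = a.newLabel();
        a.mov(asmjit::x86::eax, 42);
        a.jmp(skip);                          // labels provide the "blocks" structure
        a.mov(asmjit::x86::eax, 0);           // never executed, jumped over
        a.bind(skip);
        a.ret();

        func_t fn = nullptr;
        if (rt.add(&fn, &code))               // non-zero asmjit::Error on failure
            return 1;
        return fn() == 42 ? 0 : 1;
    }

Skipping the register allocator presumably keeps compilation cheap, which matters because every previously unseen block is compiled on first execution by the dispatcher added later in this commit.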

View file

@@ -13,6 +13,12 @@ bool utils::has_ssse3()
return g_value;
}
bool utils::has_sse41()
{
static const bool g_value = get_cpuid(0, 0)[0] >= 0x1 && get_cpuid(1, 0)[2] & 0x80000;
return g_value;
}
bool utils::has_avx()
{
static const bool g_value = get_cpuid(0, 0)[0] >= 0x1 && get_cpuid(1, 0)[2] & 0x10000000 && (get_cpuid(1, 0)[2] & 0x0C000000) == 0x0C000000 && (get_xgetbv(0) & 0x6) == 0x6;
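For reference, the masks tested here are CPUID leaf 1 ECX bits: 0x80000 is bit 19 (SSE4.1), 0x10000000 is bit 28 (AVX), 0x0C000000 is bits 26-27 (XSAVE/OSXSAVE), and the XGETBV(0) & 0x6 check confirms the OS enabled XMM+YMM state saving in XCR0. A standalone sketch with the masks named; the max_leaf, ecx1 and xcr0 parameters stand in for get_cpuid(0, 0)[0], get_cpuid(1, 0)[2] and get_xgetbv(0):

    #include <cstdint>

    constexpr std::uint32_t CPUID1_ECX_SSE41   = 1u << 19; // 0x00080000
    constexpr std::uint32_t CPUID1_ECX_XSAVE   = 1u << 26; // bits 26+27 == 0x0C000000
    constexpr std::uint32_t CPUID1_ECX_OSXSAVE = 1u << 27;
    constexpr std::uint32_t CPUID1_ECX_AVX     = 1u << 28; // 0x10000000
    constexpr std::uint64_t XCR0_SSE_AVX       = 0x6;      // XMM (bit 1) + YMM (bit 2) state

    bool has_sse41(std::uint32_t max_leaf, std::uint32_t ecx1)
    {
        return max_leaf >= 1 && (ecx1 & CPUID1_ECX_SSE41) != 0;
    }

    bool has_avx(std::uint32_t max_leaf, std::uint32_t ecx1, std::uint64_t xcr0)
    {
        // CPU support alone is not enough: the CPU must advertise XSAVE/OSXSAVE
        // and the OS must have enabled SSE+AVX register state in XCR0.
        const std::uint32_t osxsave = CPUID1_ECX_XSAVE | CPUID1_ECX_OSXSAVE;
        return max_leaf >= 1
            && (ecx1 & CPUID1_ECX_AVX) != 0
            && (ecx1 & osxsave) == osxsave
            && (xcr0 & XCR0_SSE_AVX) == XCR0_SSE_AVX;
    }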

View file

@@ -29,6 +29,8 @@ namespace utils
bool has_ssse3();
bool has_sse41();
bool has_avx();
bool has_avx2();

View file

@@ -42,10 +42,10 @@ protected:
public:
std::string last_opcode;
u32 dump_pc;
u8* offset;
const u8* offset;
protected:
CPUDisAsm(CPUDisAsmMode mode)
CPUDisAsm(CPUDisAsmMode mode)
: m_mode(mode)
, offset(0)
{

View file

@@ -37,7 +37,7 @@ void RawSPUThread::on_init(const std::shared_ptr<void>& _this)
}
RawSPUThread::RawSPUThread(const std::string& name)
: SPUThread(name)
: SPUThread(name, 0, nullptr)
{
}

File diff suppressed because it is too large

View file

@@ -1,56 +1,73 @@
#pragma once
#include "Utilities/mutex.h"
#include "SPURecompiler.h"
namespace asmjit
#include <functional>
#define ASMJIT_STATIC
#define ASMJIT_DEBUG
#include "asmjit.h"
// SPU ASMJIT Runtime object (global)
class spu_runtime
{
struct JitRuntime;
struct CodeHolder;
struct X86Compiler;
struct X86Gp;
struct X86Xmm;
struct X86Mem;
struct Label;
}
shared_mutex m_mutex;
asmjit::JitRuntime m_jitrt;
// All functions
std::map<std::vector<u32>, spu_function_t> m_map;
// TODO
std::array<atomic_t<spu_function_t>, 0x10000> m_dispatcher;
friend class spu_recompiler;
public:
spu_runtime();
};
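spu_runtime couples a shared_mutex, a map of compiled functions keyed by the raw block contents (address plus opcodes, matching the jit_map comment later in this diff), and a 0x10000-entry dispatcher array with one atomic slot per possible 4-byte PC. A hedged sketch of how such a cache might be queried and filled, using std::shared_mutex and a simplified function-pointer type rather than the project's own primitives (the real locking policy lives in the suppressed SPUASMJITRecompiler.cpp):

    #include <cstdint>
    #include <map>
    #include <mutex>
    #include <shared_mutex>
    #include <vector>

    using u32 = std::uint32_t;
    using toy_func = void (*)();    // simplified stand-in for spu_function_t

    struct toy_runtime
    {
        std::shared_mutex m_mutex;
        std::map<std::vector<u32>, toy_func> m_map;

        toy_func find(const std::vector<u32>& key)
        {
            std::shared_lock<std::shared_mutex> lock(m_mutex); // many readers in parallel
            const auto it = m_map.find(key);
            return it == m_map.end() ? nullptr : it->second;
        }

        void publish(std::vector<u32> key, toy_func fn)
        {
            std::unique_lock<std::shared_mutex> lock(m_mutex); // exclusive for insertion
            m_map.emplace(std::move(key), fn);
        }
    };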
// SPU ASMJIT Recompiler
class spu_recompiler : public spu_recompiler_base
{
const std::shared_ptr<asmjit::JitRuntime> m_jit;
const std::shared_ptr<asmjit::JitRuntime> m_rt;
std::shared_ptr<spu_runtime> m_spurt;
public:
spu_recompiler();
spu_recompiler(class SPUThread& spu);
virtual void compile(spu_function_t& f) override;
virtual spu_function_t compile(const std::vector<u32>& func) override;
private:
// emitter:
asmjit::X86Compiler* c;
asmjit::CodeHolder* codeHolder;
asmjit::X86Assembler* c;
// input:
asmjit::X86Gp* cpu;
asmjit::X86Gp* ls;
// arguments:
const asmjit::X86Gp* cpu;
const asmjit::X86Gp* ls;
const asmjit::X86Gp* qw0;
const asmjit::X86Gp* qw1;
// temporary:
asmjit::X86Gp* addr;
asmjit::X86Gp* qw0;
asmjit::X86Gp* qw1;
asmjit::X86Gp* qw2;
asmjit::X86Gp* qw3;
std::array<asmjit::X86Xmm*, 6> vec;
const asmjit::X86Gp* addr;
std::array<const asmjit::X86Xmm*, 6> vec;
// labels:
asmjit::Label* labels; // array[0x10000]
asmjit::Label* jt; // jump table resolver (uses *addr)
asmjit::Label* end; // function end (return *addr)
// workload for the end of function:
std::vector<std::function<void()>> after;
std::vector<std::function<void()>> consts;
// All emitted 128-bit consts
std::map<std::pair<u64, u64>, asmjit::Label> xmm_consts;
class XmmLink
{
asmjit::X86Xmm* m_var;
const asmjit::X86Xmm* m_var;
public:
XmmLink(asmjit::X86Xmm*& xmm_var)
XmmLink(const asmjit::X86Xmm*& xmm_var)
: m_var(xmm_var)
{
xmm_var = nullptr;
@@ -58,7 +75,7 @@ private:
XmmLink(XmmLink&&) = default; // MoveConstructible + delete copy constructor and copy/move operators
operator asmjit::X86Xmm&() const
operator const asmjit::X86Xmm&() const
{
return *m_var;
}
@@ -78,10 +95,15 @@ private:
asmjit::X86Mem XmmConst(__m128 data);
asmjit::X86Mem XmmConst(__m128i data);
void branch_fixed(u32 target);
void branch_indirect(spu_opcode_t op);
asmjit::Label halt(u32 pos);
void fall(spu_opcode_t op);
void save_rcx();
void load_rcx();
public:
void CheckInterruptStatus(spu_opcode_t op);
void InterpreterCall(spu_opcode_t op);
void FunctionCall();
void UNK(spu_opcode_t op);
void STOP(spu_opcode_t op);
void LNOP(spu_opcode_t op);
@@ -282,6 +304,4 @@ public:
void FNMS(spu_opcode_t op);
void FMA(spu_opcode_t op);
void FMS(spu_opcode_t op);
void UNK(spu_opcode_t op);
};
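The new after and consts vectors queue work that is emitted once the main body is done, and xmm_consts deduplicates 128-bit literals: each unique value, keyed as a pair of u64 halves, gets a single asmjit::Label whose data is presumably emitted by the consts pass instead of being repeated per use. An illustration of the dedup step only, with a stand-in Label type (not the real emitter code):

    #include <cstdint>
    #include <map>
    #include <utility>

    using u64 = std::uint64_t;
    struct Label { int id = -1; };   // stand-in for asmjit::Label

    std::map<std::pair<u64, u64>, Label> xmm_consts;
    int next_label_id = 0;

    Label xmm_const(u64 lo, u64 hi)
    {
        // One label per unique 128-bit constant; the data itself is emitted
        // once, after the generated code.
        const auto found = xmm_consts.emplace(std::make_pair(lo, hi), Label{});
        if (found.second)
        {
            found.first->second.id = next_label_id++;
        }
        return found.first->second;
    }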

View file

@@ -1,39 +1,12 @@
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "SPUAnalyser.h"
#include "SPURecompiler.h"
#include "SPUOpcodes.h"
const spu_decoder<spu_itype> s_spu_itype;
spu_function_t* SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size)
{
for (auto found = m_db.equal_range(key); found.first != found.second; found.first++)
{
const auto& func = found.first->second;
// Compare binary data explicitly (TODO: optimize)
if (LIKELY(func->size <= max_size) && std::memcmp(func->data.data(), data, func->size) == 0)
{
return func.get();
}
}
return nullptr;
}
SPUDatabase::SPUDatabase()
{
// TODO: load existing database associated with currently running executable
LOG_SUCCESS(SPU, "SPU Database initialized...");
}
SPUDatabase::~SPUDatabase()
{
// TODO: serialize database
}
spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_limit)
std::shared_ptr<spu_function> spu_analyse(const be_t<u32>* ls, u32 entry, u32 max_limit)
{
// Check arguments (bounds and alignment)
if (max_limit > 0x40000 || entry >= max_limit || entry % 4 || max_limit % 4)
@@ -47,23 +20,23 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
const u32 block_sz = max_limit - entry;
{
reader_lock lock(m_mutex);
//reader_lock lock(m_mutex);
// Try to find existing function in the database
if (auto func = find(base, key, block_sz))
{
return func;
}
// if (auto func = find(base, key, block_sz))
// {
// return func;
// }
}
{
writer_lock lock(m_mutex);
//writer_lock lock(m_mutex);
// Double-check
if (auto func = find(base, key, block_sz))
{
return func;
}
// if (auto func = find(base, key, block_sz))
// {
// return func;
// }
}
// Initialize block entries with the function entry point
@@ -89,14 +62,14 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
const auto type = s_spu_itype.decode(op.opcode);
{
reader_lock lock(m_mutex);
//reader_lock lock(m_mutex);
// Find existing function
if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
{
limit = pos;
break;
}
// if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
// {
// limit = pos;
// break;
// }
}
// Additional analysis at the beginning of the block
@@ -156,7 +129,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// Fix pos value
start = pos; pos = pos - 4;
continue;
}
@@ -179,10 +152,10 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// if upcoming instruction is not BI, reset the pigeonhole optimization
// todo: can constant propagation somewhere get rid of this check?
if ((type != BI))
if ((type != spu_itype::BI))
ila_r2_addr = 0; // reset
if (type == BI || type == IRET) // Branch Indirect
if (type == spu_itype::BI || type == spu_itype::IRET) // Branch Indirect
{
blocks.emplace(start);
start = pos + 4;
@@ -190,9 +163,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
if (op.ra == 2 && ila_r2_addr > entry)
blocks.emplace(ila_r2_addr);
}
else if (type == BR || type == BRA) // Branch Relative/Absolute
else if (type == spu_itype::BR || type == spu_itype::BRA) // Branch Relative/Absolute
{
const u32 target = spu_branch_target(type == BR ? pos : 0, op.i16);
const u32 target = spu_branch_target(type == spu_itype::BR ? pos : 0, op.i16);
// Add adjacent function because it always could be
adjacent.emplace(target);
@@ -205,9 +178,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
blocks.emplace(start);
start = pos + 4;
}
else if (type == BRSL || type == BRASL) // Branch Relative/Absolute and Set Link
else if (type == spu_itype::BRSL || type == spu_itype::BRASL) // Branch Relative/Absolute and Set Link
{
const u32 target = spu_branch_target(type == BRSL ? pos : 0, op.i16);
const u32 target = spu_branch_target(type == spu_itype::BRSL ? pos : 0, op.i16);
if (target == pos + 4)
{
@@ -228,11 +201,11 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Function call without $LR", pos);
}
}
else if (type == BISL || type == BISLED) // Branch Indirect and Set Link
else if (type == spu_itype::BISL || type == spu_itype::BISLED) // Branch Indirect and Set Link
{
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Indirect function call without $LR", pos);
}
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero (Half)word
else if (type == spu_itype::BRNZ || type == spu_itype::BRZ || type == spu_itype::BRHNZ || type == spu_itype::BRHZ) // Branch Relative if (Not) Zero (Half)word
{
const u32 target = spu_branch_target(pos, op.i16);
@@ -244,7 +217,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
blocks.emplace(target);
}
}
else if (type == LNOP || type == NOP) {
else if (type == spu_itype::LNOP || type == spu_itype::NOP) {
// there's a chance that there are some random lnops/nops after the end of a function
// haven't found a definite pattern, but it's an easy optimization to check for: just push start down if an lnop is tagged as a start
// todo: remove the last added start pos as it's probably unnecessary
@@ -262,7 +235,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// Analyse stack pointer access
else if (rt == 1)
{
if (type == ILA && pos < ila_sp_pos)
if (type == spu_itype::ILA && pos < ila_sp_pos)
{
// set minimal ila $SP,* instruction position
ila_sp_pos = pos;
@@ -272,7 +245,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// ila r2, addr
// bi r2
else if (rt == 2) {
if (type == ILA)
if (type == spu_itype::ILA)
ila_r2_addr = spu_branch_target(op.i18);
}
}
@@ -285,9 +258,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
const auto type = s_spu_itype.decode(op.opcode);
if (type == BRSL || type == BRASL) // Branch Relative/Absolute and Set Link
if (type == spu_itype::BRSL || type == spu_itype::BRASL) // Branch Relative/Absolute and Set Link
{
const u32 target = spu_branch_target(type == BRSL ? pos : 0, op.i16);
const u32 target = spu_branch_target(type == spu_itype::BRSL ? pos : 0, op.i16);
if (target != pos + 4 && target > entry && limit > target)
{
@@ -308,7 +281,7 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
}
// Prepare new function (set addr and size)
auto func = std::make_shared<spu_function_t>(entry, limit - entry);
auto func = std::make_shared<spu_function>(entry, limit - entry);
// Copy function contents
func->data = { ls + entry / 4, ls + limit / 4 };
@@ -346,13 +319,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// Lock here just before we write to the db
// It is unlikely that the second check will pass anyway, so we delay this step since compiling functions is very fast
{
writer_lock lock(m_mutex);
//writer_lock lock(m_mutex);
// Add function to the database
m_db.emplace(key, func);
//m_db.emplace(key, func);
}
LOG_NOTICE(SPU, "Function detected [0x%05x-0x%05x] (size=0x%x)", func->addr, func->addr + func->size, func->size);
return func.get();
return func;
}
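Throughout the analysis above, spu_branch_target(base, i16) folds a 16-bit word offset into a local-store address: relative forms (BR, BRSL, BRZ, ...) pass the instruction's own position, absolute forms (BRA, BRASL) pass 0. A sketch of the presumed definition, assuming the helper wraps to the 256 KiB, word-aligned LS range (the actual helper is declared elsewhere and not shown in this diff):

    #include <cstdint>

    using u32 = std::uint32_t;

    // Negative (backward) offsets work out through unsigned wrap-around because
    // the result is masked to the 18-bit, 4-byte-aligned LS range.
    constexpr u32 spu_branch_target(u32 pc, u32 imm = 0)
    {
        return (pc + (imm << 2)) & 0x3fffc;
    }

    static_assert(spu_branch_target(0x100, 4) == 0x110, "relative: pc + imm * 4");
    static_assert(spu_branch_target(0, 0x8000) == 0x20000, "absolute: imm * 4 within LS");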

View file

@@ -1,7 +1,6 @@
#pragma once
#include "Utilities/mutex.h"
#include <vector>
#include <set>
// SPU Instruction Type
@@ -247,7 +246,7 @@ struct spu_itype
class SPUThread;
// SPU basic function information structure
struct spu_function_t
struct spu_function
{
// Entry point (LS address)
const u32 addr;
@@ -273,28 +272,9 @@ struct spu_function_t
// Pointer to the compiled function
u32(*compiled)(SPUThread* _spu, be_t<u32>* _ls) = nullptr;
spu_function_t(u32 addr, u32 size)
spu_function(u32 addr, u32 size)
: addr(addr)
, size(size)
{
}
};
// SPU Function Database (must be global or PS3 process-local)
class SPUDatabase final : spu_itype
{
shared_mutex m_mutex;
// All registered functions (uses addr and first instruction as a key)
std::unordered_multimap<u64, std::shared_ptr<spu_function_t>> m_db;
// For internal use
spu_function_t* find(const be_t<u32>* data, u64 key, u32 max_size);
public:
SPUDatabase();
~SPUDatabase();
// Try to retrieve SPU function information
spu_function_t* analyse(const be_t<u32>* ls, u32 entry, u32 limit = 0x40000);
};

View file

@@ -3,92 +3,183 @@
#include "Emu/Memory/Memory.h"
#include "SPUThread.h"
#include "SPUAnalyser.h"
#include "SPURecompiler.h"
#include "SPUASMJITRecompiler.h"
#include <algorithm>
extern u64 get_system_time();
const spu_decoder<spu_itype> s_spu_itype;
spu_recompiler_base::spu_recompiler_base(SPUThread& spu)
: m_spu(spu)
{
// Initialize lookup table
spu.jit_dispatcher.fill(&dispatch);
// Initialize "empty" block
spu.jit_map[std::vector<u32>()] = &dispatch;
}
spu_recompiler_base::~spu_recompiler_base()
{
}
void spu_recompiler_base::enter(SPUThread& spu)
void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
{
if (spu.pc >= 0x40000 || spu.pc % 4)
const auto result = spu.jit_map.emplace(block(spu, spu.pc), nullptr);
if (result.second || !result.first->second)
{
fmt::throw_exception("Invalid PC: 0x%05x", spu.pc);
result.first->second = spu.jit->compile(result.first->first);
}
// Get SPU LS pointer
const auto _ls = vm::_ptr<u32>(spu.offset);
spu.jit_dispatcher[spu.pc / 4] = result.first->second;
}
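dispatch() is the fallback installed in every jit_dispatcher slot: the first time a PC is reached it compiles (or finds) the block keyed by its contents and then overwrites the slot, so later iterations of the cpu_task() loop jump straight into compiled code. A toy model of that scheme with simplified types and no real compilation:

    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <memory>

    struct toy_spu;
    using toy_func = void (*)(toy_spu&);

    struct toy_spu
    {
        std::uint32_t pc = 0;
        std::array<toy_func, 0x10000> dispatcher{}; // one slot per 4-byte PC
    };

    void compiled_block(toy_spu& spu)
    {
        std::printf("running compiled code for pc=0x%05x\n", static_cast<unsigned>(spu.pc));
    }

    void dispatch(toy_spu& spu)
    {
        std::printf("first visit to pc=0x%05x: compile and install\n", static_cast<unsigned>(spu.pc));
        spu.dispatcher[spu.pc / 4] = compiled_block; // patch the table slot
        compiled_block(spu);
    }

    int main()
    {
        auto spu = std::make_unique<toy_spu>();
        spu->dispatcher.fill(&dispatch);             // mirrors the constructor's fill(&dispatch)
        for (int i = 0; i < 3; i++)
            spu->dispatcher[spu->pc / 4](*spu);      // only the first call hits dispatch()
    }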
// Search if cached data matches
auto func = spu.compiled_cache[spu.pc / 4];
void spu_recompiler_base::branch(SPUThread& spu, std::pair<const std::vector<u32>, spu_function_t>* pair, u8* rip)
{
spu.pc = pair->first[0];
// Check shared db if we dont have a match
if (!func || !std::equal(func->data.begin(), func->data.end(), _ls + spu.pc / 4, [](const be_t<u32>& l, const be_t<u32>& r) { return *(u32*)(u8*)&l == *(u32*)(u8*)&r; }))
if (!pair->second)
{
func = spu.spu_db->analyse(_ls, spu.pc);
spu.compiled_cache[spu.pc / 4] = func;
pair->second = spu.jit->compile(pair->first);
}
// Reset callstack if necessary
if ((func->does_reset_stack && spu.recursion_level) || spu.recursion_level >= 128)
{
spu.state += cpu_flag::ret;
return;
}
spu.jit_dispatcher[spu.pc / 4] = pair->second;
// Compile if needed
if (!func->compiled)
// Overwrite jump to this function with jump to the compiled function
const s64 rel = reinterpret_cast<u64>(pair->second) - reinterpret_cast<u64>(rip) - 5;
if (rel >= INT32_MIN && rel <= INT32_MAX)
{
if (!spu.spu_rec)
const s64 rel8 = (rel + 5) - 2;
alignas(8) u8 bytes[8];
if (rel8 >= INT8_MIN && rel8 <= INT8_MAX)
{
spu.spu_rec = fxm::get_always<spu_recompiler>();
bytes[0] = 0xeb; // jmp rel8
bytes[1] = static_cast<s8>(rel8);
std::memset(bytes + 2, 0x90, 5);
bytes[7] = 0x48;
}
else
{
bytes[0] = 0xe9; // jmp rel32
std::memcpy(bytes + 1, &rel, 4);
std::memset(bytes + 5, 0x90, 2);
bytes[7] = 0x48;
}
spu.spu_rec->compile(*func);
if (!func->compiled) fmt::throw_exception("Compilation failed" HERE);
#ifdef _MSC_VER
*(volatile u64*)(rip) = *reinterpret_cast<u64*>(+bytes);
#else
__atomic_store_n(reinterpret_cast<u64*>(rip), *reinterpret_cast<u64*>(+bytes), __ATOMIC_RELAXED);
#endif
}
const u32 res = func->compiled(&spu, _ls);
if (const auto exception = spu.pending_exception)
else
{
spu.pending_exception = nullptr;
std::rethrow_exception(exception);
}
alignas(16) u8 bytes[16];
if (res & 0x1000000)
{
spu.halt();
}
bytes[0] = 0xff; // jmp [rip+2]
bytes[1] = 0x25;
bytes[2] = 0x02;
bytes[3] = 0x00;
bytes[4] = 0x00;
bytes[5] = 0x00;
bytes[6] = 0x48; // mov rax, imm64 (not executed)
bytes[7] = 0xb8;
std::memcpy(bytes + 8, &pair->second, 8);
if (res & 0x2000000)
{
}
if (res & 0x4000000)
{
if (res & 0x8000000)
{
fmt::throw_exception("Invalid interrupt status set (0x%x)" HERE, res);
}
spu.set_interrupt_status(true);
}
else if (res & 0x8000000)
{
spu.set_interrupt_status(false);
}
spu.pc = res & 0x3fffc;
if (spu.interrupts_enabled && (spu.ch_event_mask & spu.ch_event_stat & SPU_EVENT_INTR_IMPLEMENTED) > 0)
{
spu.interrupts_enabled = false;
spu.srr0 = std::exchange(spu.pc, 0);
reinterpret_cast<atomic_t<u128>*>(rip)->store(*reinterpret_cast<u128*>(+bytes));
}
}
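branch() patches the caller at rip in place: a 2-byte jmp rel8 when the freshly compiled target is nearby, a 5-byte jmp rel32 when it is within ±2 GiB, and otherwise a 6-byte jmp qword [rip+2] whose 64-bit target sits in the immediate of the never-executed mov rax, imm64 that follows; each variant is written with one aligned 8- or 16-byte store so another thread can never observe a torn instruction. The displacement arithmetic spelled out as hypothetical helpers (matching the rel and (rel + 5) - 2 expressions above):

    #include <cstdint>

    // A 5-byte "E9 rel32" jump encodes target - (rip + 5); the 2-byte "EB rel8"
    // form encodes target - (rip + 2). With rel = target - rip - 5 as computed
    // above, the short displacement is therefore (rel + 5) - 2.
    constexpr std::int64_t jmp_rel32(std::int64_t target, std::int64_t rip)
    {
        return target - rip - 5;
    }

    constexpr std::int64_t jmp_rel8(std::int64_t target, std::int64_t rip)
    {
        return target - rip - 2;
    }

    static_assert(jmp_rel8(0x1010, 0x1000) == jmp_rel32(0x1010, 0x1000) + 5 - 2, "rel8 = (rel + 5) - 2");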
std::vector<u32> spu_recompiler_base::block(SPUThread& spu, u32 lsa)
{
u32 addr = lsa;
std::vector<u32> result;
while (addr < 0x40000)
{
const u32 data = spu._ref<u32>(addr);
if (data == 0 && addr == lsa)
{
break;
}
addr += 4;
if (result.empty())
{
result.emplace_back(lsa);
}
result.emplace_back(se_storage<u32>::swap(data));
const auto type = s_spu_itype.decode(data);
switch (type)
{
case spu_itype::UNK:
case spu_itype::STOP:
case spu_itype::STOPD:
case spu_itype::SYNC:
case spu_itype::DSYNC:
case spu_itype::DFCEQ:
case spu_itype::DFCMEQ:
case spu_itype::DFCGT:
//case spu_itype::DFCMGT:
case spu_itype::DFTSV:
case spu_itype::BI:
case spu_itype::IRET:
case spu_itype::BISL:
{
break;
}
case spu_itype::BRA:
case spu_itype::BRASL:
{
if (spu_branch_target(0, spu_opcode_t{data}.i16) == addr)
{
continue;
}
break;
}
case spu_itype::BR:
case spu_itype::BRSL:
{
if (spu_branch_target(addr - 4, spu_opcode_t{data}.i16) == addr)
{
continue;
}
break;
}
case spu_itype::BRZ:
case spu_itype::BRNZ:
case spu_itype::BRHZ:
case spu_itype::BRHNZ:
{
if (spu_branch_target(addr - 4, spu_opcode_t{data}.i16) >= addr)
{
continue;
}
break;
}
default:
{
continue;
}
}
break;
}
return result;
}

View file

@@ -1,25 +1,32 @@
#pragma once
#include "SPUAnalyser.h"
#include "SPUThread.h"
#include <mutex>
// SPU Recompiler instance base (must be global or PS3 process-local)
// SPU Recompiler instance base class
class spu_recompiler_base
{
protected:
std::mutex m_mutex; // must be locked in compile()
SPUThread& m_spu;
const spu_function_t* m_func; // current function
u32 m_pos; // current position
u32 m_pos;
public:
spu_recompiler_base(SPUThread& spu);
virtual ~spu_recompiler_base();
// Compile specified function
virtual void compile(spu_function_t& f) = 0;
// Compile function
virtual spu_function_t compile(const std::vector<u32>& func) = 0;
// Run
static void enter(class SPUThread&);
// Default dispatch function fallback (second pointer is unused)
static void dispatch(SPUThread&, void*, u8*);
// Direct branch fallback for non-compiled destination
static void branch(SPUThread&, std::pair<const std::vector<u32>, spu_function_t>*, u8* rip);
// Get the block at specified address
static std::vector<u32> block(SPUThread&, u32 lsa);
// Create recompiler instance (ASMJIT)
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler(SPUThread& spu);
};

View file

@@ -314,7 +314,7 @@ std::string SPUThread::dump() const
// Print some transaction statistics
fmt::append(ret, "\nTX: %u; Fail: %u (0x%x)", tx_success, tx_failure, tx_status);
fmt::append(ret, "\nRaddr: 0x%08x; R: 0x%x", raddr, raddr ? +vm::reservation_acquire(raddr, 128) : 0);
fmt::append(ret, "\nBlocks: %u; Fail: %u", block_counter, block_failure);
fmt::append(ret, "\nTag Mask: 0x%08x", ch_tag_mask);
fmt::append(ret, "\nMFC Stall: 0x%08x", ch_stall_mask);
fmt::append(ret, "\nMFC Queue Size: %u", mfc_size);
@@ -397,12 +397,6 @@ void SPUThread::cpu_task()
{
std::fesetround(FE_TOWARDZERO);
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
{
if (!spu_db) spu_db = fxm::get_always<SPUDatabase>();
return spu_recompiler_base::enter(*this);
}
g_tls_log_prefix = []
{
const auto cpu = static_cast<SPUThread*>(get_current_cpu_thread());
@@ -410,6 +404,16 @@ void SPUThread::cpu_task()
return fmt::format("%s [0x%05x]", cpu->get_name(), cpu->pc);
};
if (jit)
{
while (LIKELY(!test(state) || !check_state()))
{
jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
}
return;
}
// Select opcode table
const auto& table = *(
g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() :
@@ -502,15 +506,6 @@ SPUThread::~SPUThread()
vm::dealloc_verbose_nothrow(offset);
}
SPUThread::SPUThread(const std::string& name)
: cpu_thread(idm::last_id())
, m_name(name)
, index(0)
, offset(0)
, group(nullptr)
{
}
SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
: cpu_thread(idm::last_id())
, m_name(name)
@@ -518,6 +513,14 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
, offset(0)
, group(group)
{
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
{
jit = spu_recompiler_base::make_asmjit_recompiler(*this);
}
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
{
}
}
void SPUThread::push_snr(u32 number, u32 value)

View file

@@ -5,10 +5,17 @@
#include "Emu/Cell/SPUInterpreter.h"
#include "MFC.h"
#include <map>
struct lv2_event_queue;
struct lv2_spu_group;
struct lv2_int_tag;
class SPUThread;
// JIT Block
using spu_function_t = void(*)(SPUThread&, void*, u8*);
// SPU Channels
enum : u32
{
@@ -514,16 +521,14 @@ public:
virtual ~SPUThread() override;
void cpu_init();
protected:
SPUThread(const std::string& name);
public:
static const u32 id_base = 0x02000000; // TODO (used to determine thread type)
static const u32 id_step = 1;
static const u32 id_count = 2048;
SPUThread(const std::string& name, u32 index, lv2_spu_group* group);
u32 pc = 0;
// General-Purpose Registers
std::array<v128, 128> gpr;
SPU_FPSCR fpscr;
@@ -577,24 +582,26 @@ public:
std::array<std::pair<u32, std::weak_ptr<lv2_event_queue>>, 32> spuq; // Event Queue Keys for SPU Thread
std::weak_ptr<lv2_event_queue> spup[64]; // SPU Ports
u32 pc = 0; //
const u32 index; // SPU index
const u32 offset; // SPU LS offset
lv2_spu_group* const group; // SPU Thread Group
const std::string m_name; // Thread name
std::exception_ptr pending_exception;
std::array<struct spu_function_t*, 65536> compiled_cache{};
std::shared_ptr<class SPUDatabase> spu_db;
std::shared_ptr<class spu_recompiler_base> spu_rec;
u32 recursion_level = 0;
u64 tx_success = 0;
u64 tx_failure = 0;
uint tx_status = 0;
std::unique_ptr<class spu_recompiler_base> jit; // Recompiler instance
std::map<std::vector<u32>, spu_function_t> jit_map; // All compiled blocks (first u32 is addr)
u64 block_counter = 0;
u64 block_recover = 0;
u64 block_failure = 0;
std::array<spu_function_t, 0x10000> jit_dispatcher; // Dispatch table for indirect calls
void push_snr(u32 number, u32 value);
void do_dma_transfer(const spu_mfc_cmd& args);
bool do_dma_check(const spu_mfc_cmd& args);