mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-22 12:35:21 +00:00
Implemented branch instructions and some basic block combining.
This commit is contained in:
parent
d5e86f7190
commit
1bd235616f
3 changed files with 587 additions and 208 deletions
|
@ -26,8 +26,7 @@ bool PPULLVMRecompiler::s_rotate_mask_inited = false;
|
|||
|
||||
PPULLVMRecompiler::PPULLVMRecompiler()
|
||||
: ThreadBase("PPULLVMRecompiler")
|
||||
, m_compilation_time(0.0)
|
||||
, m_idling_time(0.0) {
|
||||
, m_revision(0) {
|
||||
InitializeNativeTarget();
|
||||
InitializeNativeTargetAsmPrinter();
|
||||
InitializeNativeTargetDisassembler();
|
||||
|
@ -77,28 +76,33 @@ PPULLVMRecompiler::~PPULLVMRecompiler() {
|
|||
|
||||
std::string error;
|
||||
raw_fd_ostream log_file("PPULLVMRecompiler.log", error, sys::fs::F_Text);
|
||||
log_file << "Time spent compiling = " << m_compilation_time.count() << "s\n";
|
||||
log_file << "Time spent idling = " << m_idling_time.count() << "s\n\n";
|
||||
log_file << "Interpreter fallback stats:\n";
|
||||
log_file << "Total time = " << m_total_time.count() / 1000000 << "ms\n";
|
||||
log_file << " Time spent compiling = " << m_compilation_time.count() / 1000000 << "ms\n";
|
||||
log_file << " Time spent building IR = " << m_ir_build_time.count() / 1000000 << "ms\n";
|
||||
log_file << " Time spent optimizing = " << m_optimizing_time.count() / 1000000 << "ms\n";
|
||||
log_file << " Time spent translating = " << m_translation_time.count() / 1000000 << "ms\n";
|
||||
log_file << " Time spent idling = " << m_idling_time.count() / 1000000 << "ms\n";
|
||||
log_file << " Time spent doing misc tasks = " << (m_total_time.count() - m_idling_time.count() - m_compilation_time.count()) / 1000000 << "ms\n";
|
||||
log_file << "Revision = " << m_revision << "\n";
|
||||
log_file << "\nInterpreter fallback stats:\n";
|
||||
for (auto i = m_interpreter_fallback_stats.begin(); i != m_interpreter_fallback_stats.end(); i++) {
|
||||
log_file << i->first << " = " << i->second << "\n";
|
||||
}
|
||||
|
||||
log_file << "\nBlock Information:\n";
|
||||
for (auto i = m_compiled_blocks.begin(); i != m_compiled_blocks.end(); i++) {
|
||||
log_file << fmt::Format("\n%s: Size = %u bytes, Reference count = %llu\n", i->second.llvm_function->getName().str().c_str(), i->second.size, i->second.reference_count);
|
||||
log_file << "\nDisassembly:\n";
|
||||
for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) {
|
||||
log_file << fmt::Format("%s: Size = %u bytes, Number of instructions = %u\n", i->second.llvm_function->getName().str().c_str(), i->second.size, i->second.num_instructions);
|
||||
//for (size_t pc = 0; pc < i->second.size;) {
|
||||
// char str[1024];
|
||||
|
||||
log_file << "Disassembly:\n";
|
||||
for (size_t pc = 0; pc < i->second.size;) {
|
||||
char str[1024];
|
||||
|
||||
auto size = LLVMDisasmInstruction(m_disassembler, (uint8_t *)i->second.block + pc, i->second.size - pc, (uint64_t)((uint8_t *)i->second.block + pc), str, sizeof(str));
|
||||
log_file << str << '\n';
|
||||
pc += size;
|
||||
}
|
||||
// auto size = LLVMDisasmInstruction(m_disassembler, (uint8_t *)i->second.executable + pc, i->second.size - pc,
|
||||
// (uint64_t)((uint8_t *)i->second.executable + pc), str, sizeof(str));
|
||||
// log_file << str << '\n';
|
||||
// pc += size;
|
||||
//}
|
||||
}
|
||||
|
||||
log_file << "\nLLVM IR:\n" << *m_module;
|
||||
//log_file << "\nLLVM IR:\n" << *m_module;
|
||||
|
||||
LLVMDisasmDispose(m_disassembler);
|
||||
delete m_execution_engine;
|
||||
|
@ -107,33 +111,31 @@ PPULLVMRecompiler::~PPULLVMRecompiler() {
|
|||
delete m_llvm_context;
|
||||
}
|
||||
|
||||
PPULLVMRecompiler::CompiledBlockInfo PPULLVMRecompiler::GetCompiledBlock(u32 address) {
|
||||
static CompiledBlockInfo unknown_block_info = {0};
|
||||
std::pair<PPULLVMRecompiler::Executable, u32> PPULLVMRecompiler::GetExecutable(u32 address) {
|
||||
std::lock_guard<std::mutex> lock(m_compiled_shared_lock);
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_compiled_blocks_mutex);
|
||||
|
||||
auto compiled_block = m_compiled_blocks.lower_bound(std::make_pair(address, 0));
|
||||
if (compiled_block != m_compiled_blocks.end() && compiled_block->first.first == address) {
|
||||
compiled_block->second.reference_count++;
|
||||
return compiled_block->second;
|
||||
auto compiled = m_compiled_shared.lower_bound(std::make_pair(address, 0));
|
||||
if (compiled != m_compiled_shared.end() && compiled->first.first == address) {
|
||||
compiled->second.second++;
|
||||
return std::make_pair(compiled->second.first, compiled->first.second);
|
||||
}
|
||||
|
||||
return unknown_block_info;
|
||||
return std::make_pair(nullptr, 0);
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::ReleaseCompiledBlock(u32 address, u32 revision) {
|
||||
std::lock_guard<std::mutex> lock(m_compiled_blocks_mutex);
|
||||
void PPULLVMRecompiler::ReleaseExecutable(u32 address, u32 revision) {
|
||||
std::lock_guard<std::mutex> lock(m_compiled_shared_lock);
|
||||
|
||||
auto compiled_block = m_compiled_blocks.find(std::make_pair(address, revision));
|
||||
if (compiled_block != m_compiled_blocks.end()) {
|
||||
compiled_block->second.reference_count--;
|
||||
auto compiled = m_compiled_shared.find(std::make_pair(address, revision));
|
||||
if (compiled != m_compiled_shared.end()) {
|
||||
compiled->second.second--;
|
||||
}
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::RequestCompilation(u32 address) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_pending_compilation_blocks_mutex);
|
||||
m_pending_compilation_blocks.insert(address);
|
||||
std::lock_guard<std::mutex> lock(m_uncompiled_shared_lock);
|
||||
m_uncompiled_shared.push_back(address);
|
||||
}
|
||||
|
||||
if (!IsAlive()) {
|
||||
|
@ -143,42 +145,67 @@ void PPULLVMRecompiler::RequestCompilation(u32 address) {
|
|||
Notify();
|
||||
}
|
||||
|
||||
u32 PPULLVMRecompiler::GetCurrentRevision() {
|
||||
return m_revision;
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::Task() {
|
||||
std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
while (!TestDestroy() && !Emu.IsStopped()) {
|
||||
WaitForAnySignal(500);
|
||||
// Wait a few ms for something to happen
|
||||
auto idling_start = std::chrono::high_resolution_clock::now();
|
||||
WaitForAnySignal(250);
|
||||
auto idling_end = std::chrono::high_resolution_clock::now();
|
||||
m_idling_time += std::chrono::duration_cast<std::chrono::nanoseconds>(idling_end - idling_start);
|
||||
|
||||
u32 num_blocks_compiled = 0;
|
||||
// Update the set of blocks that have been hit with the set of blocks that have been requested for compilation.
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_uncompiled_shared_lock);
|
||||
for (auto i = m_uncompiled_shared.begin(); i != m_uncompiled_shared.end(); i++) {
|
||||
m_hit_blocks.insert(*i);
|
||||
}
|
||||
}
|
||||
|
||||
u32 num_compiled = 0;
|
||||
while (!TestDestroy() && !Emu.IsStopped()) {
|
||||
u32 address;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_pending_compilation_blocks_mutex);
|
||||
std::lock_guard<std::mutex> lock(m_uncompiled_shared_lock);
|
||||
|
||||
auto i = m_pending_compilation_blocks.begin();
|
||||
if (i != m_pending_compilation_blocks.end()) {
|
||||
auto i = m_uncompiled_shared.begin();
|
||||
if (i != m_uncompiled_shared.end()) {
|
||||
address = *i;
|
||||
m_pending_compilation_blocks.erase(i);
|
||||
m_uncompiled_shared.erase(i);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Compile(address);
|
||||
num_blocks_compiled++;
|
||||
m_hit_blocks.insert(address);
|
||||
if (NeedsCompiling(address)) {
|
||||
Compile(address);
|
||||
num_compiled++;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_blocks_compiled == 0) {
|
||||
if (num_compiled == 0) {
|
||||
// If we get here, it means the recompilation thread is idling.
|
||||
|
||||
// We use this oppurtunity to optimize the code.
|
||||
RemoveUnusedOldVersions();
|
||||
for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) {
|
||||
if (NeedsCompiling(i->first.first)) {
|
||||
Compile(i->first.first);
|
||||
num_compiled++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
|
||||
m_idling_time = std::chrono::duration_cast<std::chrono::duration<double>>(end - start - m_compilation_time);
|
||||
|
||||
LOG_NOTICE(PPU, "Compilation thread exiting.");
|
||||
m_total_time = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
|
||||
LOG_NOTICE(PPU, "PPU LLVM compiler thread exiting.");
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::Decode(const u32 code) {
|
||||
|
@ -219,7 +246,7 @@ void PPULLVMRecompiler::VADDCUW(u32 vd, u32 va, u32 vb) {
|
|||
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
|
||||
|
||||
u32 not_mask_v4i32[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
||||
va_v4i32 = m_ir_builder->CreateXor(va_v4i32, ConstantDataVector::get(*m_llvm_context, not_mask_v4i32));
|
||||
va_v4i32 = m_ir_builder->CreateXor(va_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), not_mask_v4i32));
|
||||
auto cmpv4i1 = m_ir_builder->CreateICmpULT(va_v4i32, vb_v4i32);
|
||||
auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
SetVr(vd, cmpv4i32);
|
||||
|
@ -264,7 +291,7 @@ void PPULLVMRecompiler::VADDSWS(u32 vd, u32 va, u32 vb) {
|
|||
// of any one of the operands.
|
||||
u32 tmp1_v4i32[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
|
||||
auto tmp2_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31);
|
||||
tmp2_v4i32 = m_ir_builder->CreateAdd(tmp2_v4i32, ConstantDataVector::get(*m_llvm_context, tmp1_v4i32));
|
||||
tmp2_v4i32 = m_ir_builder->CreateAdd(tmp2_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), tmp1_v4i32));
|
||||
auto tmp2_v16i8 = m_ir_builder->CreateBitCast(tmp2_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
|
||||
|
||||
// Next, we find if the addition can actually result in an overflow. Since an overflow can only happen if the operands
|
||||
|
@ -273,7 +300,7 @@ void PPULLVMRecompiler::VADDSWS(u32 vd, u32 va, u32 vb) {
|
|||
// same sign.
|
||||
auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32);
|
||||
u32 not_mask_v4i32[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
||||
tmp3_v4i32 = m_ir_builder->CreateXor(tmp3_v4i32, ConstantDataVector::get(*m_llvm_context, not_mask_v4i32));
|
||||
tmp3_v4i32 = m_ir_builder->CreateXor(tmp3_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), not_mask_v4i32));
|
||||
|
||||
// Perform the sum.
|
||||
auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32);
|
||||
|
@ -357,7 +384,7 @@ void PPULLVMRecompiler::VANDC(u32 vd, u32 va, u32 vb) {
|
|||
auto va_v4i32 = GetVrAsIntVec(va, 32);
|
||||
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
|
||||
u32 not_mask_v4i32[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
||||
vb_v4i32 = m_ir_builder->CreateXor(vb_v4i32, ConstantDataVector::get(*m_llvm_context, not_mask_v4i32));
|
||||
vb_v4i32 = m_ir_builder->CreateXor(vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), not_mask_v4i32));
|
||||
auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32);
|
||||
SetVr(vd, res_v4i32);
|
||||
}
|
||||
|
@ -369,7 +396,7 @@ void PPULLVMRecompiler::VAVGSB(u32 vd, u32 va, u32 vb) {
|
|||
auto vb_v16i16 = m_ir_builder->CreateSExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
|
||||
auto sum_v16i16 = m_ir_builder->CreateAdd(va_v16i16, vb_v16i16);
|
||||
u16 one_v16i16[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, ConstantDataVector::get(*m_llvm_context, one_v16i16));
|
||||
sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), one_v16i16));
|
||||
auto avg_v16i16 = m_ir_builder->CreateAShr(sum_v16i16, 1);
|
||||
auto avg_v16i8 = m_ir_builder->CreateTrunc(avg_v16i16, VectorType::get(m_ir_builder->getInt8Ty(), 16));
|
||||
SetVr(vd, avg_v16i8);
|
||||
|
@ -382,7 +409,7 @@ void PPULLVMRecompiler::VAVGSH(u32 vd, u32 va, u32 vb) {
|
|||
auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
|
||||
auto sum_v8i32 = m_ir_builder->CreateAdd(va_v8i32, vb_v8i32);
|
||||
u32 one_v8i32[8] = {1, 1, 1, 1, 1, 1, 1, 1};
|
||||
sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, ConstantDataVector::get(*m_llvm_context, one_v8i32));
|
||||
sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), one_v8i32));
|
||||
auto avg_v8i32 = m_ir_builder->CreateAShr(sum_v8i32, 1);
|
||||
auto avg_v8i16 = m_ir_builder->CreateTrunc(avg_v8i32, VectorType::get(m_ir_builder->getInt16Ty(), 8));
|
||||
SetVr(vd, avg_v8i16);
|
||||
|
@ -395,7 +422,7 @@ void PPULLVMRecompiler::VAVGSW(u32 vd, u32 va, u32 vb) {
|
|||
auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
|
||||
auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64);
|
||||
u64 one_v4i64[4] = {1, 1, 1, 1};
|
||||
sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, ConstantDataVector::get(*m_llvm_context, one_v4i64));
|
||||
sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, ConstantDataVector::get(m_ir_builder->getContext(), one_v4i64));
|
||||
auto avg_v4i64 = m_ir_builder->CreateAShr(sum_v4i64, 1);
|
||||
auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
SetVr(vd, avg_v4i32);
|
||||
|
@ -422,7 +449,7 @@ void PPULLVMRecompiler::VAVGUW(u32 vd, u32 va, u32 vb) {
|
|||
auto vb_v4i64 = m_ir_builder->CreateZExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
|
||||
auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64);
|
||||
u64 one_v4i64[4] = {1, 1, 1, 1};
|
||||
sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, ConstantDataVector::get(*m_llvm_context, one_v4i64));
|
||||
sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, ConstantDataVector::get(m_ir_builder->getContext(), one_v4i64));
|
||||
auto avg_v4i64 = m_ir_builder->CreateLShr(sum_v4i64, 1);
|
||||
auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
SetVr(vd, avg_v4i32);
|
||||
|
@ -435,7 +462,7 @@ void PPULLVMRecompiler::VCFSX(u32 vd, u32 uimm5, u32 vb) {
|
|||
if (uimm5) {
|
||||
float scale = (float)((u64)1 << uimm5);
|
||||
float scale_v4f32[4] = {scale, scale, scale, scale};
|
||||
res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, ConstantDataVector::get(*m_llvm_context, scale_v4f32));
|
||||
res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, ConstantDataVector::get(m_ir_builder->getContext(), scale_v4f32));
|
||||
}
|
||||
|
||||
SetVr(vd, res_v4f32);
|
||||
|
@ -448,7 +475,7 @@ void PPULLVMRecompiler::VCFUX(u32 vd, u32 uimm5, u32 vb) {
|
|||
if (uimm5) {
|
||||
float scale = (float)((u64)1 << uimm5);
|
||||
float scale_v4f32[4] = {scale, scale, scale, scale};
|
||||
res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, ConstantDataVector::get(*m_llvm_context, scale_v4f32));
|
||||
res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, ConstantDataVector::get(m_ir_builder->getContext(), scale_v4f32));
|
||||
}
|
||||
|
||||
SetVr(vd, res_v4f32);
|
||||
|
@ -475,7 +502,7 @@ void PPULLVMRecompiler::VCMPBFP_(u32 vd, u32 va, u32 vb) {
|
|||
|
||||
auto vd_v16i8 = GetVrAsIntVec(vd, 8);
|
||||
u8 mask_v16i8[16] = {3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
vd_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vd_v16i8, ConstantDataVector::get(*m_llvm_context, mask_v16i8));
|
||||
vd_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vd_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i8));
|
||||
auto vd_v4i32 = m_ir_builder->CreateBitCast(vd_v16i8, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
auto vd_mask_i32 = m_ir_builder->CreateExtractElement(vd_v4i32, m_ir_builder->getInt32(0));
|
||||
auto cmp_i1 = m_ir_builder->CreateICmpEQ(vd_mask_i32, m_ir_builder->getInt32(0));
|
||||
|
@ -1150,8 +1177,13 @@ void PPULLVMRecompiler::ADDIS(u32 rd, u32 ra, s32 simm16) {
|
|||
}
|
||||
|
||||
void PPULLVMRecompiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) {
|
||||
InterpreterCall("BC", &PPUInterpreter::BC, bo, bi, bd, aa, lk);
|
||||
m_hit_branch_instruction = true;
|
||||
auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, bd));
|
||||
CreateBranch(CheckBranchCondition(bo, bi), target_i64, lk ? true : false);
|
||||
//m_hit_branch_instruction = true;
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address));
|
||||
//InterpreterCall("BC", &PPUInterpreter::BC, bo, bi, bd, aa, lk);
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
|
||||
//m_ir_builder->CreateRetVoid();
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::SC(u32 sc_code) {
|
||||
|
@ -1159,8 +1191,12 @@ void PPULLVMRecompiler::SC(u32 sc_code) {
|
|||
}
|
||||
|
||||
void PPULLVMRecompiler::B(s32 ll, u32 aa, u32 lk) {
|
||||
InterpreterCall("B", &PPUInterpreter::B, ll, aa, lk);
|
||||
m_hit_branch_instruction = true;
|
||||
auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, ll));
|
||||
CreateBranch(nullptr, target_i64, lk ? true : false);
|
||||
//m_hit_branch_instruction = true;
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address));
|
||||
//InterpreterCall("B", &PPUInterpreter::B, ll, aa, lk);
|
||||
//m_ir_builder->CreateRetVoid();
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::MCRF(u32 crfd, u32 crfs) {
|
||||
|
@ -1174,8 +1210,14 @@ void PPULLVMRecompiler::MCRF(u32 crfd, u32 crfs) {
|
|||
}
|
||||
|
||||
void PPULLVMRecompiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) {
|
||||
InterpreterCall("BCLR", &PPUInterpreter::BCLR, bo, bi, bh, lk);
|
||||
m_hit_branch_instruction = true;
|
||||
auto lr_i64 = GetLr();
|
||||
lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL);
|
||||
CreateBranch(CheckBranchCondition(bo, bi), lr_i64, lk ? true : false);
|
||||
//m_hit_branch_instruction = true;
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address));
|
||||
//InterpreterCall("BCLR", &PPUInterpreter::BCLR, bo, bi, bh, lk);
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
|
||||
//m_ir_builder->CreateRetVoid();
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::CRNOR(u32 crbd, u32 crba, u32 crbb) {
|
||||
|
@ -1268,8 +1310,14 @@ void PPULLVMRecompiler::CROR(u32 crbd, u32 crba, u32 crbb) {
|
|||
}
|
||||
|
||||
void PPULLVMRecompiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) {
|
||||
InterpreterCall("BCCTR", &PPUInterpreter::BCCTR, bo, bi, bh, lk);
|
||||
m_hit_branch_instruction = true;
|
||||
auto ctr_i64 = GetCtr();
|
||||
ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL);
|
||||
CreateBranch(CheckBranchCondition(bo, bi), ctr_i64, lk ? true : false);
|
||||
//m_hit_branch_instruction = true;
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address));
|
||||
//InterpreterCall("BCCTR", &PPUInterpreter::BCCTR, bo, bi, bh, lk);
|
||||
//SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
|
||||
//m_ir_builder->CreateRetVoid();
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) {
|
||||
|
@ -3130,7 +3178,7 @@ void PPULLVMRecompiler::FCFID(u32 frd, u32 frb, bool rc) {
|
|||
auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy());
|
||||
SetFpr(frd, res_f64);
|
||||
|
||||
// TODO: Set flag
|
||||
// TODO: Set flags
|
||||
//InterpreterCall("FCFID", &PPUInterpreter::FCFID, frd, frb, rc);
|
||||
}
|
||||
|
||||
|
@ -3138,71 +3186,186 @@ void PPULLVMRecompiler::UNK(const u32 code, const u32 opcode, const u32 gcode) {
|
|||
//InterpreterCall("UNK", &PPUInterpreter::UNK, code, opcode, gcode);
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::Compile(u32 address) {
|
||||
std::chrono::high_resolution_clock::time_point compilation_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto function_name = fmt::Format("fn_0x%X", address);
|
||||
m_function = m_module->getFunction(function_name);
|
||||
if (!m_function) {
|
||||
|
||||
m_function = (Function *)m_module->getOrInsertFunction(function_name, m_ir_builder->getVoidTy(),
|
||||
m_ir_builder->getInt8PtrTy() /*ppu_state*/,
|
||||
m_ir_builder->getInt64Ty() /*base_addres*/,
|
||||
m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr);
|
||||
m_function->setCallingConv(CallingConv::X86_64_Win64);
|
||||
auto arg_i = m_function->arg_begin();
|
||||
arg_i->setName("ppu_state");
|
||||
(++arg_i)->setName("base_address");
|
||||
(++arg_i)->setName("interpreter");
|
||||
|
||||
auto block = BasicBlock::Create(*m_llvm_context, "start", m_function);
|
||||
m_ir_builder->SetInsertPoint(block);
|
||||
|
||||
u32 offset = 0;
|
||||
m_hit_branch_instruction = false;
|
||||
while (!m_hit_branch_instruction) {
|
||||
u32 instr = vm::read32(address + offset);
|
||||
Decode(instr);
|
||||
offset += 4;
|
||||
|
||||
SetPc(m_ir_builder->getInt32(address + offset));
|
||||
}
|
||||
|
||||
m_ir_builder->CreateRetVoid();
|
||||
m_fpm->run(*m_function);
|
||||
|
||||
MachineCodeInfo mci;
|
||||
m_execution_engine->runJITOnFunction(m_function, &mci);
|
||||
|
||||
CompiledBlockInfo block_info;
|
||||
block_info.block_address = address;
|
||||
block_info.revision = 0xFFFFFFFF;
|
||||
block_info.block = (CompiledBlock)mci.address();
|
||||
block_info.reference_count = 0;
|
||||
block_info.size = mci.size();
|
||||
block_info.llvm_function = m_function;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_compiled_blocks_mutex);
|
||||
m_compiled_blocks[std::make_pair(address, block_info.revision)] = block_info;
|
||||
BasicBlock * PPULLVMRecompiler::GetBlockInFunction(u32 address, Function * function, bool create_if_not_exist) {
|
||||
auto block_name = fmt::Format("instr_0x%X", address);
|
||||
BasicBlock * block = nullptr;
|
||||
for (auto i = function->getBasicBlockList().begin(); i != function->getBasicBlockList().end(); i++) {
|
||||
if (i->getName() == block_name) {
|
||||
block = &(*i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::high_resolution_clock::time_point compilation_end = std::chrono::high_resolution_clock::now();
|
||||
m_compilation_time += std::chrono::duration_cast<std::chrono::duration<double>>(compilation_end - compilation_start);
|
||||
if (!block && create_if_not_exist) {
|
||||
block = BasicBlock::Create(m_ir_builder->getContext(), block_name, function);
|
||||
}
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::Compile(u32 address) {
|
||||
auto compilation_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Get the revision number for this section
|
||||
u32 revision = 0;
|
||||
auto compiled = m_compiled.lower_bound(std::make_pair(address, 0));
|
||||
if (compiled != m_compiled.end() && compiled->first.first == address) {
|
||||
revision = ~(compiled->first.second);
|
||||
revision++;
|
||||
}
|
||||
|
||||
auto ir_build_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Create a function for this section
|
||||
auto function_name = fmt::Format("fn_0x%X_%u", address, revision);
|
||||
m_current_function = (Function *)m_module->getOrInsertFunction(function_name, m_ir_builder->getVoidTy(),
|
||||
m_ir_builder->getInt8PtrTy() /*ppu_state*/,
|
||||
m_ir_builder->getInt64Ty() /*base_addres*/,
|
||||
m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr);
|
||||
m_current_function->setCallingConv(CallingConv::X86_64_Win64);
|
||||
auto arg_i = m_current_function->arg_begin();
|
||||
arg_i->setName("ppu_state");
|
||||
(++arg_i)->setName("base_address");
|
||||
(++arg_i)->setName("interpreter");
|
||||
|
||||
// Add an entry block that branches to the first instruction
|
||||
m_ir_builder->SetInsertPoint(BasicBlock::Create(m_ir_builder->getContext(), "entry", m_current_function));
|
||||
m_ir_builder->CreateBr(GetBlockInFunction(address, m_current_function, true));
|
||||
|
||||
// Convert each block in this section to LLVM IR
|
||||
m_num_instructions = 0;
|
||||
m_current_function_uncompiled_blocks_list.clear();
|
||||
m_current_function_unhit_blocks_list.clear();
|
||||
m_current_function_uncompiled_blocks_list.push_back(address);
|
||||
while (!m_current_function_uncompiled_blocks_list.empty()) {
|
||||
m_current_instruction_address = m_current_function_uncompiled_blocks_list.front();
|
||||
auto block = GetBlockInFunction(m_current_instruction_address, m_current_function, true);
|
||||
m_hit_branch_instruction = false;
|
||||
m_ir_builder->SetInsertPoint(block);
|
||||
m_current_function_uncompiled_blocks_list.pop_front();
|
||||
|
||||
while (!m_hit_branch_instruction) {
|
||||
if (!block->getInstList().empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
u32 instr = vm::read32(m_current_instruction_address);
|
||||
Decode(instr);
|
||||
m_num_instructions++;
|
||||
|
||||
m_current_instruction_address += 4;
|
||||
if (!m_hit_branch_instruction) {
|
||||
block = GetBlockInFunction(m_current_instruction_address, m_current_function, true);
|
||||
m_ir_builder->CreateBr(block);
|
||||
m_ir_builder->SetInsertPoint(block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto ir_build_end = std::chrono::high_resolution_clock::now();
|
||||
m_ir_build_time += std::chrono::duration_cast<std::chrono::nanoseconds>(ir_build_end - ir_build_start);
|
||||
|
||||
// Optimize this function
|
||||
auto optimize_start = std::chrono::high_resolution_clock::now();
|
||||
m_fpm->run(*m_current_function);
|
||||
auto optimize_end = std::chrono::high_resolution_clock::now();
|
||||
m_optimizing_time += std::chrono::duration_cast<std::chrono::nanoseconds>(optimize_end - optimize_start);
|
||||
|
||||
// Translate to machine code
|
||||
auto translate_start = std::chrono::high_resolution_clock::now();
|
||||
MachineCodeInfo mci;
|
||||
m_execution_engine->runJITOnFunction(m_current_function, &mci);
|
||||
auto translate_end = std::chrono::high_resolution_clock::now();
|
||||
m_translation_time += std::chrono::duration_cast<std::chrono::nanoseconds>(translate_end - translate_start);
|
||||
|
||||
// Add the executable to private and shared data stores
|
||||
ExecutableInfo executable_info;
|
||||
executable_info.executable = (Executable)mci.address();
|
||||
executable_info.size = mci.size();
|
||||
executable_info.num_instructions = m_num_instructions;
|
||||
executable_info.unhit_blocks_list = std::move(m_current_function_unhit_blocks_list);
|
||||
executable_info.llvm_function = m_current_function;
|
||||
m_compiled[std::make_pair(address, ~revision)] = executable_info;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_compiled_shared_lock);
|
||||
m_compiled_shared[std::make_pair(address, ~revision)] = std::make_pair(executable_info.executable, 0);
|
||||
}
|
||||
|
||||
auto compilation_end = std::chrono::high_resolution_clock::now();
|
||||
m_compilation_time += std::chrono::duration_cast<std::chrono::nanoseconds>(compilation_end - compilation_start);
|
||||
m_revision++;
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::RemoveUnusedOldVersions() {
|
||||
u32 num_removed = 0;
|
||||
u32 prev_address = 0;
|
||||
for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) {
|
||||
u32 current_address = i->first.first;
|
||||
if (prev_address == current_address) {
|
||||
bool erase_this_entry = false;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_compiled_shared_lock);
|
||||
auto j = m_compiled_shared.find(i->first);
|
||||
if (j->second.second == 0) {
|
||||
m_compiled_shared.erase(j);
|
||||
erase_this_entry = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (erase_this_entry) {
|
||||
auto tmp = i;
|
||||
i--;
|
||||
m_execution_engine->freeMachineCodeForFunction(tmp->second.llvm_function);
|
||||
tmp->second.llvm_function->eraseFromParent();
|
||||
m_compiled.erase(tmp);
|
||||
num_removed++;
|
||||
}
|
||||
}
|
||||
|
||||
prev_address = current_address;
|
||||
}
|
||||
|
||||
if (num_removed > 0) {
|
||||
LOG_NOTICE(PPU, "Removed %u old versions", num_removed);
|
||||
}
|
||||
}
|
||||
|
||||
bool PPULLVMRecompiler::NeedsCompiling(u32 address) {
|
||||
auto i = m_compiled.lower_bound(std::make_pair(address, 0));
|
||||
if (i != m_compiled.end() && i->first.first == address) {
|
||||
if (i->second.num_instructions >= 300) {
|
||||
// This section has reached its limit. Don't allow further expansion.
|
||||
return false;
|
||||
}
|
||||
|
||||
// If any of the unhit blocks in this function have been hit, then recompile this section
|
||||
for(auto j = i->second.unhit_blocks_list.begin(); j != i->second.unhit_blocks_list.end(); j++) {
|
||||
if (m_hit_blocks.find(*j) != m_hit_blocks.end()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
} else {
|
||||
// This section has not been encountered before
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
Value * PPULLVMRecompiler::GetPPUState() {
|
||||
return m_function->arg_begin();
|
||||
return m_current_function->arg_begin();
|
||||
}
|
||||
|
||||
Value * PPULLVMRecompiler::GetBaseAddress() {
|
||||
auto i = m_function->arg_begin();
|
||||
auto i = m_current_function->arg_begin();
|
||||
i++;
|
||||
return i;
|
||||
}
|
||||
|
||||
Value * PPULLVMRecompiler::GetInterpreter() {
|
||||
auto i = m_function->arg_begin();
|
||||
auto i = m_current_function->arg_begin();
|
||||
i++;
|
||||
i++;
|
||||
return i;
|
||||
|
@ -3222,7 +3385,7 @@ Value * PPULLVMRecompiler::GetBit(Value * val, u32 n) {
|
|||
bit = m_ir_builder->CreateLShr(val, val->getType()->getIntegerBitWidth() - n - 1);
|
||||
}
|
||||
|
||||
bit = m_ir_builder->CreateAnd(val, 1);
|
||||
bit = m_ir_builder->CreateAnd(bit, 1);
|
||||
#ifdef PPU_LLVM_RECOMPILER_USE_BMI
|
||||
}
|
||||
#endif
|
||||
|
@ -3326,9 +3489,10 @@ Value * PPULLVMRecompiler::GetPc() {
|
|||
return m_ir_builder->CreateLoad(pc_i32_ptr);
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::SetPc(Value * val_i32) {
|
||||
void PPULLVMRecompiler::SetPc(Value * val_ix) {
|
||||
auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, PC));
|
||||
auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
|
||||
auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty());
|
||||
m_ir_builder->CreateStore(val_i32, pc_i32_ptr);
|
||||
}
|
||||
|
||||
|
@ -3549,6 +3713,109 @@ void PPULLVMRecompiler::SetVr(u32 vr, Value * val_x128) {
|
|||
m_ir_builder->CreateStore(val_i128, vr_i128_ptr);
|
||||
}
|
||||
|
||||
Value * PPULLVMRecompiler::CheckBranchCondition(u32 bo, u32 bi) {
|
||||
bool bo0 = bo & 0x10 ? true : false;
|
||||
bool bo1 = bo & 0x08 ? true : false;
|
||||
bool bo2 = bo & 0x04 ? true : false;
|
||||
bool bo3 = bo & 0x02 ? true : false;
|
||||
|
||||
auto ctr_i64 = GetCtr();
|
||||
if (!bo2) {
|
||||
ctr_i64 = m_ir_builder->CreateSub(ctr_i64, m_ir_builder->getInt64(1));
|
||||
SetCtr(ctr_i64);
|
||||
}
|
||||
|
||||
Value * ctr_ok_i1 = nullptr;
|
||||
if (!bo2) {
|
||||
// TODO: Check if we should compare all bits or just the lower 32 bits. This depends on MSR[SF]. Not sure what it is for PS3.
|
||||
ctr_ok_i1 = m_ir_builder->CreateICmpNE(ctr_i64, m_ir_builder->getInt64(0));
|
||||
if (bo3) {
|
||||
ctr_ok_i1 = m_ir_builder->CreateXor(ctr_ok_i1, m_ir_builder->getInt1(bo3));
|
||||
}
|
||||
}
|
||||
|
||||
Value * cond_ok_i1 = nullptr;
|
||||
if (!bo0) {
|
||||
auto cr_bi_i32 = GetBit(GetCr(), bi);
|
||||
cond_ok_i1 = m_ir_builder->CreateTrunc(cr_bi_i32, m_ir_builder->getInt1Ty());
|
||||
if (!bo1) {
|
||||
cond_ok_i1 = m_ir_builder->CreateXor(cond_ok_i1, m_ir_builder->getInt1(!bo1));
|
||||
}
|
||||
}
|
||||
|
||||
Value * cmp_i1 = nullptr;
|
||||
if (ctr_ok_i1 && cond_ok_i1) {
|
||||
cmp_i1 = m_ir_builder->CreateAnd(ctr_ok_i1, cond_ok_i1);
|
||||
} else if (ctr_ok_i1) {
|
||||
cmp_i1 = ctr_ok_i1;
|
||||
} else if (cond_ok_i1) {
|
||||
cmp_i1 = cond_ok_i1;
|
||||
}
|
||||
|
||||
return cmp_i1;
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk) {
|
||||
if (lk) {
|
||||
SetLr(m_ir_builder->getInt64(m_current_instruction_address + 4));
|
||||
}
|
||||
|
||||
auto current_block = m_ir_builder->GetInsertBlock();
|
||||
BasicBlock * target_block = nullptr;
|
||||
if (dyn_cast<ConstantInt>(target_i64)) {
|
||||
// Target address is an immediate value.
|
||||
u32 target_address = (u32)(dyn_cast<ConstantInt>(target_i64)->getLimitedValue());
|
||||
target_block = GetBlockInFunction(target_address, m_current_function);
|
||||
if (!target_block) {
|
||||
target_block = GetBlockInFunction(target_address, m_current_function, true);
|
||||
if ((m_hit_blocks.find(target_address) != m_hit_blocks.end() || !cmp_i1) && m_num_instructions < 300) {
|
||||
// Target block has either been hit or this is an unconditional branch.
|
||||
m_current_function_uncompiled_blocks_list.push_back(target_address);
|
||||
m_hit_blocks.insert(target_address);
|
||||
} else {
|
||||
// Target block has not been encountered yet and this is not an unconditional branch
|
||||
m_ir_builder->SetInsertPoint(target_block);
|
||||
SetPc(target_i64);
|
||||
m_ir_builder->CreateRetVoid();
|
||||
m_current_function_unhit_blocks_list.push_back(target_address);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Target addres is in a register
|
||||
target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function);
|
||||
m_ir_builder->SetInsertPoint(target_block);
|
||||
SetPc(target_i64);
|
||||
m_ir_builder->CreateRetVoid();
|
||||
}
|
||||
|
||||
if (cmp_i1) {
|
||||
// Conditional branch
|
||||
auto next_block = GetBlockInFunction(m_current_instruction_address + 4, m_current_function);
|
||||
if (!next_block) {
|
||||
next_block = GetBlockInFunction(m_current_instruction_address + 4, m_current_function, true);
|
||||
if (m_hit_blocks.find(m_current_instruction_address + 4) != m_hit_blocks.end() && m_num_instructions < 300) {
|
||||
// Next block has already been hit.
|
||||
m_current_function_uncompiled_blocks_list.push_back(m_current_instruction_address + 4);
|
||||
} else {
|
||||
// Next block has not been encountered yet
|
||||
m_ir_builder->SetInsertPoint(next_block);
|
||||
SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
|
||||
m_ir_builder->CreateRetVoid();
|
||||
m_current_function_unhit_blocks_list.push_back(m_current_instruction_address + 4);
|
||||
}
|
||||
}
|
||||
|
||||
m_ir_builder->SetInsertPoint(current_block);
|
||||
m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block);
|
||||
} else {
|
||||
// Unconditional branch
|
||||
m_ir_builder->SetInsertPoint(current_block);
|
||||
m_ir_builder->CreateBr(target_block);
|
||||
}
|
||||
|
||||
m_hit_branch_instruction = true;
|
||||
}
|
||||
|
||||
Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, bool bswap) {
|
||||
if (bits != 32) {
|
||||
auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, GetBaseAddress());
|
||||
|
@ -3560,10 +3827,22 @@ Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, bool bswap) {
|
|||
|
||||
return val_ix;
|
||||
} else {
|
||||
BasicBlock * next_block = nullptr;
|
||||
for (auto i = m_current_function->begin(); i != m_current_function->end(); i++) {
|
||||
if (&(*i) == m_ir_builder->GetInsertBlock()) {
|
||||
i++;
|
||||
if (i != m_current_function->end()) {
|
||||
next_block = &(*i);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR));
|
||||
auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_ir_builder->GetInsertBlock()->getParent());
|
||||
auto else_bb = BasicBlock::Create(m_ir_builder->getContext());
|
||||
auto merge_bb = BasicBlock::Create(m_ir_builder->getContext());
|
||||
auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
|
||||
auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
|
||||
auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
|
||||
m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb);
|
||||
|
||||
m_ir_builder->SetInsertPoint(then_bb);
|
||||
|
@ -3576,7 +3855,6 @@ Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, bool bswap) {
|
|||
|
||||
m_ir_builder->CreateBr(merge_bb);
|
||||
|
||||
m_ir_builder->GetInsertBlock()->getParent()->getBasicBlockList().push_back(else_bb);
|
||||
m_ir_builder->SetInsertPoint(else_bb);
|
||||
auto val_else_i32 = Call<u32>("vm_read32", (u32(*)(u64))vm::read32, addr_i64);
|
||||
if (!bswap) {
|
||||
|
@ -3584,7 +3862,6 @@ Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, bool bswap) {
|
|||
}
|
||||
m_ir_builder->CreateBr(merge_bb);
|
||||
|
||||
m_ir_builder->GetInsertBlock()->getParent()->getBasicBlockList().push_back(merge_bb);
|
||||
m_ir_builder->SetInsertPoint(merge_bb);
|
||||
auto phi = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 2);
|
||||
phi->addIncoming(val_then_i32, then_bb);
|
||||
|
@ -3604,10 +3881,22 @@ void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, bool bswap
|
|||
auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo());
|
||||
m_ir_builder->CreateStore(val_ix, eaddr_ix_ptr);
|
||||
} else {
|
||||
BasicBlock * next_block = nullptr;
|
||||
for (auto i = m_current_function->begin(); i != m_current_function->end(); i++) {
|
||||
if (&(*i) == m_ir_builder->GetInsertBlock()) {
|
||||
i++;
|
||||
if (i != m_current_function->end()) {
|
||||
next_block = &(*i);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR));
|
||||
auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_ir_builder->GetInsertBlock()->getParent());
|
||||
auto else_bb = BasicBlock::Create(m_ir_builder->getContext());
|
||||
auto merge_bb = BasicBlock::Create(m_ir_builder->getContext());
|
||||
auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
|
||||
auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
|
||||
auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
|
||||
m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb);
|
||||
|
||||
m_ir_builder->SetInsertPoint(then_bb);
|
||||
|
@ -3621,7 +3910,6 @@ void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, bool bswap
|
|||
m_ir_builder->CreateStore(val_then_i32, eaddr_i32_ptr);
|
||||
m_ir_builder->CreateBr(merge_bb);
|
||||
|
||||
m_ir_builder->GetInsertBlock()->getParent()->getBasicBlockList().push_back(else_bb);
|
||||
m_ir_builder->SetInsertPoint(else_bb);
|
||||
Value * val_else_i32 = val_ix;
|
||||
if (!bswap) {
|
||||
|
@ -3631,7 +3919,6 @@ void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, bool bswap
|
|||
Call<void>("vm_write32", (void(*)(u64, u32))vm::write32, addr_i64, val_else_i32);
|
||||
m_ir_builder->CreateBr(merge_bb);
|
||||
|
||||
m_ir_builder->GetInsertBlock()->getParent()->getBasicBlockList().push_back(merge_bb);
|
||||
m_ir_builder->SetInsertPoint(merge_bb);
|
||||
}
|
||||
}
|
||||
|
@ -3659,7 +3946,7 @@ Type * PPULLVMRecompiler::CppToLlvmType() {
|
|||
} else if (std::is_same<T, short>::value || std::is_same<T, unsigned short>::value) {
|
||||
return m_ir_builder->getInt16Ty();
|
||||
} else if (std::is_same<T, char>::value || std::is_same<T, unsigned char>::value) {
|
||||
return m_ir_builder->getInt8Ty();
|
||||
return m_ir_builder->getInt8Ty();
|
||||
} else if (std::is_same<T, float>::value) {
|
||||
return m_ir_builder->getFloatTy();
|
||||
} else if (std::is_same<T, double>::value) {
|
||||
|
@ -3725,35 +4012,69 @@ PPULLVMEmulator::~PPULLVMEmulator() {
|
|||
}
|
||||
|
||||
u8 PPULLVMEmulator::DecodeMemory(const u32 address) {
|
||||
static u32 last_instr_address = 0;
|
||||
static bool s_last_instr_was_branch = false;
|
||||
static auto s_last_cache_clear_time = std::chrono::high_resolution_clock::now();
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto compiled_block = m_compiled_blocks.find(address);
|
||||
if (compiled_block == m_compiled_blocks.end()) {
|
||||
auto compiled_block_info = s_recompiler->GetCompiledBlock(address);
|
||||
if (compiled_block_info.block) {
|
||||
compiled_block = m_compiled_blocks.insert(m_compiled_blocks.end(), std::make_pair(address, std::make_pair(compiled_block_info.block, compiled_block_info.revision)));
|
||||
m_pending_compilation_blocks.erase(address);
|
||||
if (std::chrono::duration_cast<std::chrono::milliseconds>(now - s_last_cache_clear_time).count() > 1000) {
|
||||
static u32 s_revision = 0;
|
||||
bool clear_all = false;
|
||||
|
||||
u32 revision = s_recompiler->GetCurrentRevision();
|
||||
if (s_revision != revision) {
|
||||
s_revision = revision;
|
||||
clear_all = true;
|
||||
}
|
||||
|
||||
for (auto iter = m_address_to_executable.begin(); iter != m_address_to_executable.end();) {
|
||||
auto tmp = iter;
|
||||
iter++;
|
||||
if (tmp->second.num_hits == 0 || clear_all) {
|
||||
m_address_to_executable.erase(tmp);
|
||||
s_recompiler->ReleaseExecutable(tmp->first, tmp->second.revision);
|
||||
} else {
|
||||
tmp->second.num_hits = 0;
|
||||
}
|
||||
}
|
||||
|
||||
s_last_cache_clear_time = now;
|
||||
}
|
||||
|
||||
auto address_to_executable_iter = m_address_to_executable.find(address);
|
||||
if (address_to_executable_iter == m_address_to_executable.end()) {
|
||||
auto executable_and_revision = s_recompiler->GetExecutable(address);
|
||||
if (executable_and_revision.first) {
|
||||
ExecutableInfo executable_info;
|
||||
executable_info.executable = executable_and_revision.first;
|
||||
executable_info.revision = executable_and_revision.second;
|
||||
executable_info.num_hits = 0;
|
||||
|
||||
address_to_executable_iter = m_address_to_executable.insert(m_address_to_executable.end(), std::make_pair(address, executable_info));
|
||||
m_uncompiled.erase(address);
|
||||
} else {
|
||||
if (address != (last_instr_address + 4)) {
|
||||
auto pending_compilation_block = m_pending_compilation_blocks.find(address);
|
||||
if (pending_compilation_block != m_pending_compilation_blocks.end()) {
|
||||
pending_compilation_block->second++;
|
||||
if ((pending_compilation_block->second % 1000) == 0) {
|
||||
if (s_last_instr_was_branch) {
|
||||
auto uncompiled_iter = m_uncompiled.find(address);
|
||||
if (uncompiled_iter != m_uncompiled.end()) {
|
||||
uncompiled_iter->second++;
|
||||
if ((uncompiled_iter->second % 1000) == 0) {
|
||||
s_recompiler->RequestCompilation(address);
|
||||
}
|
||||
} else {
|
||||
m_pending_compilation_blocks[address] = 0;
|
||||
m_uncompiled[address] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_instr_address = address;
|
||||
|
||||
if (compiled_block != m_compiled_blocks.end()) {
|
||||
compiled_block->second.first(&m_ppu, (u64)Memory.GetBaseAddr(), m_interpreter);
|
||||
return 0;
|
||||
u8 ret = 0;
|
||||
if (address_to_executable_iter != m_address_to_executable.end()) {
|
||||
address_to_executable_iter->second.executable(&m_ppu, (u64)Memory.GetBaseAddr(), m_interpreter);
|
||||
address_to_executable_iter->second.num_hits++;
|
||||
s_last_instr_was_branch = true;
|
||||
} else {
|
||||
return m_decoder.DecodeMemory(address);
|
||||
ret = m_decoder.DecodeMemory(address);
|
||||
s_last_instr_was_branch = m_ppu.m_is_branch;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -17,27 +17,7 @@ struct PPURegState;
|
|||
/// PPU to LLVM recompiler
|
||||
class PPULLVMRecompiler : public ThreadBase, protected PPUOpcodes, protected PPCDecoder {
|
||||
public:
|
||||
typedef void(*CompiledBlock)(PPUThread * ppu_state, u64 base_address, PPUInterpreter * interpreter);
|
||||
|
||||
struct CompiledBlockInfo {
|
||||
/// Address of the block
|
||||
u32 block_address;
|
||||
|
||||
/// The version of the block
|
||||
u32 revision;
|
||||
|
||||
/// Pointer to the block
|
||||
CompiledBlock block;
|
||||
|
||||
/// Size of the compiled block
|
||||
size_t size;
|
||||
|
||||
/// Reference count for the block
|
||||
u64 reference_count;
|
||||
|
||||
/// LLVM function for this block
|
||||
llvm::Function * llvm_function;
|
||||
};
|
||||
typedef void(*Executable)(PPUThread * ppu_state, u64 base_address, PPUInterpreter * interpreter);
|
||||
|
||||
PPULLVMRecompiler();
|
||||
|
||||
|
@ -49,15 +29,18 @@ public:
|
|||
PPULLVMRecompiler & operator = (const PPULLVMRecompiler & other) = delete;
|
||||
PPULLVMRecompiler & operator = (PPULLVMRecompiler && other) = delete;
|
||||
|
||||
/// Get a compiled block
|
||||
CompiledBlockInfo GetCompiledBlock(u32 address);
|
||||
/// Get the executable for the code starting at address
|
||||
std::pair<Executable, u32> GetExecutable(u32 address);
|
||||
|
||||
/// Release a compiled block
|
||||
void ReleaseCompiledBlock(u32 address, u32 revision);
|
||||
/// Release an executable earlier obtained through GetExecutable
|
||||
void ReleaseExecutable(u32 address, u32 revision);
|
||||
|
||||
/// Request a block to be compiled
|
||||
/// Request the code at the sepcified address to be compiled
|
||||
void RequestCompilation(u32 address);
|
||||
|
||||
/// Get the current revision
|
||||
u32 GetCurrentRevision();
|
||||
|
||||
/// Execute all tests
|
||||
void RunAllTests(PPUThread * ppu_state, u64 base_address, PPUInterpreter * interpreter);
|
||||
|
||||
|
@ -467,18 +450,45 @@ protected:
|
|||
void UNK(const u32 code, const u32 opcode, const u32 gcode) override;
|
||||
|
||||
private:
|
||||
/// Mutex for accessing m_compiled_blocks
|
||||
std::mutex m_compiled_blocks_mutex;
|
||||
struct ExecutableInfo {
|
||||
/// Pointer to the executable
|
||||
Executable executable;
|
||||
|
||||
/// Blocks that have been compiled
|
||||
/// Key is block address
|
||||
std::map<std::pair<u32, u32>, CompiledBlockInfo> m_compiled_blocks;
|
||||
/// Size of the executable
|
||||
size_t size;
|
||||
|
||||
/// Mutex for accessing m_pending_compilation_blocks;
|
||||
std::mutex m_pending_compilation_blocks_mutex;
|
||||
/// Number of PPU instructions compiled into this executable
|
||||
u32 num_instructions;
|
||||
|
||||
/// Blocks pending compilation
|
||||
std::set<u32> m_pending_compilation_blocks;
|
||||
/// List of blocks that this executable refers to that have not been hit yet
|
||||
std::list<u32> unhit_blocks_list;
|
||||
|
||||
/// LLVM function corresponding to the executable
|
||||
llvm::Function * llvm_function;
|
||||
};
|
||||
|
||||
/// Lock for accessing m_compiled_shared
|
||||
// TODO: Use a RW lock
|
||||
std::mutex m_compiled_shared_lock;
|
||||
|
||||
/// Sections that have been compiled. This data store is shared with the execution threads.
|
||||
/// Keys are starting address of the section and ~revision. Data is pointer to the executable and its reference count.
|
||||
std::map<std::pair<u32, u32>, std::pair<Executable, u32>> m_compiled_shared;
|
||||
|
||||
/// Lock for accessing m_uncompiled_shared
|
||||
std::mutex m_uncompiled_shared_lock;
|
||||
|
||||
/// Current revision. This is incremented everytime a section is compiled.
|
||||
std::atomic<u32> m_revision;
|
||||
|
||||
/// Sections that have not been compiled yet. This data store is shared with the execution threads.
|
||||
std::list<u32> m_uncompiled_shared;
|
||||
|
||||
/// Set of all blocks that have been hit
|
||||
std::set<u32> m_hit_blocks;
|
||||
|
||||
/// Sections that have been compiled. Keys are starting address of the section and ~revision.
|
||||
std::map<std::pair<u32, u32>, ExecutableInfo> m_compiled;
|
||||
|
||||
/// LLVM context
|
||||
llvm::LLVMContext * m_llvm_context;
|
||||
|
@ -504,20 +514,53 @@ private:
|
|||
bool m_hit_branch_instruction;
|
||||
|
||||
/// The function being compiled
|
||||
llvm::Function * m_function;
|
||||
llvm::Function * m_current_function;
|
||||
|
||||
/// List of blocks to be compiled in the current function being compiled
|
||||
std::list<u32> m_current_function_uncompiled_blocks_list;
|
||||
|
||||
/// List of blocks that the current function refers to but have not been hit yet
|
||||
std::list<u32> m_current_function_unhit_blocks_list;
|
||||
|
||||
/// Address of the current instruction
|
||||
u32 m_current_instruction_address;
|
||||
|
||||
/// Number of instructions in this section
|
||||
u32 m_num_instructions;
|
||||
|
||||
/// Time spent building the LLVM IR
|
||||
std::chrono::nanoseconds m_ir_build_time;
|
||||
|
||||
/// Time spent optimizing
|
||||
std::chrono::nanoseconds m_optimizing_time;
|
||||
|
||||
/// Time spent translating LLVM IR to machine code
|
||||
std::chrono::nanoseconds m_translation_time;
|
||||
|
||||
/// Time spent compiling
|
||||
std::chrono::duration<double> m_compilation_time;
|
||||
std::chrono::nanoseconds m_compilation_time;
|
||||
|
||||
/// Time spent idling
|
||||
std::chrono::duration<double> m_idling_time;
|
||||
std::chrono::nanoseconds m_idling_time;
|
||||
|
||||
/// Total time
|
||||
std::chrono::nanoseconds m_total_time;
|
||||
|
||||
/// Contains the number of times the interpreter fallback was used
|
||||
std::map<std::string, u64> m_interpreter_fallback_stats;
|
||||
|
||||
/// Compile a block of code
|
||||
/// Get the block in function for the instruction at the specified address.
|
||||
llvm::BasicBlock * GetBlockInFunction(u32 address, llvm::Function * function, bool create_if_not_exist = false);
|
||||
|
||||
/// Compile the section startin at address
|
||||
void Compile(u32 address);
|
||||
|
||||
/// Remove old versions of executables that are no longer used by any execution thread
|
||||
void RemoveUnusedOldVersions();
|
||||
|
||||
/// Test whether the blocks needs to be compiled
|
||||
bool NeedsCompiling(u32 address);
|
||||
|
||||
/// Get PPU state pointer
|
||||
llvm::Value * GetPPUState();
|
||||
|
||||
|
@ -552,7 +595,7 @@ private:
|
|||
llvm::Value * GetPc();
|
||||
|
||||
/// Set PC
|
||||
void SetPc(llvm::Value * val_i32);
|
||||
void SetPc(llvm::Value * val_ix);
|
||||
|
||||
/// Load GPR
|
||||
llvm::Value * GetGpr(u32 r, u32 num_bits = 64);
|
||||
|
@ -644,6 +687,12 @@ private:
|
|||
/// Set VR to the specified value
|
||||
void SetVr(u32 vr, llvm::Value * val_x128);
|
||||
|
||||
/// Check condition for branch instructions
|
||||
llvm::Value * CheckBranchCondition(u32 bo, u32 bi);
|
||||
|
||||
/// Create IR for a branch instruction
|
||||
void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk);
|
||||
|
||||
/// Read from memory
|
||||
llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, bool bswap = true);
|
||||
|
||||
|
@ -696,6 +745,17 @@ public:
|
|||
u8 DecodeMemory(const u32 address) override;
|
||||
|
||||
private:
|
||||
struct ExecutableInfo {
|
||||
/// Pointer to the executable
|
||||
PPULLVMRecompiler::Executable executable;
|
||||
|
||||
/// The revision of the executable
|
||||
u32 revision;
|
||||
|
||||
/// Number of times the executable was hit
|
||||
u32 num_hits;
|
||||
};
|
||||
|
||||
/// PPU processor context
|
||||
PPUThread & m_ppu;
|
||||
|
||||
|
@ -705,13 +765,11 @@ private:
|
|||
/// PPU instruction Decoder
|
||||
PPUDecoder m_decoder;
|
||||
|
||||
/// Compiled blocks
|
||||
/// Key is block address.
|
||||
std::unordered_map<u32, std::pair<PPULLVMRecompiler::CompiledBlock, u32>> m_compiled_blocks;
|
||||
/// Address to executable map. Key is address.
|
||||
std::unordered_map<u32, ExecutableInfo> m_address_to_executable;
|
||||
|
||||
/// Uncompiled blocks
|
||||
/// Key is block address
|
||||
std::unordered_map<u32, u64> m_pending_compilation_blocks;
|
||||
/// Sections that have not been compiled yet. Key is starting address of the section.
|
||||
std::unordered_map<u32, u64> m_uncompiled;
|
||||
|
||||
/// Number of instances of this class
|
||||
static u32 s_num_instances;
|
||||
|
|
|
@ -190,17 +190,17 @@ void PPULLVMRecompiler::VerifyInstructionAgainstInterpreter(const char * name, P
|
|||
|
||||
void PPULLVMRecompiler::RunTest(const char * name, std::function<void()> test_case, std::function<void()> input, std::function<bool(std::string & msg)> check_result) {
|
||||
// Create the unit test function
|
||||
m_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(),
|
||||
m_ir_builder->getInt8PtrTy() /*ppu_state*/,
|
||||
m_ir_builder->getInt64Ty() /*base_addres*/,
|
||||
m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr);
|
||||
m_function->setCallingConv(CallingConv::X86_64_Win64);
|
||||
auto arg_i = m_function->arg_begin();
|
||||
m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(),
|
||||
m_ir_builder->getInt8PtrTy() /*ppu_state*/,
|
||||
m_ir_builder->getInt64Ty() /*base_addres*/,
|
||||
m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr);
|
||||
m_current_function->setCallingConv(CallingConv::X86_64_Win64);
|
||||
auto arg_i = m_current_function->arg_begin();
|
||||
arg_i->setName("ppu_state");
|
||||
(++arg_i)->setName("base_address");
|
||||
(++arg_i)->setName("interpreter");
|
||||
|
||||
auto block = BasicBlock::Create(*m_llvm_context, "start", m_function);
|
||||
auto block = BasicBlock::Create(*m_llvm_context, "start", m_current_function);
|
||||
m_ir_builder->SetInsertPoint(block);
|
||||
|
||||
test_case();
|
||||
|
@ -210,19 +210,19 @@ void PPULLVMRecompiler::RunTest(const char * name, std::function<void()> test_ca
|
|||
// Print the IR
|
||||
std::string ir;
|
||||
raw_string_ostream ir_ostream(ir);
|
||||
m_function->print(ir_ostream);
|
||||
m_current_function->print(ir_ostream);
|
||||
LOG_NOTICE(PPU, "[UT %s] LLVM IR:%s", name, ir.c_str());
|
||||
|
||||
std::string verify;
|
||||
raw_string_ostream verify_ostream(verify);
|
||||
if (verifyFunction(*m_function, &verify_ostream)) {
|
||||
if (verifyFunction(*m_current_function, &verify_ostream)) {
|
||||
LOG_ERROR(PPU, "[UT %s] Verification Failed:%s", name, verify.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate the function
|
||||
MachineCodeInfo mci;
|
||||
m_execution_engine->runJITOnFunction(m_function, &mci);
|
||||
m_execution_engine->runJITOnFunction(m_current_function, &mci);
|
||||
|
||||
// Disassember the generated function
|
||||
LOG_NOTICE(PPU, "[UT %s] Disassembly:", name);
|
||||
|
@ -242,7 +242,7 @@ void PPULLVMRecompiler::RunTest(const char * name, std::function<void()> test_ca
|
|||
base_address.IntVal = APInt(64, s_base_address);
|
||||
args.push_back(base_address);
|
||||
args.push_back(GenericValue(s_interpreter));
|
||||
m_execution_engine->runFunction(m_function, args);
|
||||
m_execution_engine->runFunction(m_current_function, args);
|
||||
|
||||
// Verify results
|
||||
std::string msg;
|
||||
|
@ -253,7 +253,7 @@ void PPULLVMRecompiler::RunTest(const char * name, std::function<void()> test_ca
|
|||
LOG_ERROR(PPU, "[UT %s] Test failed. %s", name, msg.c_str());
|
||||
}
|
||||
|
||||
m_execution_engine->freeMachineCodeForFunction(m_function);
|
||||
m_execution_engine->freeMachineCodeForFunction(m_current_function);
|
||||
}
|
||||
|
||||
void PPULLVMRecompiler::RunAllTests(PPUThread * ppu_state, u64 base_address, PPUInterpreter * interpreter) {
|
||||
|
|
Loading…
Add table
Reference in a new issue