PPU/LLVM: Remove duplicate m_address_to_ordinal map and improve cleaning

This commit is contained in:
vlj 2015-07-21 17:14:47 +02:00 committed by Nekotekina
parent 501c14fc65
commit 26f6b89530
3 changed files with 123 additions and 158 deletions

View file

@ -58,8 +58,6 @@ Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable
}
/// Releases everything the compiler owns through raw pointers: each execution
/// engine stored in m_execution_engines, then the IR builder, then the LLVM context.
Compiler::~Compiler() {
    // Engines are destroyed first; the builder and the context follow, in that order.
    for (auto engine_it = m_execution_engines.begin(); engine_it != m_execution_engines.end(); ++engine_it) {
        delete *engine_it;
    }

    delete m_ir_builder;
    delete m_llvm_context;
}
@ -84,7 +82,7 @@ public:
};
Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits) {
std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits) {
auto compilation_start = std::chrono::high_resolution_clock::now();
m_module = new llvm::Module("Module", *m_llvm_context);
@ -254,7 +252,6 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph &
void *function = execution_engine->getPointerToFunction(m_state.function);
auto translate_end = std::chrono::high_resolution_clock::now();
m_stats.translation_time += std::chrono::duration_cast<std::chrono::nanoseconds>(translate_end - optimize_end);
m_execution_engines.push_back(execution_engine);
/* m_recompilation_engine.Log() << "\nDisassembly:\n";
auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr);
@ -273,15 +270,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph &
delete fpm;
assert(function != nullptr);
return (Executable)function;
}
void Compiler::FreeExecutable(const std::string & name) {
auto function = m_module->getFunction(name);
if (function) {
// m_execution_engine->freeMachineCodeForFunction(function);
function->eraseFromParent();
}
return std::make_pair((Executable)function, execution_engine);
}
Compiler::Stats Compiler::GetStats() {
@ -297,48 +286,49 @@ std::shared_ptr<RecompilationEngine> RecompilationEngine::s_the_instance = nullp
/// Constructs the recompilation engine.
/// The log pointer starts out null, the ordinal counter at 0, and the cache-clear
/// timestamp at the current time. The compiler is handed this engine together with
/// the three static CPUHybridDecoderRecompiler callbacks
/// (ExecuteFunction, ExecuteTillReturn, PollStatus).
RecompilationEngine::RecompilationEngine()
: m_log(nullptr)
, m_next_ordinal(0)
, m_last_cache_clear_time(std::chrono::high_resolution_clock::now())
, m_compiler(*this, CPUHybridDecoderRecompiler::ExecuteFunction, CPUHybridDecoderRecompiler::ExecuteTillReturn, CPUHybridDecoderRecompiler::PollStatus) {
// NOTE(review): RunAllTests() runs on every construction — presumably a compiler self-check; confirm its cost is acceptable.
m_compiler.RunAllTests();
}
/// Tears the engine down: empties m_address_to_function, then calls join().
RecompilationEngine::~RecompilationEngine() {
// Each map entry holds a std::unique_ptr<llvm::ExecutionEngine>, so clearing the map destroys the engines it owns.
m_address_to_function.clear();
// NOTE(review): join() comes from the thread_t base — presumably it waits for the recompilation thread; confirm.
join();
}
u32 RecompilationEngine::AllocateOrdinal(u32 address, bool is_function) {
std::lock_guard<std::mutex> lock(m_address_to_ordinal_lock);
Executable executeFunc;
Executable executeUntilReturn;
auto i = m_address_to_ordinal.find(address);
if (i == m_address_to_ordinal.end()) {
assert(m_next_ordinal < (sizeof(m_executable_lookup) / sizeof(m_executable_lookup[0])));
/// Returns the address of one of the two fallback executable slots:
/// executeFunc when isFunction is true, executeUntilReturn otherwise.
/// The address argument is not consulted by this implementation.
const Executable *RecompilationEngine::GetExecutable(u32 address, bool isFunction) {
    if (isFunction) {
        return &executeFunc;
    }
    return &executeUntilReturn;
}
m_executable_lookup[m_next_ordinal] = is_function ? CPUHybridDecoderRecompiler::ExecuteFunction : CPUHybridDecoderRecompiler::ExecuteTillReturn;
std::atomic_thread_fence(std::memory_order_release);
i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, m_next_ordinal++));
/// Looks up the compiled executable registered for `address`.
///
/// The lookup is performed while holding m_address_to_function_lock.
/// Returns nullptr when the address has no entry, or when the entry has no
/// execution engine attached yet. Otherwise returns a pointer to the Executable
/// stored inside the map entry.
///
/// NOTE(review): `mut` is a pointer received by value, so the assignment below only
/// rebinds this function's local copy; the caller's pointer/mutex is left untouched.
/// Handing the per-entry mutex back to the caller would need a signature change.
const Executable *RecompilationEngine::GetCompiledExecutableIfAvailable(u32 address, std::mutex *mut)
{
    std::lock_guard<std::mutex> lock(m_address_to_function_lock);

    const auto entry = m_address_to_function.find(address);
    const bool has_compiled_code =
        entry != m_address_to_function.end() && std::get<1>(entry->second) != nullptr;
    if (!has_compiled_code)
        return nullptr;

    mut = &std::get<3>(entry->second);
    return &std::get<0>(entry->second);
}
void RecompilationEngine::RemoveUnusedEntriesFromCache() {
auto now = std::chrono::high_resolution_clock::now();
if (std::chrono::duration_cast<std::chrono::milliseconds>(now - m_last_cache_clear_time).count() > 10000) {
for (auto i = m_address_to_function.begin(); i != m_address_to_function.end();) {
auto tmp = i;
i++;
if (std::get<2>(tmp->second) == 0)
m_address_to_function.erase(tmp);
else
std::get<2>(tmp->second) = 0;
}
return i->second;
}
/// Returns the ordinal recorded for `address` in m_address_to_ordinal,
/// or 0xFFFFFFFF when the address has no ordinal.
/// The map is read while holding m_address_to_ordinal_lock.
u32 RecompilationEngine::GetOrdinal(u32 address) const {
    std::lock_guard<std::mutex> lock(m_address_to_ordinal_lock);

    const auto found = m_address_to_ordinal.find(address);
    return found == m_address_to_ordinal.end() ? 0xFFFFFFFF : found->second;
}
/// Returns the entry of m_executable_lookup at index `ordinal`.
/// No bounds check is performed on `ordinal` here.
const Executable RecompilationEngine::GetExecutable(u32 ordinal) const {
// Acquire fence issued before the table read; AllocateOrdinal issues a release fence after it writes a slot.
// Keep the fence ahead of the load.
std::atomic_thread_fence(std::memory_order_acquire);
return m_executable_lookup[ordinal];
}
u64 RecompilationEngine::GetAddressOfExecutableLookup() const {
return (u64)m_executable_lookup;
m_last_cache_clear_time = now;
}
}
void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) {
@ -447,7 +437,6 @@ void RecompilationEngine::Task() {
Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n";
Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n";
Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n";
Log() << "Ordinals allocated = " << m_next_ordinal << "\n";
LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting.");
s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this.
@ -543,12 +532,27 @@ void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
Log() << "Compile: " << block_entry.ToString() << "\n";
Log() << "CFG: " << block_entry.cfg.ToString() << "\n";
u32 ordinal = AllocateOrdinal(block_entry.cfg.start_address, block_entry.IsFunction());
Executable executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg,
block_entry.IsFunction() ? true : false /*generate_linkable_exits*/);
m_executable_lookup[ordinal] = executable;
const std::pair<Executable, llvm::ExecutionEngine *> &compileResult =
m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg,
block_entry.IsFunction() ? true : false /*generate_linkable_exits*/);
// If entry doesn't exist, create it (using lock)
std::unordered_map<u32, ExecutableStorage>::iterator It = m_address_to_function.find(block_entry.cfg.start_address);
if (It == m_address_to_function.end())
{
std::lock_guard<std::mutex> lock(m_address_to_function_lock);
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = nullptr;
}
// Prevent access on this block
std::lock_guard<std::mutex> lock(std::get<3>(m_address_to_function[block_entry.cfg.start_address]));
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = std::unique_ptr<llvm::ExecutionEngine>(compileResult.second);
std::get<0>(m_address_to_function[block_entry.cfg.start_address]) = compileResult.first;
block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize();
block_entry.is_compiled = true;
block_entry.is_compiled = true;
}
std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
@ -645,8 +649,9 @@ ppu_recompiler_llvm::CPUHybridDecoderRecompiler::CPUHybridDecoderRecompiler(PPUT
: m_ppu(ppu)
, m_interpreter(new PPUInterpreter(ppu))
, m_decoder(m_interpreter)
, m_last_cache_clear_time(std::chrono::high_resolution_clock::now())
, m_recompilation_engine(RecompilationEngine::GetInstance()) {
executeFunc = CPUHybridDecoderRecompiler::ExecuteFunction;
executeUntilReturn = CPUHybridDecoderRecompiler::ExecuteTillReturn;
}
ppu_recompiler_llvm::CPUHybridDecoderRecompiler::~CPUHybridDecoderRecompiler() {
@ -654,53 +659,39 @@ ppu_recompiler_llvm::CPUHybridDecoderRecompiler::~CPUHybridDecoderRecompiler() {
}
/// Runs ExecuteFunction on this decoder's PPU thread with a zero context and
/// always returns 0. The `address` argument is not used; the result of
/// ExecuteFunction is discarded.
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::DecodeMemory(const u32 address) {
    auto *thread = &m_ppu;
    ExecuteFunction(thread, 0);
    return 0;
}
/// Periodically prunes m_address_to_ordinal.
///
/// Does nothing unless more than 10000 whole milliseconds have passed since
/// m_last_cache_clear_time. When a sweep runs, entries whose hit counter is 0
/// are erased and every surviving entry has its hit counter reset to 0; the
/// timestamp is then set to the time sampled at the start of the call.
void ppu_recompiler_llvm::CPUHybridDecoderRecompiler::RemoveUnusedEntriesFromCache() const {
    const auto now = std::chrono::high_resolution_clock::now();
    const auto elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(now - m_last_cache_clear_time).count();
    if (elapsed_ms <= 10000)
        return;

    auto entry = m_address_to_ordinal.begin();
    while (entry != m_address_to_ordinal.end()) {
        auto &hits = entry->second.second;
        if (hits == 0) {
            // erase() hands back the iterator following the removed element.
            entry = m_address_to_ordinal.erase(entry);
        } else {
            hits = 0;
            ++entry;
        }
    }

    m_last_cache_clear_time = now;
}
/// Resolves the executable to run for `address`.
///
/// The per-decoder cache m_address_to_ordinal is consulted first; on a miss the
/// recompilation engine is asked for the ordinal, and a known ordinal
/// (anything other than 0xFFFFFFFF) is cached with a hit count of 0.
/// When an entry is available its hit count is incremented and the engine's
/// executable for that ordinal is returned; otherwise `default_executable` is
/// returned. The cache sweep is attempted on every call, after the lookup.
Executable ppu_recompiler_llvm::CPUHybridDecoderRecompiler::GetExecutable(u32 address, Executable default_executable) const {
    auto cached = m_address_to_ordinal.find(address);
    if (cached == m_address_to_ordinal.end()) {
        const auto ordinal = m_recompilation_engine->GetOrdinal(address);
        const bool engine_knows_address = ordinal != 0xFFFFFFFF;
        if (engine_knows_address) {
            cached = m_address_to_ordinal.insert(m_address_to_ordinal.end(),
                                                 std::make_pair(address, std::make_pair(ordinal, 0)));
        }
    }

    Executable resolved = default_executable;
    if (cached != m_address_to_ordinal.end()) {
        ++cached->second.second;  // one more hit for this address
        resolved = m_recompilation_engine->GetExecutable(cached->second.first);
    }

    RemoveUnusedEntriesFromCache();
    return resolved;
}
/// Entry point for executing a function on `ppu_state`.
/// Records an EnterFunction trace at the thread's current PC on the decoder
/// attached to the thread, then delegates to ExecuteTillReturn with a zero
/// context and returns its result. The incoming `context` is not used.
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteFunction(PPUThread * ppu_state, u64 context) {
    auto *const decoder = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder();
    decoder->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0);

    const u32 result = ExecuteTillReturn(ppu_state, 0);
    return result;
}
/// Get the branch type from a branch instruction.
///
/// Classification is driven by the top six bits of the instruction word (the
/// primary opcode) and, for primary opcode 19, by the ten-bit field at bits 1..10.
/// Bit 0 selects FunctionCall over the non-linking variant. Primary opcode 1 is
/// the HACK instruction and is classified through the EIF_* flag bits.
/// Everything else is NonBranch.
static BranchType GetBranchTypeFromInstruction(u32 instruction) {
    const u32 primary_opcode = instruction >> 26;
    const bool link = (instruction & 1) != 0;

    switch (primary_opcode) {
    case 16:
    case 18:
        return link ? BranchType::FunctionCall : BranchType::LocalBranch;

    case 19: {
        const u32 extended_opcode = (instruction >> 1) & 0x3FF;
        if (extended_opcode == 16)
            return link ? BranchType::FunctionCall : BranchType::Return;
        if (extended_opcode == 528)
            return link ? BranchType::FunctionCall : BranchType::LocalBranch;
        return BranchType::NonBranch;
    }

    case 1: // classify HACK instruction
        if (instruction & EIF_PERFORM_BLR)
            return (instruction & EIF_USE_BRANCH) ? BranchType::FunctionCall : BranchType::Return;
        if (instruction & EIF_USE_BRANCH)
            return BranchType::LocalBranch;
        return BranchType::NonBranch;

    default:
        return BranchType::NonBranch;
    }
}
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread * ppu_state, u64 context) {
CPUHybridDecoderRecompiler *execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder();
@ -708,10 +699,12 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF);
while (PollStatus(ppu_state) == false) {
Executable executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn);
if (executable != ExecuteTillReturn && executable != ExecuteFunction) {
std::mutex mut;
const Executable *executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC, &mut);
if (executable) {
std::lock_guard<std::mutex> lock(mut);
auto entry = ppu_state->PC;
u32 exit = (u32)executable(ppu_state, 0);
u32 exit = (u32)(*executable)(ppu_state, 0);
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit);
if (exit == 0)
return 0;
@ -730,8 +723,8 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
return 0;
case BranchType::FunctionCall:
execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0);
executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction);
executable(ppu_state, 0);
executable = execution_engine->m_recompilation_engine->GetExecutable(ppu_state->PC, true);
(*executable)(ppu_state, 0);
break;
case BranchType::LocalBranch:
break;
@ -750,24 +743,3 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
/// Check thread status by forwarding to ppu_state->check_status().
/// Returns that call's result as a bool; callers in this file stop executing when it is true.
bool ppu_recompiler_llvm::CPUHybridDecoderRecompiler::PollStatus(PPUThread * ppu_state) {
    const bool must_exit = ppu_state->check_status();
    return must_exit;
}
/// Get the branch type from a branch instruction.
///
/// The top six bits of the word select the instruction family; bit 0 marks the
/// linking variant, which is always reported as FunctionCall. Family 19 is
/// further split by the ten-bit field at bits 1..10 (16 -> Return, 528 ->
/// LocalBranch). Family 1 is the HACK instruction and is classified from the
/// EIF_PERFORM_BLR / EIF_USE_BRANCH flag bits. Anything else is NonBranch.
BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) {
    const u32 opcode = instruction >> 26;
    const bool sets_link = (instruction & 1) != 0;

    if (opcode == 16 || opcode == 18) {
        if (sets_link)
            return BranchType::FunctionCall;
        return BranchType::LocalBranch;
    }

    if (opcode == 19) {
        const u32 sub_opcode = (instruction >> 1) & 0x3FF;
        if (sub_opcode != 16 && sub_opcode != 528)
            return BranchType::NonBranch;
        if (sets_link)
            return BranchType::FunctionCall;
        return sub_opcode == 16 ? BranchType::Return : BranchType::LocalBranch;
    }

    if (opcode != 1)
        return BranchType::NonBranch;

    // classify HACK instruction
    const bool performs_blr = (instruction & EIF_PERFORM_BLR) != 0;
    const bool uses_branch = (instruction & EIF_USE_BRANCH) != 0;
    if (performs_blr)
        return uses_branch ? BranchType::FunctionCall : BranchType::Return;
    return uses_branch ? BranchType::LocalBranch : BranchType::NonBranch;
}

View file

@ -284,11 +284,11 @@ namespace ppu_recompiler_llvm {
Compiler & operator = (const Compiler & other) = delete;
Compiler & operator = (Compiler && other) = delete;
/// Compile a code fragment described by a cfg and return an executable
Executable Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits);
/// Free an executable earlier obtained via a call to Compile
void FreeExecutable(const std::string & name);
/**
* Compile a code fragment described by a cfg and return an executable and the ExecutionEngine storing it
* Pointer to function can be retrieved with getPointerToFunction
*/
std::pair<Executable, llvm::ExecutionEngine *> Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits);
/// Retrieve compiler stats
Stats GetStats();
@ -755,9 +755,6 @@ namespace ppu_recompiler_llvm {
/// Module to which all generated code is output to
llvm::Module * m_module;
/// Execution engine list. An execution engine is a JITed function
std::vector<llvm::ExecutionEngine *> m_execution_engines;
/// LLVM type of the functions generated by the compiler
llvm::FunctionType * m_compiled_function_type;
@ -991,21 +988,30 @@ namespace ppu_recompiler_llvm {
static void InitRotateMask();
};
/**
* Manages block compilation.
* PPUInterpreter1 execution is traced (using the Tracer class).
* Periodically, RecompilationEngine processes the trace results to find blocks
* whose compilation can improve performance.
* It then builds them asynchronously and updates the executable mapping,
* using atomic based locks to avoid undefined behavior.
**/
class RecompilationEngine final : protected thread_t {
public:
virtual ~RecompilationEngine() override;
/// Allocate an ordinal
u32 AllocateOrdinal(u32 address, bool is_function);
/**
* Get the executable for the specified address
* The pointer is always valid during the lifetime of RecompilationEngine
* but the function pointed to can be updated.
**/
const Executable *GetExecutable(u32 address, bool isFunction);
/// Get the ordinal for the specified address
u32 GetOrdinal(u32 address) const;
/// Get the executable specified by the ordinal
const Executable GetExecutable(u32 ordinal) const;
/// Get the address of the executable lookup
u64 GetAddressOfExecutableLookup() const;
/**
* Get the executable for the specified address if a compiled version is
* available, otherwise returns nullptr.
**/
const Executable *GetCompiledExecutableIfAvailable(u32 address, std::mutex*);
/// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace.
void NotifyTrace(ExecutionTrace * execution_trace);
@ -1085,22 +1091,23 @@ namespace ppu_recompiler_llvm {
/// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes.
std::unordered_map<ExecutionTrace::Id, std::vector<BlockEntry *>> m_processed_execution_traces;
/// Lock for accessing m_address_to_ordinal.
// TODO: Make this a RW lock
mutable std::mutex m_address_to_ordinal_lock;
/// Lock for accessing m_address_to_function.
std::mutex m_address_to_function_lock;
/// Mapping from address to ordinal
std::unordered_map<u32, u32> m_address_to_ordinal;
/// (function, module containing function, times hit, mutex for access).
typedef std::tuple<Executable, std::unique_ptr<llvm::ExecutionEngine>, u32, std::mutex> ExecutableStorage;
/// Address to compiled function cache. Key is address.
std::unordered_map<u32, ExecutableStorage> m_address_to_function;
/// Next ordinal to allocate
u32 m_next_ordinal;
/// The time at which the m_address_to_function cache was last cleared
std::chrono::high_resolution_clock::time_point m_last_cache_clear_time;
/// Remove unused entries from the m_address_to_function cache
void RemoveUnusedEntriesFromCache();
/// PPU Compiler
Compiler m_compiler;
/// Executable lookup table
Executable m_executable_lookup[10000]; // TODO: Adjust size
RecompilationEngine();
RecompilationEngine(const RecompilationEngine & other) = delete;
@ -1170,6 +1177,7 @@ namespace ppu_recompiler_llvm {
*/
class CPUHybridDecoderRecompiler : public CPUDecoder {
friend class RecompilationEngine;
friend class Compiler;
public:
CPUHybridDecoderRecompiler(PPUThread & ppu);
CPUHybridDecoderRecompiler() = delete;
@ -1197,21 +1205,9 @@ namespace ppu_recompiler_llvm {
/// Execution tracer
Tracer m_tracer;
/// The time at which the m_address_to_ordinal cache was last cleared
mutable std::chrono::high_resolution_clock::time_point m_last_cache_clear_time;
/// Address to ordinal cache. Key is address. Data is the pair (ordinal, times hit).
mutable std::unordered_map<u32, std::pair<u32, u32>> m_address_to_ordinal;
/// Recompilation engine
std::shared_ptr<RecompilationEngine> m_recompilation_engine;
/// Remove unused entries from the m_address_to_ordinal cache
void RemoveUnusedEntriesFromCache() const;
/// Get the executable for the specified address
Executable GetExecutable(u32 address, Executable default_executable) const;
/// Execute a function
static u32 ExecuteFunction(PPUThread * ppu_state, u64 context);
@ -1221,9 +1217,6 @@ namespace ppu_recompiler_llvm {
/// Check thread status. Returns true if the thread must exit.
static bool PollStatus(PPUThread * ppu_state);
};
/// Get the branch type from a branch instruction
BranchType GetBranchTypeFromInstruction(u32 instruction);
}
#endif // LLVM_AVAILABLE

View file

@ -5284,8 +5284,8 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool
}
llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_function) {
auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function);
auto location_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64)));
const Executable *functionPtr = m_recompilation_engine.GetExecutable(address, is_function);
auto location_i64 = m_ir_builder->getInt64((uint64_t)functionPtr);
auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo());
auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr);
auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo());