From 73ea02052537378bac64b69e5d7ee73273953fdb Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 21 Jun 2016 23:30:12 +0300 Subject: [PATCH 1/6] PPU LLVM: Thread context argument --- rpcs3/Emu/Cell/PPUThread.cpp | 33 +++++++++++++++----------------- rpcs3/Emu/Cell/PPUTranslator.cpp | 30 +++++++++++------------------ rpcs3/Emu/Cell/PPUTranslator.h | 5 ++++- 3 files changed, 30 insertions(+), 38 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index ef1655b7a4..93033fed96 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -33,7 +33,7 @@ struct ppu_addr_hash } }; -static std::unordered_map s_ppu_compiled; +static std::unordered_map s_ppu_compiled; @@ -109,7 +109,7 @@ void PPUThread::cpu_task() if (found != s_ppu_compiled.end()) { - return found->second(); + return found->second(*this); } } @@ -430,7 +430,7 @@ static std::vector s_unwind; struct MemoryManager final : llvm::RTDyldMemoryManager { - static PPUThread* context(u64 addr) + static PPUThread* context(u64 addr) // Unused { //trace(addr); return static_cast(get_current_cpu_thread()); @@ -447,16 +447,14 @@ struct MemoryManager final : llvm::RTDyldMemoryManager LOG_NOTICE(PPU, "Trace: 0x%llx", addr); } - static void hack(u32 index) + static void hlecall(PPUThread& ppu, u32 index) { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); ppu_execute_function(ppu, index); if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily } - static void syscall(u64 code) + static void syscall(PPUThread& ppu, u64 code) { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); ppu_execute_syscall(ppu, code); if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily } @@ -466,13 +464,13 @@ struct MemoryManager final : llvm::RTDyldMemoryManager return (u32)get_timebased_time(); } - static void call(u32 addr) + static void call(PPUThread& ppu, u32 addr) { const auto found = s_ppu_compiled.find(addr); if (found 
!= s_ppu_compiled.end()) { - return found->second(); + return found->second(ppu); } const auto op = vm::read32(addr).value(); @@ -481,7 +479,7 @@ struct MemoryManager final : llvm::RTDyldMemoryManager // Allow HLE callbacks without compiling them if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) { - return hack(op & 0x3ffffff); + return hlecall(ppu, op & 0x3ffffff); } trap(addr); @@ -683,10 +681,9 @@ struct MemoryManager final : llvm::RTDyldMemoryManager std::unordered_map table { { "__memory", (u64)vm::base(0) }, - { "__context", (u64)&context }, { "__trap", (u64)&trap }, { "__trace", (u64)&trace }, - { "__hlecall", (u64)&hack }, + { "__hlecall", (u64)&hlecall }, { "__syscall", (u64)&syscall }, { "__get_tbl", (u64)&tbl }, { "__call", (u64)&call }, @@ -839,15 +836,11 @@ extern void ppu_initialize(const std::string& name, const std::vector module = std::make_unique(name, g_context); @@ -857,6 +850,10 @@ extern void ppu_initialize(const std::string& name, const std::vector translator = std::make_unique(g_context, module.get(), 0, entry); + // Define some types + const auto _void = Type::getVoidTy(g_context); + const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false); + // Initialize function list for (const auto& info : funcs) { @@ -971,7 +968,7 @@ extern void ppu_initialize(const std::string& name, const std::vectorgetFunctionAddress(fmt::format("__sub_%x", info.first)); - s_ppu_compiled.emplace(info.first, (void(*)())link); + s_ppu_compiled.emplace(info.first, (void(*)(PPUThread&))link); LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second); } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 41db86b163..054418cae4 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -88,6 +88,11 @@ PPUTranslator::~PPUTranslator() { } +Type* PPUTranslator::GetContextType() +{ 
+ return m_thread_type; +} + void PPUTranslator::AddFunction(u64 addr, Function* func, FunctionType* type) { if (!m_func_types.emplace(addr, type).second || !m_func_list.emplace(addr, func).second) @@ -114,7 +119,8 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* m_ir = &builder; /* Create context variables */ - m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(start_addr)); + //m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(start_addr)); + m_thread = &*m_function->getArgumentList().begin(); // Non-volatile registers with special meaning (TODO) m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp"); @@ -259,7 +265,7 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* } m_ir->SetInsertPoint(_default); - Call(GetType(), "__call", _ctr); + Call(GetType(), "__call", m_thread, _ctr); m_ir->CreateRetVoid(); } @@ -315,27 +321,13 @@ void PPUTranslator::CallFunction(u64 target, bool tail, Value* indirect) const auto callee_type = func ? m_func_types[target] : nullptr; - // Prepare function arguments - std::vector args; - - if (!callee_type) - { - // Prepare args for untyped function - } - - // Call the function - const auto result = func ? m_ir->CreateCall(func, args) : Call(GetType(), "__call", indirect ? indirect : m_ir->getInt64(target)); + const auto result = func ? m_ir->CreateCall(func, {m_thread}) : Call(GetType(), "__call", m_thread, indirect ? 
indirect : m_ir->getInt64(target)); if (!tail) { UndefineVolatileRegisters(); } - if (!callee_type) - { - // Get result from untyped function - } - if (tail) { m_ir->CreateRetVoid(); @@ -1746,13 +1738,13 @@ void PPUTranslator::BC(ppu_opcode_t op) void PPUTranslator::HACK(ppu_opcode_t op) { - Call(GetType(), "__hlecall", m_ir->getInt32(op.opcode & 0x3ffffff)); + Call(GetType(), "__hlecall", m_thread, m_ir->getInt32(op.opcode & 0x3ffffff)); UndefineVolatileRegisters(); } void PPUTranslator::SC(ppu_opcode_t op) { - Call(GetType(), fmt::format(op.lev == 0 ? "__syscall" : "__lv%ucall", +op.lev), m_ir->CreateLoad(m_gpr[11])); + Call(GetType(), fmt::format(op.lev == 0 ? "__syscall" : "__lv%ucall", +op.lev), m_thread, m_ir->CreateLoad(m_gpr[11])); UndefineVolatileRegisters(); } diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 10f8969fbe..8fa4664b20 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -152,7 +152,7 @@ class PPUTranslator final //: public CPUTranslator // Memory base llvm::Value* m_base; - // Thread context (obtained by __context) + // Thread context llvm::Value* m_thread; // Thread context struct @@ -433,6 +433,9 @@ public: PPUTranslator(llvm::LLVMContext& context, llvm::Module* module, u64 base, u64 entry); ~PPUTranslator(); + // Get thread context struct type + llvm::Type* GetContextType(); + // Add function void AddFunction(u64 addr, llvm::Function* func, llvm::FunctionType* type = nullptr); From a19b721c2614f3b35e2c3af36b41c15b34f08518 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 22 Jun 2016 16:37:51 +0300 Subject: [PATCH 2/6] LLVM: Added JIT.cpp Refactoring, used -fno-rtti option --- Utilities/JIT.cpp | 327 ++++++++++++++ Utilities/JIT.h | 50 +++ rpcs3/CMakeLists.txt | 3 + rpcs3/Emu/Cell/PPUThread.cpp | 761 ++++++++------------------------- rpcs3/Emu/Cell/PPUTranslator.h | 4 + rpcs3/emucore.vcxproj | 30 +- rpcs3/emucore.vcxproj.filters | 18 + 7 files changed, 595 insertions(+), 
598 deletions(-) create mode 100644 Utilities/JIT.cpp create mode 100644 Utilities/JIT.h diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp new file mode 100644 index 0000000000..3f279db3d4 --- /dev/null +++ b/Utilities/JIT.cpp @@ -0,0 +1,327 @@ +#ifdef LLVM_AVAILABLE + +#include +#include +#include +#include +#include + +#include "types.h" +#include "Macro.h" +#include "StrFmt.h" +#include "File.h" +#include "Log.h" + +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#include +#endif + +#include "JIT.h" + +// Global LLVM context (thread-unsafe) +llvm::LLVMContext g_llvm_ctx; + +// Size of virtual memory area reserved: 512 MB +static const u64 s_memory_size = 0x20000000; + +// Try to reserve a portion of virtual memory in the first 2 GB address space beforehand, if possible. 
+static void* const s_memory = []() -> void* +{ +#ifdef _WIN32 + for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000) + { + if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS)) + { + return (void*)addr; + } + } + + return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS); +#else + return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); +#endif +}(); + +// EH frames +static u8* s_unwind_info; +static u64 s_unwind_size; + +#ifdef _WIN32 +static std::vector s_unwind; // Custom .pdata section replacement +#endif + +// Helper class +struct MemoryManager final : llvm::RTDyldMemoryManager +{ + std::unordered_map table; + + MemoryManager(std::unordered_map&& table) + : table(std::move(table)) + { + } + + [[noreturn]] static void null() + { + throw std::runtime_error("Null function" HERE); + } + + virtual u64 getSymbolAddress(const std::string& name) override + { + if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name)) + { + // This may be bad if LLVM requests some built-in functions like fma. + LOG_ERROR(GENERAL, "LLVM: Symbol requested %s -> 0x%016llx", name, addr); + return addr; + } + + const auto found = table.find(name); + + if (found != table.end()) + { + return found->second; + } + + // It's fine if some function is never called, for example. 
+ LOG_ERROR(GENERAL, "LLVM: Linkage failed for %s", name); + return (u64)null; + } + + virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override + { + // Simple allocation + const u64 next = ::align((u64)m_next + size, 4096); + + if (next > (u64)s_memory + s_memory_size) + { + LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align); + return nullptr; + } + +#ifdef _WIN32 + if (!VirtualAlloc(m_next, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) +#else + if (::mprotect(m_next, size, PROT_READ | PROT_WRITE | PROT_EXEC)) +#endif + { + LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next); + return nullptr; + } + + LOG_SUCCESS(GENERAL, "LLVM: Code section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align); + return (u8*)std::exchange(m_next, (void*)next); + } + + virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override + { + // Simple allocation + const u64 next = ::align((u64)m_next + size, 4096); + + if (next > (u64)s_memory + s_memory_size) + { + LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align); + return nullptr; + } + +#ifdef _WIN32 + if (!VirtualAlloc(m_next, size, MEM_COMMIT, PAGE_READWRITE)) +#else + if (::mprotect(m_next, size, PROT_READ | PROT_WRITE)) +#endif + { + LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next); + return nullptr; + } + + LOG_SUCCESS(GENERAL, "LLVM: Data section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? 
"ro" : "rw"); + return (u8*)std::exchange(m_next, (void*)next); + } + + virtual bool finalizeMemory(std::string* = nullptr) override + { + // TODO: make sections read-only when necessary + return false; + } + + virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override + { + s_unwind_info = addr; + s_unwind_size = size; + + return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size); + } + + virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override + { + LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected + + return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size); + } + + ~MemoryManager() + { +#ifdef _WIN32 + if (!RtlDeleteFunctionTable(s_unwind.data())) + { + LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); + } + + if (!VirtualFree(s_memory, 0, MEM_DECOMMIT)) + { + LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError()); + } +#else + if (::mprotect(s_memory, s_memory_size, PROT_NONE)) + { + LOG_FATAL(GENERAL, "mprotect(0x%p) failed! 
Error %d", s_memory, errno); + } + + // TODO: unregister EH frames if necessary +#endif + } + +private: + void* m_next = s_memory; +}; + +// Helper class +struct EventListener final : llvm::JITEventListener +{ + virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override + { + const llvm::StringRef elf = obj.getData(); + fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite) + .write(elf.data(), elf.size()); + } +}; + +static EventListener s_listener; + +jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unordered_map&& table) +{ + EXPECTS(s_memory); + + std::string result; + + const auto module_ptr = _module.get(); + + // Initialization + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + LLVMLinkInMCJIT(); + + m_engine.reset(llvm::EngineBuilder(std::move(_module)) + .setErrorStr(&result) + .setMCJITMemoryManager(std::make_unique(std::move(table))) + .setOptLevel(llvm::CodeGenOpt::Aggressive) + .setRelocationModel(llvm::Reloc::PIC_) + .setCodeModel((u64)s_memory <= 0x60000000 ? llvm::CodeModel::Medium : llvm::CodeModel::Large) // TODO + .setMCPU(llvm::sys::getHostCPUName()) + .create()); + + if (!m_engine) + { + throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result); + } + + m_engine->setProcessAllSections(true); // ??? 
+ m_engine->RegisterJITEventListener(&s_listener); + m_engine->finalizeObject(); + + for (auto& func : module_ptr->functions()) + { + if (!func.empty()) + { + const std::string& name = func.getName(); + + // Register compiled function + m_map[name] = m_engine->getFunctionAddress(name); + } + + // Delete IR to lower memory consumption + func.deleteBody(); + } + +#ifdef _WIN32 + // Register .xdata UNWIND_INFO (.pdata section is empty for some reason) + std::set func_set; + + for (const auto& pair : m_map) + { + func_set.emplace(pair.second); + } + + // Hack (cannot obtain last function size) + func_set.emplace(::align(*--func_set.end() + 4096, 4096)); + + const u64 base = (u64)s_memory; + const u8* bits = s_unwind_info; + + s_unwind.clear(); + s_unwind.reserve(m_map.size()); + + for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++) + { + const u64 addr = *it; + const u64 next = *func_set.upper_bound(addr); + + // Generate RUNTIME_FUNCTION record + RUNTIME_FUNCTION uw; + uw.BeginAddress = static_cast(addr - base); + uw.EndAddress = static_cast(next - base); + uw.UnwindData = static_cast((u64)bits - base); + s_unwind.emplace_back(uw); + + // Parse .xdata UNWIND_INFO record + const u8 flags = *bits++; // Version and flags + const u8 prolog = *bits++; // Size of prolog + const u8 count = *bits++; // Count of unwind codes + const u8 frame = *bits++; // Frame Reg + Off + bits += ::align(std::max(1, count), 2) * sizeof(u16); // UNWIND_CODE array + + if (flags != 1) + { + LOG_ERROR(GENERAL, "LLVM: unsupported UNWIND_INFO version/flags (0x%02x)", flags); + break; + } + + LOG_TRACE(GENERAL, "LLVM: .xdata at 0x%llx: function 0x%x..0x%x: p0x%02x, c0x%02x, f0x%02x", uw.UnwindData + base, uw.BeginAddress + base, uw.EndAddress + base, prolog, count, frame); + } + + if (s_unwind_info + s_unwind_size != bits) + { + LOG_FATAL(GENERAL, "LLVM: .xdata analysis failed! 
(0x%p != 0x%p)", s_unwind_info + s_unwind_size, bits); + } + else if (!RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base)) + { + LOG_FATAL(GENERAL, "RtlAddFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); + } + else + { + LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size); + } +#endif +} + +jit_compiler::~jit_compiler() +{ +} + +#endif diff --git a/Utilities/JIT.h b/Utilities/JIT.h new file mode 100644 index 0000000000..24c27d068d --- /dev/null +++ b/Utilities/JIT.h @@ -0,0 +1,50 @@ +#pragma once + +#ifdef LLVM_AVAILABLE + +#include +#include +#include + +#include "types.h" + +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +extern llvm::LLVMContext g_llvm_ctx; + +// Temporary compiler interface +class jit_compiler final +{ + // Execution instance + std::unique_ptr m_engine; + + // Compiled functions + std::unordered_map m_map; + +public: + jit_compiler(std::unique_ptr&&, std::unordered_map&&); + ~jit_compiler(); + + // Get compiled function address + std::uintptr_t get(const std::string& name) const + { + const auto found = m_map.find(name); + + if (found != m_map.end()) + { + return found->second; + } + + return 0; + } +}; + +#endif diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 8812204613..0929157806 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -146,6 +146,9 @@ else() else() llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize ipo x86codegen x86disassembler mcdisassembler) endif() + if (NOT MSVC) + set_source_files_properties(${RPCS3_SRC_DIR}/../Utilities/JIT.cpp PROPERTIES COMPILE_FLAGS -fno-rtti) + endif() endif() link_directories( diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 93033fed96..eb50c750c6 100644 --- 
a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -8,6 +8,36 @@ #include "PPUAnalyser.h" #include "PPUModule.h" +#ifdef LLVM_AVAILABLE +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/Support/FormattedStream.h" +#include "llvm/IR/LLVMContext.h" +//#include "llvm/IR/Dominators.h" +#include "llvm/IR/Verifier.h" +//#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LegacyPassManager.h" +//#include "llvm/IR/Module.h" +//#include "llvm/IR/Function.h" +//#include "llvm/Analysis/Passes.h" +//#include "llvm/Analysis/BasicAliasAnalysis.h" +//#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +//#include "llvm/Analysis/LoopInfo.h" +//#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Vectorize.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#include "Utilities/JIT.h" +#include "PPUTranslator.h" +#endif + enum class ppu_decoder_type { precise, @@ -322,54 +352,6 @@ void PPUThread::fast_call(u32 addr, u32 rtoc) //} } -#ifdef LLVM_AVAILABLE -#ifdef _MSC_VER -#pragma warning(push, 0) -#endif -//#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -//#include "llvm/Support/Host.h" -#include "llvm/Support/FormattedStream.h" -//#include "llvm/Support/Debug.h" -//#include "llvm/CodeGen/CommandFlags.h" -//#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/LLVMContext.h" -//#include "llvm/IR/Dominators.h" -#include "llvm/IR/Verifier.h" -//#include "llvm/IR/InstIterator.h" -#include "llvm/IR/LegacyPassManager.h" -//#include "llvm/IR/Module.h" -//#include "llvm/IR/Function.h" -//#include "llvm/Analysis/Passes.h" -//#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -//#include "llvm/Analysis/LoopInfo.h" -//#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/Lint.h" -#include 
"llvm/Transforms/Scalar.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Vectorize.h" - -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/ExecutionEngine/JITEventListener.h" -//#include "llvm/Object/ObjectFile.h" -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#include "PPUTranslator.h" - -#ifdef _WIN32 -#include -#else -#include -#include -#include -#include -#include -#endif - const ppu_decoder s_ppu_itype; extern u64 get_timebased_time(); @@ -386,326 +368,159 @@ extern __m128i sse_cellbe_lvrx(u64 addr); extern void sse_cellbe_stvlx(u64 addr, __m128i a); extern void sse_cellbe_stvrx(u64 addr, __m128i a); -struct Listener final : llvm::JITEventListener +[[noreturn]] static void ppu_trap(u64 addr) { - virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override - { - const llvm::StringRef elf = obj.getData(); - fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite) - .write(elf.data(), elf.size()); - } -}; + throw fmt::exception("Trap! (0x%llx)", addr); +} -static Listener s_listener; - -// Memory size: 512 MB -static const u64 s_memory_size = 0x20000000; - -// Try to reserve a portion of virtual memory in the first 2 GB address space, if possible. 
-static void* const s_memory = []() -> void* +static void ppu_trace(u64 addr) { -#ifdef _WIN32 - for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000) + LOG_NOTICE(PPU, "Trace: 0x%llx", addr); +} + +static void ppu_hlecall(PPUThread& ppu, u32 index) +{ + ppu_execute_function(ppu, index); + if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily +} + +static void ppu_syscall(PPUThread& ppu, u64 code) +{ + ppu_execute_syscall(ppu, code); + if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily +} + +static u32 ppu_tbl() +{ + return (u32)get_timebased_time(); +} + +static void ppu_call(PPUThread& ppu, u32 addr) +{ + const auto found = s_ppu_compiled.find(addr); + + if (found != s_ppu_compiled.end()) { - if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS)) - { - return (void*)addr; - } + return found->second(ppu); } - return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS); + const auto op = vm::read32(addr).value(); + const auto itype = s_ppu_itype.decode(op); + + // Allow HLE callbacks without compiling them + if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) + { + return ppu_hlecall(ppu, op & 0x3ffffff); + } + + ppu_trap(addr); +} + +static __m128 sse_rcp_ps(__m128 A) +{ + return _mm_rcp_ps(A); +} + +static __m128 sse_rsqrt_ps(__m128 A) +{ + return _mm_rsqrt_ps(A); +} + +static float sse_rcp_ss(float A) +{ + _mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A))); + return A; +} + +static float sse_rsqrt_ss(float A) +{ + _mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A))); + return A; +} + +static u32 ppu_lwarx(u32 addr) +{ + be_t reg_value; + vm::reservation_acquire(®_value, addr, sizeof(reg_value)); + return reg_value; +} + +static u64 ppu_ldarx(u32 addr) +{ + be_t reg_value; + vm::reservation_acquire(®_value, addr, sizeof(reg_value)); + return reg_value; +} + +static bool ppu_stwcx(u32 addr, u32 reg_value) +{ + const be_t data = reg_value; + return 
vm::reservation_update(addr, &data, sizeof(data)); +} + +static bool ppu_stdcx(u32 addr, u64 reg_value) +{ + const be_t data = reg_value; + return vm::reservation_update(addr, &data, sizeof(data)); +} + +static bool adde_carry(u64 a, u64 b, bool c) +{ +#ifdef _MSC_VER + return _addcarry_u64(c, a, b, nullptr) != 0; #else - return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + bool result; + __asm__("addb $0xff, %[c] \n adcq %[a], %[b] \n setb %[result]" : [a] "+&r" (a), [b] "+&r" (b), [c] "+&r" (c), [result] "=r" (result)); + return result; #endif -}(); +} -// EH frames -static u8* s_unwind_info; -static u64 s_unwind_size; - -#ifdef _WIN32 -// Custom .pdata section replacement -static std::vector s_unwind; -#endif - -struct MemoryManager final : llvm::RTDyldMemoryManager +// Interpreter call for simple vector instructions +static __m128i ppu_vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c) { - static PPUThread* context(u64 addr) // Unused + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu.VR[21].vi = _a; + ppu.VR[22].vi = _b; + ppu.VR[23].vi = _c; + + ppu_opcode_t op{}; + op.vd = 20; + op.va = 21; + op.vb = 22; + op.vc = 23; + func(ppu, op); + + return ppu.VR[20].vi; +} + +extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) +{ + if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty()) { - //trace(addr); - return static_cast(get_current_cpu_thread()); + return; } - [[noreturn]] static void trap(u64 addr) + std::unordered_map link_table { - LOG_ERROR(PPU, "Trap! (0x%llx)", addr); - throw fmt::exception("Trap! 
(0x%llx)", addr); - } - - static void trace(u64 addr) - { - LOG_NOTICE(PPU, "Trace: 0x%llx", addr); - } - - static void hlecall(PPUThread& ppu, u32 index) - { - ppu_execute_function(ppu, index); - if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily - } - - static void syscall(PPUThread& ppu, u64 code) - { - ppu_execute_syscall(ppu, code); - if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily - } - - static u32 tbl() - { - return (u32)get_timebased_time(); - } - - static void call(PPUThread& ppu, u32 addr) - { - const auto found = s_ppu_compiled.find(addr); - - if (found != s_ppu_compiled.end()) - { - return found->second(ppu); - } - - const auto op = vm::read32(addr).value(); - const auto itype = s_ppu_itype.decode(op); - - // Allow HLE callbacks without compiling them - if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) - { - return hlecall(ppu, op & 0x3ffffff); - } - - trap(addr); - } - - static __m128 sse_rcp_ps(__m128 A) - { - return _mm_rcp_ps(A); - } - - static __m128 sse_rsqrt_ps(__m128 A) - { - return _mm_rsqrt_ps(A); - } - - static float sse_rcp_ss(float A) - { - _mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A))); - return A; - } - - static float sse_rsqrt_ss(float A) - { - _mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A))); - return A; - } - - static u32 lwarx(u32 addr) - { - be_t reg_value; - vm::reservation_acquire(®_value, addr, sizeof(reg_value)); - return reg_value; - } - - static u64 ldarx(u32 addr) - { - be_t reg_value; - vm::reservation_acquire(®_value, addr, sizeof(reg_value)); - return reg_value; - } - - static bool stwcx(u32 addr, u32 reg_value) - { - const be_t data = reg_value; - return vm::reservation_update(addr, &data, sizeof(data)); - } - - static bool stdcx(u32 addr, u64 reg_value) - { - const be_t data = reg_value; - return vm::reservation_update(addr, &data, sizeof(data)); - } - - static bool adde_carry(u64 a, u64 b, bool c) - { - return _addcarry_u64(c, a, 
b, nullptr) != 0; - } - - // Interpreter call for simple vector instructions - static __m128i vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - ppu.VR[21].vi = _a; - ppu.VR[22].vi = _b; - ppu.VR[23].vi = _c; - - ppu_opcode_t op{}; - op.vd = 20; - op.va = 21; - op.vb = 22; - op.vc = 23; - func(ppu, op); - - return ppu.VR[20].vi; - } - - // Interpreter call for simple vector instructions with immediate - static __m128i veciop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, __m128i _b) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - ppu.VR[22].vi = _b; - - op.vd = 20; - op.vb = 22; - func(ppu, op); - - return ppu.VR[20].vi; - } - - // Interpreter call for FP instructions - static f64 fpop(decltype(&ppu_interpreter::UNK) func, f64 _a, f64 _b, f64 _c) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - ppu.FPR[21] = _a; - ppu.FPR[22] = _b; - ppu.FPR[23] = _c; - - ppu_opcode_t op{}; - op.frd = 20; - op.fra = 21; - op.frb = 22; - op.frc = 23; - func(ppu, op); - - return ppu.FPR[20]; - } - - // Interpreter call for GPR instructions writing result to RA - static u64 aimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 s = ppu.GPR[op.rs]; - ppu.GPR[op.rs] = _s; - - func(ppu, op); - - const u64 r = ppu.GPR[op.ra]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rs] = s; - return r; - } - - // Interpreter call for GPR instructions writing result to RA - static u64 aimmbop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _b) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 s = ppu.GPR[op.rs]; - const u64 b = ppu.GPR[op.rb]; - ppu.GPR[op.rs] = _s; - ppu.GPR[op.rb] = _b; - - func(ppu, op); - - const u64 r = ppu.GPR[op.ra]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rs] = s; 
- ppu.GPR[op.rb] = b; - return r; - } - - // Interpreter call for GPR instructions writing result to RA (destructive) - static u64 aaimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _a) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 s = ppu.GPR[op.rs]; - const u64 a = ppu.GPR[op.ra]; - ppu.GPR[op.rs] = _s; - ppu.GPR[op.ra] = _a; - - func(ppu, op); - - const u64 r = ppu.GPR[op.ra]; - ppu.GPR[op.rs] = s; - ppu.GPR[op.ra] = a; - return r; - } - - static u64 immaop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 d = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = _a; - - func(ppu, op); - - const u64 r = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rd] = d; - return r; - } - - static u64 immabop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a, u64 _b) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 b = ppu.GPR[op.rb]; - const u64 d = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = _a; - ppu.GPR[op.rb] = _b; - - func(ppu, op); - - const u64 r = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rb] = b; - ppu.GPR[op.rd] = d; - return r; - } - - // No operation on specific u64 value (silly optimization barrier) - static u64 nop64(u64 value) - { - return value; - } - - std::unordered_map table - { - { "__memory", (u64)vm::base(0) }, - { "__trap", (u64)&trap }, - { "__trace", (u64)&trace }, - { "__hlecall", (u64)&hlecall }, - { "__syscall", (u64)&syscall }, - { "__get_tbl", (u64)&tbl }, - { "__call", (u64)&call }, - { "__lwarx", (u64)&lwarx }, - { "__ldarx", (u64)&ldarx }, - { "__stwcx", (u64)&stwcx }, - { "__stdcx", (u64)&stdcx }, + { "__memory", (u64)vm::g_base_addr }, + { "__memptr", (u64)&vm::g_base_addr }, + { "__trap", (u64)&ppu_trap }, + { "__trace", (u64)&ppu_trace }, + { "__hlecall", (u64)&ppu_hlecall }, + { "__syscall", (u64)&ppu_syscall }, 
+ { "__get_tbl", (u64)&ppu_tbl }, + { "__call", (u64)&ppu_call }, + { "__lwarx", (u64)&ppu_lwarx }, + { "__ldarx", (u64)&ppu_ldarx }, + { "__stwcx", (u64)&ppu_stwcx }, + { "__stdcx", (u64)&ppu_stdcx }, + { "__vec3op", (u64)&ppu_vec3op }, { "__adde_get_ca", (u64)&adde_carry }, { "__vexptefp", (u64)&sse_exp2_ps }, { "__vlogefp", (u64)&sse_log2_ps }, { "__vperm", (u64)&sse_altivec_vperm }, { "__vrefp", (u64)&sse_rcp_ps }, { "__vrsqrtefp", (u64)&sse_rsqrt_ps }, - { "__vec3op", (u64)&vec3op }, - { "__veciop", (u64)&veciop }, - { "__aimmop", (u64)&aimmop }, - { "__aimmbop", (u64)&aimmbop }, - { "__aaimmop", (u64)&aaimmop }, - { "__immaop", (u64)&immaop }, - { "__immabop", (u64)&immabop }, - { "__fpop", (u64)&fpop }, - { "__nop64", (u64)&nop64 }, { "__lvsl", (u64)&sse_altivec_lvsl }, { "__lvsr", (u64)&sse_altivec_lvsr }, { "__lvlx", (u64)&sse_cellbe_lvlx }, @@ -716,142 +531,20 @@ struct MemoryManager final : llvm::RTDyldMemoryManager { "__frsqrte", (u64)&sse_rsqrt_ss }, }; - virtual u64 getSymbolAddress(const std::string& name) override - { - if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name)) - { - LOG_ERROR(GENERAL, "LLVM: Linkage requested %s -> 0x%016llx", name, addr); - return addr; - } - - const auto found = table.find(name); - - if (found != table.end()) - { - return found->second; - } - - LOG_FATAL(GENERAL, "LLVM: Linkage failed for %s", name); - return (u64)trap; - } - - virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override - { - // Simple allocation (TODO) - const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096); - -#ifdef _WIN32 - if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) -#else - if (::mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC)) -#endif - { - LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s'", sec_name.data()); - return nullptr; - } - - LOG_SUCCESS(GENERAL, "LLVM: Code section '%s' allocated -> 0x%p", sec_name.data(), ptr); - 
return (u8*)ptr; - } - - virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override - { - // Simple allocation (TODO) - const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096); - -#ifdef _WIN32 - if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE)) -#else - if (::mprotect(ptr, size, PROT_READ | PROT_WRITE)) -#endif - { - LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s'", sec_name.data()); - return nullptr; - } - - LOG_SUCCESS(GENERAL, "LLVM: Data section '%s' allocated -> 0x%p", sec_name.data(), ptr); - return (u8*)ptr; - } - - virtual bool finalizeMemory(std::string* = nullptr) override - { - // TODO: make sections read-only when necessary - return false; - } - - virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override - { - s_unwind_info = addr; - s_unwind_size = size; - - return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size); - } - - virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override - { - LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected - - return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size); - } - - ~MemoryManager() - { -#ifdef _WIN32 - if (!RtlDeleteFunctionTable(s_unwind.data())) - { - LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); - } - - if (!VirtualFree(s_memory, 0, MEM_DECOMMIT)) - { - LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError()); - } -#else - if (::mprotect(s_memory, s_memory_size, PROT_NONE)) - { - LOG_FATAL(GENERAL, "mprotect(0x%p) failed! 
Error %d", s_memory, errno); - } - - // TODO: unregister EH frames if necessary -#endif - } - -private: - void* m_next = s_memory; -}; - -llvm::LLVMContext g_context; - -extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) -{ - if (!s_memory) - { - throw std::runtime_error("LLVM: Memory not allocated, report to the developers." HERE); - } - - if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty()) - { - return; - } - +#ifdef LLVM_AVAILABLE using namespace llvm; - // Initialization - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - LLVMLinkInMCJIT(); - // Create LLVM module - std::unique_ptr module = std::make_unique(name, g_context); + std::unique_ptr module = std::make_unique(name, g_llvm_ctx); // Initialize target module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); // Initialize translator - std::unique_ptr translator = std::make_unique(g_context, module.get(), 0, entry); + std::unique_ptr translator = std::make_unique(g_llvm_ctx, module.get(), 0, entry); // Define some types - const auto _void = Type::getVoidTy(g_context); + const auto _void = Type::getVoidTy(g_llvm_ctx); const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false); // Initialize function list @@ -859,7 +552,9 @@ extern void ppu_initialize(const std::string& name, const std::vectorAddFunction(info.first, cast(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func))); + const auto f = cast(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func)); + f->addAttribute(1, Attribute::NoAlias); + translator->AddFunction(info.first, f); } translator->AddBlockInfo(info.first); @@ -874,18 +569,16 @@ extern void ppu_initialize(const std::string& name, const std::vectorTranslateToIR(pseudo_addr, pseudo_addr, nullptr, [](PPUTranslator* _this) - // { - // (_this->*s_current)(op); - // _this->ReturnFromFunction(); - // }); - - // pm.run(*func); - //} - 
legacy::PassManager mpm; // Remove unused functions, structs, global variables, etc mpm.add(createStripDeadPrototypesPass()); + //mpm.add(createFunctionInliningPass()); mpm.run(*module); + // TODO: replacing __syscall/__hlecall + // TODO: improve __call and s_ppu_compiled + std::string result; raw_string_ostream out(result); @@ -941,25 +622,14 @@ extern void ppu_initialize(const std::string& name, const std::vector engine(EngineBuilder(std::move(module)) - .setErrorStr(&result) - .setMCJITMemoryManager(std::make_unique()) - .setOptLevel(llvm::CodeGenOpt::Aggressive) - .setRelocationModel(Reloc::PIC_) - .setCodeModel((u64)s_memory <= 0x60000000 ? CodeModel::Medium : CodeModel::Large) - .setMCPU(sys::getHostCPUName()) - .create()); + const auto jit = fxm::make(std::move(module), std::move(link_table)); - if (!engine) + if (!jit) { - throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result); + LOG_FATAL(PPU, "LLVM: Multiple modules are not yet supported"); + return; } - engine->setProcessAllSections(true); - //engine->setVerifyModules(true); - engine->RegisterJITEventListener(&s_listener); - engine->finalizeObject(); - s_ppu_compiled.clear(); // Get function addresses @@ -967,72 +637,13 @@ extern void ppu_initialize(const std::string& name, const std::vectorgetFunctionAddress(fmt::format("__sub_%x", info.first)); + const std::uintptr_t link = jit->get(fmt::format("__sub_%x", info.first)); s_ppu_compiled.emplace(info.first, (void(*)(PPUThread&))link); LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second); } } - // Delete IR to lower memory consumption - for (auto& func : module_ptr->functions()) - { - func.deleteBody(); - } - -#ifdef _WIN32 - // Register .xdata UNWIND_INFO (.pdata section is empty for some reason) - std::set func_set; - - for (const auto& pair : s_ppu_compiled) - { - // Get addresses - func_set.emplace((u64)pair.second); - } - - func_set.emplace(::align(*--func_set.end() + 
4096, 4096)); - - const u64 base = (u64)s_memory; - const u8* bits = s_unwind_info; - - s_unwind.clear(); - s_unwind.reserve(s_ppu_compiled.size()); - - for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++) - { - const u64 addr = *it; - const u64 next = *func_set.upper_bound(addr); - - // Generate RUNTIME_FUNCTION record - RUNTIME_FUNCTION uw; - uw.BeginAddress = static_cast(addr - base); - uw.EndAddress = static_cast(next - base); - uw.UnwindData = static_cast((u64)bits - base); - s_unwind.emplace_back(uw); - - // Parse .xdata record - VERIFY(*bits++ == 1); // Version and flags - bits++; // Size of prolog - const u8 count = *bits++; // Count of unwind codes - bits++; // Frame Reg + Off - bits += ::align(count, 2) * sizeof(u16); // UNWIND_CODE array - while (!*bits && bits < s_unwind_info + s_unwind_size) bits++; // Skip strange zero padding (???) - } - - VERIFY(bits == s_unwind_info + s_unwind_size); - VERIFY(RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base)); - LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size); -#endif - - fxm::import(WRAP_EXPR(engine)); - LOG_SUCCESS(PPU, "LLVM: Compilation finished (%s)", sys::getHostCPUName().data()); -} - -#else - -extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) -{ -} - #endif +} diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 8fa4664b20..e56ca7a31c 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -1,5 +1,7 @@ #pragma once +#ifdef LLVM_AVAILABLE + #include #include #include @@ -830,3 +832,5 @@ public: void UNK(ppu_opcode_t op); }; + +#endif diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 0e10df8f90..54c248e965 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -64,36 +64,19 @@ ..\llvm\include;..\llvm_build\include; - %windir%\sysnative\cmd.exe /c 
"$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h + %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" + Updating git-version.h - NotUsing - NotUsing - NotUsing - NotUsing - NotUsing + NotUsing + + NotUsing + NotUsing @@ -404,6 +387,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index bdc7dfe983..c0420a3774 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -872,6 +872,15 @@ Source Files + + Source Files + + + Source Files + + + Utilities + @@ -1666,5 +1675,14 @@ Header Files + + Header Files + + + Header Files + + + Utilities + \ No newline at end of file From 63e690ca11932cd26f27fb3f8631443f38b2fa0d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 23 Jun 2016 00:39:39 +0300 Subject: [PATCH 3/6] CMP, TEST instruction support --- Utilities/Thread.cpp | 77 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index e21023c2c0..c44543d4fa 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -127,6 +127,8 @@ enum x64_op_t : u32 X64OP_NONE, X64OP_LOAD, // obtain and put the value into x64 register X64OP_LOAD_BE, + X64OP_LOAD_CMP, + X64OP_LOAD_TEST, X64OP_STORE, // take the value from x64 register or an immediate and use it X64OP_STORE_BE, X64OP_MOVS, @@ -433,7 +435,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz case 4: out_op = X64OP_AND; break; case 5: out_op = X64OP_SUB; break; case 
6: out_op = X64OP_XOR; break; - default: out_op = X64OP_NONE; break; // CMP + default: out_op = X64OP_LOAD_CMP; break; } out_reg = X64_IMM8; @@ -452,7 +454,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz case 4: out_op = X64OP_AND; break; case 5: out_op = X64OP_SUB; break; case 6: out_op = X64OP_XOR; break; - default: out_op = X64OP_NONE; break; // CMP + default: out_op = X64OP_LOAD_CMP; break; } out_reg = oso ? X64_IMM16 : X64_IMM32; @@ -471,7 +473,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz case 4: out_op = X64OP_AND; break; case 5: out_op = X64OP_SUB; break; case 6: out_op = X64OP_XOR; break; - default: out_op = X64OP_NONE; break; // CMP + default: out_op = X64OP_LOAD_CMP; break; } out_reg = X64_IMM8; @@ -611,6 +613,32 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz } break; } + case 0xf6: + { + switch (auto mod_code = get_modRM_reg(code, 0)) + { + case 0: out_op = X64OP_LOAD_TEST; break; + default: out_op = X64OP_NONE; break; // TODO... + } + + out_reg = X64_IMM8; + out_size = 1; + out_length += get_modRM_size(code) + 1; + return; + } + case 0xf7: + { + switch (auto mod_code = get_modRM_reg(code, 0)) + { + case 0: out_op = X64OP_LOAD_TEST; break; + default: out_op = X64OP_NONE; break; // TODO... + } + + out_reg = oso ? X64_IMM16 : X64_IMM32; + out_size = get_op_size(rex, oso); + out_length += get_modRM_size(code) + (oso ? 2 : 4); + return; + } } out_op = X64OP_NONE; @@ -990,9 +1018,43 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) { case X64OP_LOAD: case X64OP_LOAD_BE: + case X64OP_LOAD_CMP: + case X64OP_LOAD_TEST: { u32 value; - if (is_writing || !thread->read_reg(addr, value) || !put_x64_reg_value(context, reg, d_size, op == X64OP_LOAD ? 
se_storage::swap(value) : value)) + if (is_writing || !thread->read_reg(addr, value)) + { + return false; + } + + if (op != X64OP_LOAD_BE) + { + value = se_storage::swap(value); + } + + if (op == X64OP_LOAD_CMP) + { + u64 rvalue; + if (!get_x64_reg_value(context, reg, d_size, i_size, rvalue) || !set_x64_cmp_flags(context, d_size, value, rvalue)) + { + return false; + } + + break; + } + + if (op == X64OP_LOAD_TEST) + { + u64 rvalue; + if (!get_x64_reg_value(context, reg, d_size, i_size, rvalue) || !set_x64_cmp_flags(context, d_size, value & rvalue, 0)) + { + return false; + } + + break; + } + + if (!put_x64_reg_value(context, reg, d_size, value)) { return false; } @@ -1003,7 +1065,12 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) case X64OP_STORE_BE: { u64 reg_value; - if (!is_writing || !get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !thread->write_reg(addr, op == X64OP_STORE ? se_storage::swap((u32)reg_value) : (u32)reg_value)) + if (!is_writing || !get_x64_reg_value(context, reg, d_size, i_size, reg_value)) + { + return false; + } + + if (!thread->write_reg(addr, op == X64OP_STORE ? 
se_storage::swap((u32)reg_value) : (u32)reg_value)) { return false; } From 9db7de29fb4903afa734ad45abfc3a9d0b16e691 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 25 Jun 2016 08:16:15 +0300 Subject: [PATCH 4/6] Syscall analysis implemented --- Utilities/Thread.cpp | 3 +- Utilities/dynamic_library.cpp | 2 +- rpcs3/Emu/Cell/PPUFunction.cpp | 2 +- rpcs3/Emu/Cell/PPUFunction.h | 7 +- rpcs3/Emu/Cell/PPUModule.cpp | 54 +++++++----- rpcs3/Emu/Cell/PPUModule.h | 15 +--- rpcs3/Emu/Cell/PPUThread.cpp | 139 ++++++++++++++++++++----------- rpcs3/Emu/Cell/PPUThread.h | 1 + rpcs3/Emu/Cell/lv2/lv2.cpp | 38 ++++----- rpcs3/Emu/Cell/lv2/sys_timer.cpp | 2 + rpcs3/Emu/IdManager.cpp | 4 +- rpcs3/Emu/IdManager.h | 34 ++++---- rpcs3/Emu/PSP2/ARMv7Function.h | 7 +- rpcs3/Emu/PSP2/ARMv7Module.cpp | 33 ++++---- rpcs3/Emu/PSP2/ARMv7Thread.cpp | 47 +++++++---- rpcs3/Emu/PSP2/ARMv7Thread.h | 1 + 16 files changed, 234 insertions(+), 155 deletions(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index c44543d4fa..607bf03945 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -299,8 +299,9 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz switch (op2) { case 0x11: + case 0x29: { - if (!repe && !repne && !oso) // MOVUPS xmm/m, xmm + if (!repe && !repne) // MOVUPS/MOVAPS/MOVUPD/MOVAPD xmm/m, xmm { out_op = X64OP_STORE; out_reg = get_modRM_reg_xmm(code, rex); diff --git a/Utilities/dynamic_library.cpp b/Utilities/dynamic_library.cpp index 572edc42ea..68ec5066c4 100644 --- a/Utilities/dynamic_library.cpp +++ b/Utilities/dynamic_library.cpp @@ -42,7 +42,7 @@ namespace utils void *dynamic_library::get_impl(const std::string &name) const { #ifdef _WIN32 - return GetProcAddress((HMODULE)m_handle, name.c_str()); + return (void*)GetProcAddress((HMODULE)m_handle, name.c_str()); #else return dlsym(m_handle, (char *)name.c_str()); #endif diff --git a/rpcs3/Emu/Cell/PPUFunction.cpp b/rpcs3/Emu/Cell/PPUFunction.cpp index dc1612bb68..692a83a589 
100644 --- a/rpcs3/Emu/Cell/PPUFunction.cpp +++ b/rpcs3/Emu/Cell/PPUFunction.cpp @@ -2363,7 +2363,7 @@ s32 ppu_error_code::report(s32 error, const char* text) { if (auto func = static_cast(thread)->last_function) { - LOG_ERROR(PPU, "Function '%s' failed with 0x%08x : %s", func, error, text); + LOG_ERROR(PPU, "'%s' failed with 0x%08x : %s", func, error, text); } else { diff --git a/rpcs3/Emu/Cell/PPUFunction.h b/rpcs3/Emu/Cell/PPUFunction.h index a6f066a40c..3b60580510 100644 --- a/rpcs3/Emu/Cell/PPUFunction.h +++ b/rpcs3/Emu/Cell/PPUFunction.h @@ -4,7 +4,12 @@ using ppu_function_t = void(*)(PPUThread&); -#define BIND_FUNC(func) [](PPUThread& ppu){ ppu.last_function = #func; ppu_func_detail::do_call(ppu, func); } +#define BIND_FUNC(func) static_cast([](PPUThread& ppu){\ + const auto old_f = ppu.last_function;\ + ppu.last_function = #func;\ + ppu_func_detail::do_call(ppu, func);\ + ppu.last_function = old_f;\ +}) struct ppu_va_args_t { diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 09920d6d32..49e02818eb 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -125,9 +125,22 @@ extern void ppu_initialize(const std::string& name, const std::vector g_ppu_function_cache; +// Function name cache in format %s.%s (module name, function name) +std::vector g_ppu_function_names; + // Function NID cache for autopause. Autopause tool should probably be rewritten. 
std::vector g_ppu_fnid_cache; +extern std::string ppu_get_module_function_name(u32 index) +{ + if (index < g_ppu_function_names.size()) + { + return g_ppu_function_names[index]; + } + + return fmt::format(".%u", index); +} + extern void ppu_execute_function(PPUThread& ppu, u32 index) { if (index < g_ppu_function_cache.size()) @@ -137,21 +150,8 @@ extern void ppu_execute_function(PPUThread& ppu, u32 index) if (const auto func = g_ppu_function_cache[index]) { - const auto previous_function = ppu.last_function; // TODO: use gsl::finally or something, but only if it's equally fast - - try - { - func(ppu); - } - catch (...) - { - logs::HLE.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", ppu.last_function); - ppu.last_function = previous_function; - throw; - } - - LOG_TRACE(HLE, "Function '%s' finished, r3=0x%llx", ppu.last_function, ppu.GPR[3]); - ppu.last_function = previous_function; + func(ppu); + LOG_TRACE(HLE, "'%s' finished, r3=0x%llx", ppu_get_module_function_name(index), ppu.GPR[3]); return; } } @@ -159,6 +159,16 @@ extern void ppu_execute_function(PPUThread& ppu, u32 index) throw fmt::exception("Function not registered (index %u)" HERE, index); } +extern ppu_function_t ppu_get_function(u32 index) +{ + if (index < g_ppu_function_cache.size()) + { + return g_ppu_function_cache[index]; + } + + return nullptr; +} + extern u32 ppu_generate_id(const char* name) { // Symbol name suffix @@ -312,7 +322,10 @@ static void ppu_initialize_modules() // Reinitialize function cache g_ppu_function_cache = ppu_function_manager::get(); - g_ppu_fnid_cache = std::vector(g_ppu_function_cache.size()); + g_ppu_function_names.clear(); + g_ppu_function_names.resize(g_ppu_function_cache.size()); + g_ppu_fnid_cache.clear(); + g_ppu_fnid_cache.resize(g_ppu_function_cache.size()); // "Use" all the modules for correct linkage for (auto& module : registered) @@ -322,6 +335,7 @@ static void ppu_initialize_modules() for (auto& function : 
module->functions) { LOG_TRACE(LOADER, "** 0x%08X: %s", function.first, function.second.name); + g_ppu_function_names.at(function.second.index) = fmt::format("%s.%s", module->name, function.second.name); g_ppu_fnid_cache.at(function.second.index) = function.first; } @@ -1531,10 +1545,12 @@ void ppu_exec_loader::load() const { // TODO const u32 index = ::size32(g_ppu_function_cache); + const std::string& fname = ppu_get_function_name(module.first, fnid); g_ppu_function_cache.emplace_back(); + g_ppu_function_names.emplace_back(fmt::format("%s.%s", module.first, fname)); g_ppu_fnid_cache.emplace_back(fnid); - LOG_ERROR(LOADER, "Unknown function '%s' in module '%s' (index %u)", ppu_get_function_name(module.first, fnid), module.first, index); + LOG_ERROR(LOADER, "Unknown function '%s' in module '%s' (index %u)", fname, module.first, index); for (auto& import : entry.second.second) { @@ -1544,11 +1560,11 @@ void ppu_exec_loader::load() const if (!ppu_patch_import_stub(stub, index)) { - LOG_ERROR(LOADER, "Failed to inject code for function '%s' in module '%s' (0x%x)", ppu_get_function_name(module.first, fnid), module.first, stub); + LOG_ERROR(LOADER, "Failed to inject code for function '%s' in module '%s' (0x%x)", fname, module.first, stub); } else { - LOG_NOTICE(LOADER, "Injected hack for function '%s' in module '%s' (*0x%x)", ppu_get_function_name(module.first, fnid), module.first, stub); + LOG_NOTICE(LOADER, "Injected hack for function '%s' in module '%s' (*0x%x)", fname, module.first, stub); } } diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h index 957f8b6a38..274c0964e9 100644 --- a/rpcs3/Emu/Cell/PPUModule.h +++ b/rpcs3/Emu/Cell/PPUModule.h @@ -200,20 +200,7 @@ public: template> inline RT ppu_execute_function_or_callback(const char* name, PPUThread& ppu, Args&&... args) { - const auto previous_function = ppu.last_function; // TODO - - try - { - return Func(std::forward(args)...); - } - catch (...) 
- { - LOG_ERROR(PPU, "Function call '%s' aborted", ppu.last_function); - ppu.last_function = previous_function; - throw; - } - - ppu.last_function = previous_function; + return Func(std::forward(args)...); } #define CALL_FUNC(ppu, func, ...) ppu_execute_function_or_callback(#func, ppu, __VA_ARGS__) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index eb50c750c6..b46ac6ffb0 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -16,7 +16,7 @@ #include "llvm/IR/LLVMContext.h" //#include "llvm/IR/Dominators.h" #include "llvm/IR/Verifier.h" -//#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" //#include "llvm/IR/Module.h" //#include "llvm/IR/Function.h" @@ -27,6 +27,7 @@ //#include "llvm/Analysis/LoopInfo.h" //#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/Lint.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Vectorize.h" @@ -63,7 +64,7 @@ struct ppu_addr_hash } }; -static std::unordered_map s_ppu_compiled; +static std::unordered_map s_ppu_compiled; // TODO @@ -126,13 +127,11 @@ void PPUThread::cpu_task() { //SetHostRoundingMode(FPSCR_RN_NEAR); - if (custom_task) - { - if (check_status()) return; - - return custom_task(*this); - } + return custom_task ? 
custom_task(*this) : fast_call(pc, static_cast(GPR[2])); +} +void PPUThread::cpu_task_main() +{ if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm) { const auto found = s_ppu_compiled.find(pc); @@ -311,39 +310,54 @@ be_t* PPUThread::get_stack_arg(s32 i, u64 align) void PPUThread::fast_call(u32 addr, u32 rtoc) { - auto old_PC = pc; - auto old_stack = GPR[1]; - auto old_rtoc = GPR[2]; - auto old_LR = LR; - auto old_task = std::move(custom_task); + const auto old_PC = pc; + const auto old_stack = GPR[1]; + const auto old_rtoc = GPR[2]; + const auto old_LR = LR; + const auto old_task = std::move(custom_task); + const auto old_func = last_function; pc = addr; GPR[2] = rtoc; LR = Emu.GetCPUThreadStop(); custom_task = nullptr; + last_function = nullptr; try { - cpu_task(); + cpu_task_main(); + + if (GPR[1] != old_stack && !state.test(cpu_state::ret) && !state.test(cpu_state::exit)) // GPR[1] shouldn't change + { + throw fmt::exception("Stack inconsistency (addr=0x%x, rtoc=0x%x, SP=0x%llx, old=0x%llx)", addr, rtoc, GPR[1], old_stack); + } } catch (cpu_state _s) { state += _s; if (_s != cpu_state::ret) throw; } + catch (EmulationStopped) + { + if (last_function) LOG_WARNING(PPU, "'%s' aborted", last_function); + last_function = old_func; + throw; + } + catch (...) 
+ { + if (last_function) LOG_ERROR(PPU, "'%s' aborted", last_function); + last_function = old_func; + throw; + } state -= cpu_state::ret; pc = old_PC; - - if (GPR[1] != old_stack) // GPR[1] shouldn't change - { - throw EXCEPTION("Stack inconsistency (addr=0x%x, rtoc=0x%x, SP=0x%llx, old=0x%llx)", addr, rtoc, GPR[1], old_stack); - } - + GPR[1] = old_stack; GPR[2] = old_rtoc; LR = old_LR; custom_task = std::move(old_task); + last_function = old_func; //if (custom_task) //{ @@ -357,6 +371,10 @@ const ppu_decoder s_ppu_itype; extern u64 get_timebased_time(); extern void ppu_execute_syscall(PPUThread& ppu, u64 code); extern void ppu_execute_function(PPUThread& ppu, u32 index); +extern ppu_function_t ppu_get_syscall(u64 code); +extern std::string ppu_get_syscall_name(u64 code); +extern ppu_function_t ppu_get_function(u32 index); +extern std::string ppu_get_module_function_name(u32 index); extern __m128 sse_exp2_ps(__m128 A); extern __m128 sse_log2_ps(__m128 A); @@ -378,23 +396,6 @@ static void ppu_trace(u64 addr) LOG_NOTICE(PPU, "Trace: 0x%llx", addr); } -static void ppu_hlecall(PPUThread& ppu, u32 index) -{ - ppu_execute_function(ppu, index); - if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily -} - -static void ppu_syscall(PPUThread& ppu, u64 code) -{ - ppu_execute_syscall(ppu, code); - if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily -} - -static u32 ppu_tbl() -{ - return (u32)get_timebased_time(); -} - static void ppu_call(PPUThread& ppu, u32 addr) { const auto found = s_ppu_compiled.find(addr); @@ -410,7 +411,7 @@ static void ppu_call(PPUThread& ppu, u32 addr) // Allow HLE callbacks without compiling them if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) { - return ppu_hlecall(ppu, op & 0x3ffffff); + return ppu_execute_function(ppu, op & 0x3ffffff); } ppu_trap(addr); @@ -506,9 +507,9 @@ extern void ppu_initialize(const std::string& name, const 
std::vectorTranslateToIR(info.first, info.first + info.second, vm::_ptr(info.first))); + const auto func = translator->TranslateToIR(info.first, info.first + info.second, vm::_ptr(info.first)); + + // Run optimization passes + pm.run(*func); + + const auto _syscall = module->getFunction("__syscall"); + const auto _hlecall = module->getFunction("__hlecall"); + + for (auto i = inst_begin(*func), end = inst_end(*func); i != end;) + { + const auto inst = &*i++; + + if (const auto ci = dyn_cast(inst)) + { + const auto cif = ci->getCalledFunction(); + const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr; + + if (cif == _syscall && op1 && isa(op1)) + { + // Try to determine syscall using the value from r11 (requires constant propagation) + const u64 index = cast(op1)->getZExtValue(); + + if (const auto ptr = ppu_get_syscall(index)) + { + const auto n = ppu_get_syscall_name(index); + const auto f = cast(module->getOrInsertFunction(n, _func)); + link_table.emplace(n, reinterpret_cast(ptr)); + + // Call the syscall directly + ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)})); + } + } + + if (cif == _hlecall && op1 && isa(op1)) + { + const u32 index = static_cast(cast(op1)->getZExtValue()); + + if (const auto ptr = ppu_get_function(index)) + { + const auto n = ppu_get_module_function_name(index); + const auto f = cast(module->getOrInsertFunction(n, _func)); + link_table.emplace(n, reinterpret_cast(ptr)); + + // Call the function directly + ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)})); + } + } + } + } } } @@ -599,9 +647,6 @@ extern void ppu_initialize(const std::string& name, const std::vectorget(fmt::format("__sub_%x", info.first)); - s_ppu_compiled.emplace(info.first, (void(*)(PPUThread&))link); + s_ppu_compiled.emplace(info.first, (ppu_function_t)link); LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second); } diff --git 
a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 6e3ae11888..1d95f8dd35 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -11,6 +11,7 @@ public: virtual std::string dump() const override; virtual void cpu_init() override; virtual void cpu_task() override; + virtual void cpu_task_main(); virtual bool handle_interrupt() override; virtual ~PPUThread() override; diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 0c4ae498c0..037a24cce7 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -910,40 +910,36 @@ std::array g_ppu_syscall_table extern void ppu_execute_syscall(PPUThread& ppu, u64 code) { - if (code >= g_ppu_syscall_table.size()) + if (code < g_ppu_syscall_table.size()) { - throw fmt::exception("Invalid syscall number (%llu)", code); - } + // If autopause occurs, check_status() will hold the thread till unpaused. + if (debug::autopause::pause_syscall(code) && ppu.check_status()) throw cpu_state::ret; - // If autopause occures, check_status() will hold the thread till unpaused. - if (debug::autopause::pause_syscall(code) && ppu.check_status()) - { - throw cpu_state::ret; - } - - const auto previous_function = ppu.last_function; // TODO: use gsl::finally or something - - try - { if (auto func = g_ppu_syscall_table[code]) { func(ppu); + LOG_TRACE(PPU, "Syscall '%s' (%llu) finished, r3=0x%llx", ppu_get_syscall_name(code), code, ppu.GPR[3]); } else { LOG_TODO(HLE, "Unimplemented syscall %s -> CELL_OK", ppu_get_syscall_name(code)); ppu.GPR[3] = 0; } - } - catch (...) - { - logs::PPU.format(Emu.IsStopped() ? 
logs::level::warning : logs::level::error, "Syscall '%s' (%llu) aborted", ppu_get_syscall_name(code), code); - ppu.last_function = previous_function; - throw; + + return; } - LOG_TRACE(PPU, "Syscall '%s' (%llu) finished, r3=0x%llx", ppu_get_syscall_name(code), code, ppu.GPR[3]); - ppu.last_function = previous_function; + throw fmt::exception("Invalid syscall number (%llu)", code); +} + +extern ppu_function_t ppu_get_syscall(u64 code) +{ + if (code < g_ppu_syscall_table.size()) + { + return g_ppu_syscall_table[code]; + } + + return nullptr; } DECLARE(lv2_lock_t::mutex); diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp index d4edeea141..d0ccbf2c1c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp @@ -271,6 +271,7 @@ s32 sys_timer_sleep(u32 sleep_time) std::this_thread::sleep_for(std::chrono::microseconds(useconds - passed)); } + CHECK_EMU_STATUS; return CELL_OK; } @@ -294,5 +295,6 @@ s32 sys_timer_usleep(const u64 sleep_time) std::this_thread::sleep_for(std::chrono::microseconds(sleep_time - passed)); } + CHECK_EMU_STATUS; return CELL_OK; } diff --git a/rpcs3/Emu/IdManager.cpp b/rpcs3/Emu/IdManager.cpp index 639b3c7c0f..636eac6b34 100644 --- a/rpcs3/Emu/IdManager.cpp +++ b/rpcs3/Emu/IdManager.cpp @@ -89,9 +89,11 @@ void idm::clear() // Call recorded finalization functions for all IDs for (std::size_t i = 0; i < g_map.size(); i++) { + const auto on_stop = id_manager::typeinfo::get()[i].on_stop; + for (auto& id : g_map[i]) { - id_manager::typeinfo::get()[i].on_stop(id.second.get()); + on_stop(id.second.get()); } g_map[i].clear(); diff --git a/rpcs3/Emu/IdManager.h b/rpcs3/Emu/IdManager.h index bc9ac403fd..c78175fdc8 100644 --- a/rpcs3/Emu/IdManager.h +++ b/rpcs3/Emu/IdManager.h @@ -37,6 +37,8 @@ namespace id_manager { static inline void func(T*) { + // Forbid forward declarations + static constexpr auto size = sizeof(std::conditional_t::value, void*, T>); } }; @@ -45,7 +47,7 @@ namespace id_manager { 
static inline void func(T* ptr) { - ptr->on_init(); + if (ptr) ptr->on_init(); } }; @@ -55,6 +57,8 @@ namespace id_manager { static inline void func(T*) { + // Forbid forward declarations + static constexpr auto size = sizeof(std::conditional_t::value, void*, T>); } }; @@ -63,7 +67,7 @@ namespace id_manager { static inline void func(T* ptr) { - ptr->on_stop(); + if (ptr) ptr->on_stop(); } }; @@ -97,9 +101,6 @@ namespace id_manager template static inline void update() { - // Forbid forward declarations - static constexpr auto size = sizeof(std::conditional_t::value, void*, T>); - auto& info = access()[get_index()]; info.on_init = [](void* ptr) { return_ id_manager::on_init::func(static_cast(ptr)); }; @@ -111,12 +112,6 @@ namespace id_manager { return access(); } - - template - static inline auto get_stop() - { - return access()[get_index()].on_stop; - } }; template @@ -257,6 +252,7 @@ public: if (auto pair = create_id(WRAP_EXPR(std::make_shared(std::forward(args)...)))) { id_manager::on_init::func(static_cast(pair->second.get())); + id_manager::on_stop::func(nullptr); return{ pair->second, static_cast(pair->second.get()) }; } @@ -270,6 +266,7 @@ public: if (auto pair = create_id(WRAP_EXPR(std::make_shared(std::forward(args)...)))) { id_manager::on_init::func(static_cast(pair->second.get())); + id_manager::on_stop::func(nullptr); return pair->first; } @@ -283,6 +280,7 @@ public: if (auto pair = create_id(WRAP_EXPR(ptr))) { id_manager::on_init::func(static_cast(pair->second.get())); + id_manager::on_stop::func(nullptr); return pair->first; } @@ -296,6 +294,7 @@ public: if (auto pair = create_id(std::forward(provider))) { id_manager::on_init::func(static_cast(pair->second.get())); + id_manager::on_stop::func(nullptr); return { pair->second, static_cast(pair->second.get()) }; } @@ -389,7 +388,7 @@ public: if (LIKELY(ptr)) { - id_manager::typeinfo::get_stop()(static_cast(ptr.get())); + id_manager::on_stop::func(static_cast(ptr.get())); } return ptr.operator bool(); @@ 
-403,7 +402,7 @@ public: if (LIKELY(ptr)) { - id_manager::typeinfo::get_stop()(static_cast(ptr.get())); + id_manager::on_stop::func(static_cast(ptr.get())); } return{ ptr, static_cast(ptr.get()) }; @@ -429,7 +428,7 @@ public: g_map[get_type()].erase(id); } - id_manager::typeinfo::get_stop()(static_cast(ptr.get())); + id_manager::on_stop::func(static_cast(ptr.get())); return{ ptr, static_cast(ptr.get()) }; } @@ -479,6 +478,7 @@ public: if (ptr) { id_manager::on_init::func(ptr.get()); + id_manager::on_stop::func(nullptr); } return ptr; @@ -531,6 +531,7 @@ public: if (ptr) { id_manager::on_init::func(ptr.get()); + id_manager::on_stop::func(nullptr); } return ptr; @@ -585,6 +586,7 @@ public: } id_manager::on_init::func(ptr.get()); + id_manager::on_stop::func(nullptr); return ptr; } @@ -616,7 +618,7 @@ public: if (ptr) { - id_manager::typeinfo::get_stop()(static_cast(ptr.get())); + id_manager::on_stop::func(static_cast(ptr.get())); } return ptr.operator bool(); @@ -630,7 +632,7 @@ public: if (ptr) { - id_manager::typeinfo::get_stop()(static_cast(ptr.get())); + id_manager::on_stop::func(static_cast(ptr.get())); } return{ ptr, static_cast(ptr.get()) }; diff --git a/rpcs3/Emu/PSP2/ARMv7Function.h b/rpcs3/Emu/PSP2/ARMv7Function.h index 5d2f5d0e64..eb9e418cf7 100644 --- a/rpcs3/Emu/PSP2/ARMv7Function.h +++ b/rpcs3/Emu/PSP2/ARMv7Function.h @@ -4,7 +4,12 @@ using arm_function_t = void(*)(ARMv7Thread&); -#define BIND_FUNC(func) [](ARMv7Thread& cpu){ cpu.last_function = #func; arm_func_detail::do_call(cpu, func); } +#define BIND_FUNC(func) static_cast([](ARMv7Thread& cpu){\ + const auto old_f = cpu.last_function;\ + cpu.last_function = #func;\ + arm_func_detail::do_call(cpu, func);\ + cpu.last_function = old_f;\ +}) struct arm_va_args_t { diff --git a/rpcs3/Emu/PSP2/ARMv7Module.cpp b/rpcs3/Emu/PSP2/ARMv7Module.cpp index bf61a8fec4..7992b3b6f8 100644 --- a/rpcs3/Emu/PSP2/ARMv7Module.cpp +++ b/rpcs3/Emu/PSP2/ARMv7Module.cpp @@ -79,27 +79,26 @@ extern std::string 
arm_get_variable_name(const std::string& module, u32 vnid); // Function lookup table. Not supposed to grow after emulation start. std::vector g_arm_function_cache; +std::vector g_arm_function_names; + +extern std::string arm_get_module_function_name(u32 index) +{ + if (index < g_arm_function_names.size()) + { + return g_arm_function_names[index]; + } + + return fmt::format(".%u", index); +} + extern void arm_execute_function(ARMv7Thread& cpu, u32 index) { if (index < g_arm_function_cache.size()) { if (const auto func = g_arm_function_cache[index]) { - const auto previous_function = cpu.last_function; // TODO: use gsl::finally or something - - try - { - func(cpu); - } - catch (...) - { - logs::ARMv7.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", cpu.last_function); - cpu.last_function = previous_function; - throw; - } - - LOG_TRACE(ARMv7, "Function '%s' finished, r0=0x%x", cpu.last_function, cpu.GPR[0]); - cpu.last_function = previous_function; + func(cpu); + LOG_TRACE(ARMv7, "Function '%s' finished, r0=0x%x", arm_get_module_function_name(index), cpu.GPR[0]); return; } } @@ -220,6 +219,8 @@ static void arm_initialize_modules() // Reinitialize function cache g_arm_function_cache = arm_function_manager::get(); + g_arm_function_names.clear(); + g_arm_function_names.resize(g_arm_function_cache.size()); // "Use" all the modules for correct linkage for (auto& module : registered) @@ -229,6 +230,7 @@ static void arm_initialize_modules() for (auto& function : module->functions) { LOG_TRACE(LOADER, "** 0x%08X: %s", function.first, function.second.name); + g_arm_function_names.at(function.second.index) = fmt::format("%s.%s", module->name, function.second.name); } for (auto& variable : module->variables) @@ -555,6 +557,7 @@ void arm_exec_loader::load() const // TODO index = ::size32(g_arm_function_cache); g_arm_function_cache.emplace_back(); + g_arm_function_names.emplace_back(fmt::format("%s.%s", module_name, fname)); 
LOG_ERROR(LOADER, "** Unknown function '%s' in module '%s' (*0x%x) -> index %u", fname, module_name, faddr, index); } diff --git a/rpcs3/Emu/PSP2/ARMv7Thread.cpp b/rpcs3/Emu/PSP2/ARMv7Thread.cpp index f491dff093..c0476525e3 100644 --- a/rpcs3/Emu/PSP2/ARMv7Thread.cpp +++ b/rpcs3/Emu/PSP2/ARMv7Thread.cpp @@ -126,13 +126,11 @@ extern thread_local std::string(*g_tls_log_prefix)(); void ARMv7Thread::cpu_task() { - if (custom_task) - { - if (check_status()) return; - - return custom_task(*this); - } + return custom_task ? custom_task(*this) : fast_call(PC); +} +void ARMv7Thread::cpu_task_main() +{ g_tls_log_prefix = [] { const auto cpu = static_cast(get_current_cpu_thread()); @@ -191,34 +189,49 @@ ARMv7Thread::ARMv7Thread(const std::string& name) void ARMv7Thread::fast_call(u32 addr) { - auto old_PC = PC; - auto old_stack = SP; - auto old_LR = LR; - auto old_task = std::move(custom_task); + const auto old_PC = PC; + const auto old_SP = SP; + const auto old_LR = LR; + const auto old_task = std::move(custom_task); + const auto old_func = last_function; PC = addr; LR = Emu.GetCPUThreadStop(); custom_task = nullptr; + last_function = nullptr; try { - cpu_task(); + cpu_task_main(); + + if (SP != old_SP && !state.test(cpu_state::ret) && !state.test(cpu_state::exit)) // SP shouldn't change + { + throw fmt::exception("Stack inconsistency (addr=0x%x, SP=0x%x, old=0x%x)", addr, SP, old_SP); + } } catch (cpu_state _s) { state += _s; if (_s != cpu_state::ret) throw; } + catch (EmulationStopped) + { + if (last_function) LOG_WARNING(ARMv7, "'%s' aborted", last_function); + last_function = old_func; + throw; + } + catch (...) 
+ { + if (last_function) LOG_ERROR(ARMv7, "'%s' aborted", last_function); + last_function = old_func; + throw; + } state -= cpu_state::ret; PC = old_PC; - - if (SP != old_stack) // SP shouldn't change - { - throw EXCEPTION("Stack inconsistency (addr=0x%x, SP=0x%x, old=0x%x)", addr, SP, old_stack); - } - + SP = old_SP; LR = old_LR; custom_task = std::move(old_task); + last_function = old_func; } diff --git a/rpcs3/Emu/PSP2/ARMv7Thread.h b/rpcs3/Emu/PSP2/ARMv7Thread.h index 59b9c393a9..3f0aaa9231 100644 --- a/rpcs3/Emu/PSP2/ARMv7Thread.h +++ b/rpcs3/Emu/PSP2/ARMv7Thread.h @@ -18,6 +18,7 @@ public: virtual std::string dump() const override; virtual void cpu_init() override; virtual void cpu_task() override; + virtual void cpu_task_main(); virtual ~ARMv7Thread() override; ARMv7Thread(const std::string& name); From 7a921cbdf92d35f85fbc1f3d84c121971742d262 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 25 Jun 2016 16:54:08 +0300 Subject: [PATCH 5/6] cpu_thread compressed --- Utilities/BitSet.h | 47 ++++++++++++++------------ rpcs3/Emu/CPU/CPUThread.cpp | 3 +- rpcs3/Emu/CPU/CPUThread.h | 15 ++++---- rpcs3/Emu/Cell/PPUThread.cpp | 5 +-- rpcs3/Emu/Cell/PPUThread.h | 2 ++ rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 2 +- rpcs3/Emu/Cell/SPUThread.cpp | 8 +++-- rpcs3/Emu/Cell/SPUThread.h | 2 ++ rpcs3/Emu/PSP2/ARMv7Thread.cpp | 5 +-- rpcs3/Emu/PSP2/ARMv7Thread.h | 2 ++ 10 files changed, 52 insertions(+), 39 deletions(-) diff --git a/Utilities/BitSet.h b/Utilities/BitSet.h index 77d8f7c4de..40356be5c0 100644 --- a/Utilities/BitSet.h +++ b/Utilities/BitSet.h @@ -10,6 +10,7 @@ struct bitset_t { using type = simple_t; using under = std::underlying_type_t; + enum class raw_type : under {}; static constexpr auto bitsize = BitSize; @@ -20,8 +21,8 @@ struct bitset_t { } - constexpr bitset_t(under raw_value, const std::nothrow_t&) - : m_value(static_cast(raw_value)) + constexpr bitset_t(raw_type raw_value) + : m_value(static_cast(static_cast(raw_value))) { } @@ -38,42 +39,42 @@ struct 
bitset_t bitset_t& operator +=(bitset_t rhs) { - return *this = { _value() | rhs._value(), std::nothrow }; + return *this = static_cast(_value() | rhs._value()); } bitset_t& operator -=(bitset_t rhs) { - return *this = { _value() & ~rhs._value(), std::nothrow }; + return *this = static_cast(_value() & ~rhs._value()); } bitset_t& operator &=(bitset_t rhs) { - return *this = { _value() & rhs._value(), std::nothrow }; + return *this = static_cast(_value() & rhs._value()); } bitset_t& operator ^=(bitset_t rhs) { - return *this = { _value() ^ rhs._value(), std::nothrow }; + return *this = static_cast(_value() ^ rhs._value()); } friend constexpr bitset_t operator +(bitset_t lhs, bitset_t rhs) { - return{ lhs._value() | rhs._value(), std::nothrow }; + return static_cast(lhs._value() | rhs._value()); } friend constexpr bitset_t operator -(bitset_t lhs, bitset_t rhs) { - return{ lhs._value() & ~rhs._value(), std::nothrow }; + return static_cast(lhs._value() & ~rhs._value()); } friend constexpr bitset_t operator &(bitset_t lhs, bitset_t rhs) { - return{ lhs._value() & rhs._value(), std::nothrow }; + return static_cast(lhs._value() & rhs._value()); } friend constexpr bitset_t operator ^(bitset_t lhs, bitset_t rhs) { - return{ lhs._value() ^ rhs._value(), std::nothrow }; + return static_cast(lhs._value() ^ rhs._value()); } bool test(bitset_t rhs) const @@ -87,7 +88,7 @@ struct bitset_t { const under v = _value(); const under s = rhs._value(); - *this = { v | s, std::nothrow }; + *this = static_cast(v | s); return (v & s) != 0; } @@ -95,7 +96,7 @@ struct bitset_t { const under v = _value(); const under s = rhs._value(); - *this = { v & ~s, std::nothrow }; + *this = static_cast(v & ~s); return (v & s) != 0; } @@ -103,7 +104,7 @@ struct bitset_t { const under v = _value(); const under s = rhs._value(); - *this = { v ^ s, std::nothrow }; + *this = static_cast(v ^ s); return (v & s) != 0; } @@ -133,17 +134,18 @@ template struct atomic_add, CT, std::enable_if_t::value>> { using 
under = typename bitset_t::under; + using raw_type = typename bitset_t::raw_type; static inline bitset_t op1(bitset_t& left, bitset_t right) { - return{ atomic_storage::fetch_or(reinterpret_cast(left), right._value()), std::nothrow }; + return static_cast(atomic_storage::fetch_or(reinterpret_cast(left), right._value())); } static constexpr auto fetch_op = &op1; static inline bitset_t op2(bitset_t& left, bitset_t right) { - return{ atomic_storage::or_fetch(reinterpret_cast(left), right._value()), std::nothrow }; + return static_cast(atomic_storage::or_fetch(reinterpret_cast(left), right._value())); } static constexpr auto op_fetch = &op2; @@ -154,17 +156,18 @@ template struct atomic_sub, CT, std::enable_if_t::value>> { using under = typename bitset_t::under; + using raw_type = typename bitset_t::raw_type; static inline bitset_t op1(bitset_t& left, bitset_t right) { - return{ atomic_storage::fetch_and(reinterpret_cast(left), ~right._value()), std::nothrow }; + return static_cast(atomic_storage::fetch_and(reinterpret_cast(left), ~right._value())); } static constexpr auto fetch_op = &op1; static inline bitset_t op2(bitset_t& left, bitset_t right) { - return{ atomic_storage::and_fetch(reinterpret_cast(left), ~right._value()), std::nothrow }; + return static_cast(atomic_storage::and_fetch(reinterpret_cast(left), ~right._value())); } static constexpr auto op_fetch = &op2; @@ -175,17 +178,18 @@ template struct atomic_and, CT, std::enable_if_t::value>> { using under = typename bitset_t::under; + using raw_type = typename bitset_t::raw_type; static inline bitset_t op1(bitset_t& left, bitset_t right) { - return{ atomic_storage::fetch_and(reinterpret_cast(left), right._value()), std::nothrow }; + return static_cast(atomic_storage::fetch_and(reinterpret_cast(left), right._value())); } static constexpr auto fetch_op = &op1; static inline bitset_t op2(bitset_t& left, bitset_t right) { - return{ atomic_storage::and_fetch(reinterpret_cast(left), right._value()), std::nothrow }; + 
return static_cast(atomic_storage::and_fetch(reinterpret_cast(left), right._value())); } static constexpr auto op_fetch = &op2; @@ -196,17 +200,18 @@ template struct atomic_xor, CT, std::enable_if_t::value>> { using under = typename bitset_t::under; + using raw_type = typename bitset_t::raw_type; static inline bitset_t op1(bitset_t& left, bitset_t right) { - return{ atomic_storage::fetch_xor(reinterpret_cast(left), right._value()), std::nothrow }; + return static_cast(atomic_storage::fetch_xor(reinterpret_cast(left), right._value())); } static constexpr auto fetch_op = &op1; static inline bitset_t op2(bitset_t& left, bitset_t right) { - return{ atomic_storage::xor_fetch(reinterpret_cast(left), right._value()), std::nothrow }; + return static_cast(atomic_storage::xor_fetch(reinterpret_cast(left), right._value())); } static constexpr auto op_fetch = &op2; diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 294a1b6e73..7be23e9849 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -68,9 +68,8 @@ cpu_thread::~cpu_thread() { } -cpu_thread::cpu_thread(cpu_type type, const std::string& name) +cpu_thread::cpu_thread(cpu_type type) : type(type) - , name(name) { } diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 63f8d50769..3dd21a1aee 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -4,7 +4,7 @@ #include "../Utilities/BitSet.h" // CPU Thread Type -enum class cpu_type : u32 +enum class cpu_type : u8 { ppu, // PPU Thread spu, // SPU Thread @@ -12,7 +12,7 @@ enum class cpu_type : u32 }; // CPU Thread State flags -enum struct cpu_state : u32 +enum struct cpu_state : u16 { stop, // Thread not running (HLE, initial state) exit, // Irreversible exit @@ -38,18 +38,17 @@ public: virtual void on_stop() override; virtual ~cpu_thread() override; - const std::string name; - const cpu_type type; const id_value<> id{}; + const cpu_type type; - cpu_thread(cpu_type type, const std::string& 
name); + cpu_thread(cpu_type type); + + // Public recursive sleep state counter + atomic_t sleep_counter{}; // Public thread state atomic_t> state{ cpu_state::stop }; - // Public recursive sleep state counter - atomic_t sleep_counter{}; - // Object associated with sleep state, possibly synchronization primitive (mutex, semaphore, etc.) atomic_t owner{}; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index b46ac6ffb0..e72f2cbce5 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -70,7 +70,7 @@ static std::unordered_map s_ppu_compiled; // std::string PPUThread::get_name() const { - return fmt::format("PPU[0x%x] Thread (%s)", id, name); + return fmt::format("PPU[0x%x] Thread (%s)", id, m_name); } std::string PPUThread::dump() const @@ -298,7 +298,8 @@ PPUThread::~PPUThread() } PPUThread::PPUThread(const std::string& name) - : cpu_thread(cpu_type::ppu, name) + : cpu_thread(cpu_type::ppu) + , m_name(name) { } diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 1d95f8dd35..4303090c0e 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -75,6 +75,8 @@ public: bool is_joinable = true; bool is_joining = false; + const std::string m_name; // Thread name + std::function custom_task; // Function name can be stored here. Used to print the last called function. 
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index ecef1e01c6..aae3547b7b 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -2186,7 +2186,7 @@ void spu_recompiler::BR(spu_opcode_t op) c->mov(*addr, target | 0x2000000); //c->cmp(asmjit::host::dword_ptr(*ls, m_pos), 0x32); // compare instruction opcode with BR-to-self //c->je(labels[target / 4]); - c->lock().or_(SPU_OFF_32(state), make_bitset(cpu_state::stop, cpu_state::ret)._value()); + c->lock().or_(SPU_OFF_16(state), make_bitset(cpu_state::stop, cpu_state::ret)._value()); c->jmp(*end); c->unuse(*addr); return; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index a38fc75b86..1317779ac8 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -126,7 +126,7 @@ spu_imm_table_t::spu_imm_table_t() std::string SPUThread::get_name() const { - return fmt::format("%sSPU[0x%x] Thread (%s)", offset > RAW_SPU_BASE_ADDR ? "Raw" : "", id, name); + return fmt::format("%sSPU[0x%x] Thread (%s)", offset > RAW_SPU_BASE_ADDR ? 
"Raw" : "", id, m_name); } std::string SPUThread::dump() const @@ -240,14 +240,16 @@ SPUThread::~SPUThread() } SPUThread::SPUThread(const std::string& name) - : cpu_thread(cpu_type::spu, name) + : cpu_thread(cpu_type::spu) + , m_name(name) , index(0) , offset(0) { } SPUThread::SPUThread(const std::string& name, u32 index) - : cpu_thread(cpu_type::spu, name) + : cpu_thread(cpu_type::spu) + , m_name(name) , index(index) , offset(vm::alloc(0x40000, vm::main)) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index d75dd5e676..9e2354f8fb 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -553,6 +553,8 @@ public: const u32 index; // SPU index const u32 offset; // SPU LS offset + const std::string m_name; // Thread name + std::function custom_task; std::exception_ptr pending_exception; diff --git a/rpcs3/Emu/PSP2/ARMv7Thread.cpp b/rpcs3/Emu/PSP2/ARMv7Thread.cpp index c0476525e3..5e45fd305c 100644 --- a/rpcs3/Emu/PSP2/ARMv7Thread.cpp +++ b/rpcs3/Emu/PSP2/ARMv7Thread.cpp @@ -73,7 +73,7 @@ void armv7_free_tls(u32 thread) std::string ARMv7Thread::get_name() const { - return fmt::format("ARMv7[0x%x] Thread (%s)", id, name); + return fmt::format("ARMv7[0x%x] Thread (%s)", id, m_name); } std::string ARMv7Thread::dump() const @@ -183,7 +183,8 @@ ARMv7Thread::~ARMv7Thread() } ARMv7Thread::ARMv7Thread(const std::string& name) - : cpu_thread(cpu_type::arm, name) + : cpu_thread(cpu_type::arm) + , m_name(name) { } diff --git a/rpcs3/Emu/PSP2/ARMv7Thread.h b/rpcs3/Emu/PSP2/ARMv7Thread.h index 3f0aaa9231..721277ab2e 100644 --- a/rpcs3/Emu/PSP2/ARMv7Thread.h +++ b/rpcs3/Emu/PSP2/ARMv7Thread.h @@ -134,6 +134,8 @@ public: u32 stack_addr = 0; u32 stack_size = 0; + const std::string m_name; + std::function custom_task; const char* last_function = nullptr; From 0ea0c21fedd5da7d6772aca16f2cf6382996c530 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Mon, 27 Jun 2016 19:34:08 +0300 Subject: [PATCH 6/6] LLVM: Indirect call map --- 
Utilities/VirtualMemory.cpp | 10 ++--- Utilities/VirtualMemory.h | 14 +++---- rpcs3/Emu/Cell/Modules/cellGcmSys.cpp | 2 + rpcs3/Emu/Cell/PPUThread.cpp | 58 ++++++++------------------- rpcs3/Emu/Cell/PPUTranslator.cpp | 18 +++++++-- rpcs3/Emu/Cell/PPUTranslator.h | 3 ++ 6 files changed, 49 insertions(+), 56 deletions(-) diff --git a/Utilities/VirtualMemory.cpp b/Utilities/VirtualMemory.cpp index a7e85ff4ff..f3fb78af3f 100644 --- a/Utilities/VirtualMemory.cpp +++ b/Utilities/VirtualMemory.cpp @@ -25,21 +25,21 @@ namespace memory_helper return ret; } - void commit_page_memory(void* pointer, size_t page_size) + void commit_page_memory(void* pointer, size_t size) { #ifdef _WIN32 - VERIFY(VirtualAlloc((u8*)pointer, page_size, MEM_COMMIT, PAGE_READWRITE) != NULL); + VERIFY(VirtualAlloc(pointer, size, MEM_COMMIT, PAGE_READWRITE) != NULL); #else - VERIFY(mprotect((u8*)pointer, page_size, PROT_READ | PROT_WRITE) != -1); + VERIFY(mprotect((void*)((u64)pointer & -4096), size, PROT_READ | PROT_WRITE) != -1); #endif } void free_reserved_memory(void* pointer, size_t size) { #ifdef _WIN32 - VERIFY(VirtualFree(pointer, 0, MEM_RELEASE) != 0); + VERIFY(VirtualFree(pointer, 0, MEM_DECOMMIT) != 0); #else - VERIFY(munmap(pointer, size) == 0); + VERIFY(mprotect(pointer, size, PROT_NONE) != -1); #endif } } diff --git a/Utilities/VirtualMemory.h b/Utilities/VirtualMemory.h index ecce48db2e..c63d75f659 100644 --- a/Utilities/VirtualMemory.h +++ b/Utilities/VirtualMemory.h @@ -3,20 +3,20 @@ namespace memory_helper { /** - * Reserve size bytes of virtual memory and returns it. + * Reserve `size` bytes of virtual memory and returns it. * The memory should be commited before usage. */ - void* reserve_memory(size_t size); + void* reserve_memory(std::size_t size); /** - * Commit page_size bytes of virtual memory starting at pointer. + * Commit `size` bytes of virtual memory starting at pointer. * That is, bake reserved memory with physical memory. 
* pointer should belong to a range of reserved memory. */ - void commit_page_memory(void* pointer, size_t page_size); + void commit_page_memory(void* pointer, std::size_t size); /** - * Free memory alloced via reserve_memory. + * Decommit all memory committed via commit_page_memory. */ - void free_reserved_memory(void* pointer, size_t size); -} \ No newline at end of file + void free_reserved_memory(void* pointer, std::size_t size); +} diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index 962218925c..5331465f4b 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -13,6 +13,7 @@ logs::channel cellGcmSys("cellGcmSys", logs::level::notice); extern s32 cellGcmCallback(vm::ptr context, u32 count); +extern void ppu_register_function_at(u32 addr, ppu_function_t ptr); const u32 tiled_pitches[] = { 0x00000000, 0x00000200, 0x00000300, 0x00000400, @@ -384,6 +385,7 @@ s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSi vm::write32(gcm_info.context_addr + 0x44, 0xabadcafe); vm::write32(gcm_info.context_addr + 0x48, ppu_instructions::HACK(FIND_FUNC(cellGcmCallback))); vm::write32(gcm_info.context_addr + 0x4c, ppu_instructions::BLR()); + ppu_register_function_at(gcm_info.context_addr + 0x48, BIND_FUNC(cellGcmCallback)); vm::_ref(gcm_info.context_addr) = current_context; context->set(gcm_info.context_addr); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index e72f2cbce5..38b437fb95 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "Utilities/Config.h" +#include "Utilities/VirtualMemory.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" #include "Emu/IdManager.h" @@ -56,17 +57,16 @@ cfg::map_entry g_cfg_ppu_decoder(cfg::root.core, "PPU Decoder" const ppu_decoder s_ppu_interpreter_precise; const ppu_decoder s_ppu_interpreter_fast; -struct ppu_addr_hash +const auto s_ppu_compiled = 
static_cast(memory_helper::reserve_memory(0x200000000)); + +extern void ppu_register_function_at(u32 addr, ppu_function_t ptr) { - u32 operator()(u32 value) const + if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm) { - return value / sizeof(32); + memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(ppu_function_t)); + s_ppu_compiled[addr / 4] = ptr; } -}; - -static std::unordered_map s_ppu_compiled; // TODO - - +} std::string PPUThread::get_name() const { @@ -134,12 +134,7 @@ void PPUThread::cpu_task_main() { if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm) { - const auto found = s_ppu_compiled.find(pc); - - if (found != s_ppu_compiled.end()) - { - return found->second(*this); - } + return s_ppu_compiled[pc / 4](*this); } g_tls_log_prefix = [] @@ -397,27 +392,6 @@ static void ppu_trace(u64 addr) LOG_NOTICE(PPU, "Trace: 0x%llx", addr); } -static void ppu_call(PPUThread& ppu, u32 addr) -{ - const auto found = s_ppu_compiled.find(addr); - - if (found != s_ppu_compiled.end()) - { - return found->second(ppu); - } - - const auto op = vm::read32(addr).value(); - const auto itype = s_ppu_itype.decode(op); - - // Allow HLE callbacks without compiling them - if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) - { - return ppu_execute_function(ppu, op & 0x3ffffff); - } - - ppu_trap(addr); -} - static __m128 sse_rcp_ps(__m128 A) { return _mm_rcp_ps(A); @@ -511,7 +485,7 @@ extern void ppu_initialize(const std::string& name, const std::vectorget(fmt::format("__sub_%x", info.first)); - s_ppu_compiled.emplace(info.first, (ppu_function_t)link); + const std::uintptr_t link = jit->get(fmt::format("__sub_%x", addr)); + memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(ppu_function_t)); + s_ppu_compiled[addr / 4] = (ppu_function_t)link; - LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second); + LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx 
(addr=0x%x, size=0x%x)", addr, link, addr, info.second); } } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 054418cae4..7b25879f9e 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -82,6 +82,9 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, u64 base, u64 thread_struct.insert(thread_struct.end(), 32, GetType()); // CR[0..31] m_thread_type = StructType::create(m_context, thread_struct, "context_t"); + + // Callable + m_call = new GlobalVariable(*module, ArrayType::get(FunctionType::get(GetType(), {m_thread_type->getPointerTo()}, false)->getPointerTo(), 0x40000000), true, GlobalValue::ExternalLinkage, 0, "__call"); } PPUTranslator::~PPUTranslator() @@ -265,8 +268,7 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* } m_ir->SetInsertPoint(_default); - Call(GetType(), "__call", m_thread, _ctr); - m_ir->CreateRetVoid(); + CallFunction(0, true, _ctr); } //for (auto i = inst_begin(*m_function), end = inst_end(*m_function); i != end;) @@ -321,7 +323,17 @@ void PPUTranslator::CallFunction(u64 target, bool tail, Value* indirect) const auto callee_type = func ? m_func_types[target] : nullptr; - const auto result = func ? m_ir->CreateCall(func, {m_thread}) : Call(GetType(), "__call", m_thread, indirect ? indirect : m_ir->getInt64(target)); + if (func) + { + m_ir->CreateCall(func, {m_thread}); + } + else + { + const auto addr = indirect ? 
indirect : (Value*)m_ir->getInt64(target); + const auto pos = m_ir->CreateLShr(addr, 2, "", true); + const auto ptr = m_ir->CreateGEP(m_call, {m_ir->getInt64(0), pos}); + m_ir->CreateCall(m_ir->CreateLoad(ptr), {m_thread}); + } if (!tail) { diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index e56ca7a31c..354c745b4f 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -157,6 +157,9 @@ class PPUTranslator final //: public CPUTranslator // Thread context llvm::Value* m_thread; + // Callable functions + llvm::Value* m_call; + // Thread context struct llvm::StructType* m_thread_type;