Merge pull request #1813 from Nekotekina/llvm1

PPU LLVM: Improvements
This commit is contained in:
Ivan 2016-06-27 20:52:38 +03:00 committed by GitHub
commit 223979c088
32 changed files with 968 additions and 827 deletions

View file

@ -10,6 +10,7 @@ struct bitset_t
{
using type = simple_t<T>;
using under = std::underlying_type_t<type>;
enum class raw_type : under {};
static constexpr auto bitsize = BitSize;
@ -20,8 +21,8 @@ struct bitset_t
{
}
constexpr bitset_t(under raw_value, const std::nothrow_t&)
: m_value(static_cast<T>(raw_value))
constexpr bitset_t(raw_type raw_value)
: m_value(static_cast<T>(static_cast<under>(raw_value)))
{
}
@ -38,42 +39,42 @@ struct bitset_t
bitset_t& operator +=(bitset_t rhs)
{
return *this = { _value() | rhs._value(), std::nothrow };
return *this = static_cast<raw_type>(_value() | rhs._value());
}
bitset_t& operator -=(bitset_t rhs)
{
return *this = { _value() & ~rhs._value(), std::nothrow };
return *this = static_cast<raw_type>(_value() & ~rhs._value());
}
bitset_t& operator &=(bitset_t rhs)
{
return *this = { _value() & rhs._value(), std::nothrow };
return *this = static_cast<raw_type>(_value() & rhs._value());
}
bitset_t& operator ^=(bitset_t rhs)
{
return *this = { _value() ^ rhs._value(), std::nothrow };
return *this = static_cast<raw_type>(_value() ^ rhs._value());
}
friend constexpr bitset_t operator +(bitset_t lhs, bitset_t rhs)
{
return{ lhs._value() | rhs._value(), std::nothrow };
return static_cast<raw_type>(lhs._value() | rhs._value());
}
friend constexpr bitset_t operator -(bitset_t lhs, bitset_t rhs)
{
return{ lhs._value() & ~rhs._value(), std::nothrow };
return static_cast<raw_type>(lhs._value() & ~rhs._value());
}
friend constexpr bitset_t operator &(bitset_t lhs, bitset_t rhs)
{
return{ lhs._value() & rhs._value(), std::nothrow };
return static_cast<raw_type>(lhs._value() & rhs._value());
}
friend constexpr bitset_t operator ^(bitset_t lhs, bitset_t rhs)
{
return{ lhs._value() ^ rhs._value(), std::nothrow };
return static_cast<raw_type>(lhs._value() ^ rhs._value());
}
bool test(bitset_t rhs) const
@ -87,7 +88,7 @@ struct bitset_t
{
const under v = _value();
const under s = rhs._value();
*this = { v | s, std::nothrow };
*this = static_cast<raw_type>(v | s);
return (v & s) != 0;
}
@ -95,7 +96,7 @@ struct bitset_t
{
const under v = _value();
const under s = rhs._value();
*this = { v & ~s, std::nothrow };
*this = static_cast<raw_type>(v & ~s);
return (v & s) != 0;
}
@ -103,7 +104,7 @@ struct bitset_t
{
const under v = _value();
const under s = rhs._value();
*this = { v ^ s, std::nothrow };
*this = static_cast<raw_type>(v ^ s);
return (v & s) != 0;
}
@ -133,17 +134,18 @@ template<typename T, typename CT>
struct atomic_add<bitset_t<T>, CT, std::enable_if_t<std::is_enum<T>::value>>
{
using under = typename bitset_t<T>::under;
using raw_type = typename bitset_t<T>::raw_type;
static inline bitset_t<T> op1(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::fetch_or(reinterpret_cast<under&>(left), right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::fetch_or(reinterpret_cast<under&>(left), right._value()));
}
static constexpr auto fetch_op = &op1;
static inline bitset_t<T> op2(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::or_fetch(reinterpret_cast<under&>(left), right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::or_fetch(reinterpret_cast<under&>(left), right._value()));
}
static constexpr auto op_fetch = &op2;
@ -154,17 +156,18 @@ template<typename T, typename CT>
struct atomic_sub<bitset_t<T>, CT, std::enable_if_t<std::is_enum<T>::value>>
{
using under = typename bitset_t<T>::under;
using raw_type = typename bitset_t<T>::raw_type;
static inline bitset_t<T> op1(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::fetch_and(reinterpret_cast<under&>(left), ~right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::fetch_and(reinterpret_cast<under&>(left), ~right._value()));
}
static constexpr auto fetch_op = &op1;
static inline bitset_t<T> op2(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::and_fetch(reinterpret_cast<under&>(left), ~right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::and_fetch(reinterpret_cast<under&>(left), ~right._value()));
}
static constexpr auto op_fetch = &op2;
@ -175,17 +178,18 @@ template<typename T, typename CT>
struct atomic_and<bitset_t<T>, CT, std::enable_if_t<std::is_enum<T>::value>>
{
using under = typename bitset_t<T>::under;
using raw_type = typename bitset_t<T>::raw_type;
static inline bitset_t<T> op1(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::fetch_and(reinterpret_cast<under&>(left), right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::fetch_and(reinterpret_cast<under&>(left), right._value()));
}
static constexpr auto fetch_op = &op1;
static inline bitset_t<T> op2(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::and_fetch(reinterpret_cast<under&>(left), right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::and_fetch(reinterpret_cast<under&>(left), right._value()));
}
static constexpr auto op_fetch = &op2;
@ -196,17 +200,18 @@ template<typename T, typename CT>
struct atomic_xor<bitset_t<T>, CT, std::enable_if_t<std::is_enum<T>::value>>
{
using under = typename bitset_t<T>::under;
using raw_type = typename bitset_t<T>::raw_type;
static inline bitset_t<T> op1(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::fetch_xor(reinterpret_cast<under&>(left), right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::fetch_xor(reinterpret_cast<under&>(left), right._value()));
}
static constexpr auto fetch_op = &op1;
static inline bitset_t<T> op2(bitset_t<T>& left, bitset_t<T> right)
{
return{ atomic_storage<under>::xor_fetch(reinterpret_cast<under&>(left), right._value()), std::nothrow };
return static_cast<raw_type>(atomic_storage<under>::xor_fetch(reinterpret_cast<under&>(left), right._value()));
}
static constexpr auto op_fetch = &op2;

327
Utilities/JIT.cpp Normal file
View file

@ -0,0 +1,327 @@
#ifdef LLVM_AVAILABLE
#include <unordered_map>
#include <map>
#include <unordered_set>
#include <set>
#include <array>
#include "types.h"
#include "Macro.h"
#include "StrFmt.h"
#include "File.h"
#include "Log.h"
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#ifdef _WIN32
#include <Windows.h>
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#endif
#include "JIT.h"
// Global LLVM context (thread-unsafe); declared as extern in JIT.h
llvm::LLVMContext g_llvm_ctx;
// Size of virtual memory area reserved: 512 MB (0x20000000 bytes)
static const u64 s_memory_size = 0x20000000;
// Try to reserve a portion of virtual memory in the first 2 GB address space beforehand, if possible.
static void* const s_memory = []() -> void*
{
#ifdef _WIN32
for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000)
{
if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS))
{
return (void*)addr;
}
}
return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS);
#else
return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
}();
// EH frames: address and size most recently passed to MemoryManager::registerEHFrames()
static u8* s_unwind_info;
static u64 s_unwind_size;
#ifdef _WIN32
// RUNTIME_FUNCTION records built by the jit_compiler constructor and passed to RtlAddFunctionTable
static std::vector<RUNTIME_FUNCTION> s_unwind; // Custom .pdata section replacement
#endif
// Helper class
// Helper class: memory manager for the JIT engine.
// Sections are carved sequentially out of the pre-reserved s_memory area, and symbols
// unknown to the default resolver are looked up in a caller-provided table.
struct MemoryManager final : llvm::RTDyldMemoryManager
{
	// Symbol name -> address table consulted by getSymbolAddress()
	std::unordered_map<std::string, std::uintptr_t> table;

	MemoryManager(std::unordered_map<std::string, std::uintptr_t>&& table)
		: table(std::move(table))
	{
	}

	// Placeholder target for unresolved symbols: throws when called
	[[noreturn]] static void null()
	{
		throw std::runtime_error("Null function" HERE);
	}

	// Resolve a symbol: default resolver first, then the table, then the null() placeholder
	virtual u64 getSymbolAddress(const std::string& name) override
	{
		if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name))
		{
			// This may be bad if LLVM requests some built-in functions like fma.
			LOG_ERROR(GENERAL, "LLVM: Symbol requested %s -> 0x%016llx", name, addr);
			return addr;
		}

		const auto found = table.find(name);

		if (found != table.end())
		{
			return found->second;
		}

		// It's fine if some function is never called, for example.
		LOG_ERROR(GENERAL, "LLVM: Linkage failed for %s", name);
		return (u64)null;
	}

	// Allocate a readable, writable and executable section; returns nullptr on failure
	virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
	{
		u8* const ptr = allocate_section(size, align, true);

		if (ptr)
		{
			// StringRef::data() is not guaranteed to be null-terminated: log a terminated copy
			const std::string name = sec_name.str();
			LOG_SUCCESS(GENERAL, "LLVM: Code section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x)", sec_id, name.c_str(), static_cast<void*>(ptr), size, align);
		}

		return ptr;
	}

	// Allocate a readable and writable section (is_ro is only logged); returns nullptr on failure
	virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
	{
		u8* const ptr = allocate_section(size, align, false);

		if (ptr)
		{
			// StringRef::data() is not guaranteed to be null-terminated: log a terminated copy
			const std::string name = sec_name.str();
			LOG_SUCCESS(GENERAL, "LLVM: Data section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x, %s)", sec_id, name.c_str(), static_cast<void*>(ptr), size, align, is_ro ? "ro" : "rw");
		}

		return ptr;
	}

	// Always returns false (in the LLVM interface a true result signals an error)
	virtual bool finalizeMemory(std::string* = nullptr) override
	{
		// TODO: make sections read-only when necessary
		return false;
	}

	// Remember the EH frame location for later use, then forward to the base class
	virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
	{
		s_unwind_info = addr;
		s_unwind_size = size;

		return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
	}

	virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override
	{
		LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected

		return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size);
	}

	// Remove the function table (Windows) and make the whole reserved area inaccessible again
	~MemoryManager()
	{
#ifdef _WIN32
		if (!RtlDeleteFunctionTable(s_unwind.data()))
		{
			LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError());
		}

		if (!VirtualFree(s_memory, 0, MEM_DECOMMIT))
		{
			LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError());
		}
#else
		if (::mprotect(s_memory, s_memory_size, PROT_NONE))
		{
			LOG_FATAL(GENERAL, "mprotect(0x%p) failed! Error %d", s_memory, errno);
		}

		// TODO: unregister EH frames if necessary
#endif
	}

private:
	// Start of the unused part of the reserved area (advanced in 4096-byte steps)
	void* m_next = s_memory;

	// Simple sequential allocation shared by code and data sections.
	// Makes `size` bytes at m_next accessible (executable if is_code is set) and advances
	// m_next to the next 4096-byte boundary. `align` is only used for logging.
	// Logs the failure and returns nullptr if the area is exhausted or the system call fails.
	u8* allocate_section(std::uintptr_t size, uint align, bool is_code)
	{
		const u64 next = ::align((u64)m_next + size, 4096);

		if (next > (u64)s_memory + s_memory_size)
		{
			LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
			return nullptr;
		}

#ifdef _WIN32
		if (!VirtualAlloc(m_next, size, MEM_COMMIT, is_code ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE))
#else
		if (::mprotect(m_next, size, is_code ? (PROT_READ | PROT_WRITE | PROT_EXEC) : (PROT_READ | PROT_WRITE)))
#endif
		{
			LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next);
			return nullptr;
		}

		return (u8*)std::exchange(m_next, (void*)next);
	}
};
// Helper class
// Helper class: dumps every emitted object file to disk
struct EventListener final : llvm::JITEventListener
{
	// Write the raw contents of the emitted object to "LLVM.obj" in the config directory,
	// replacing any previous dump. The load information is not used.
	virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
	{
		const llvm::StringRef image = obj.getData();

		fs::file dump(fs::get_config_dir() + "LLVM.obj", fs::rewrite);
		dump.write(image.data(), image.size());
	}
};

// Single listener instance registered by every jit_compiler
static EventListener s_listener;
// Compile the module and collect the addresses of all functions that have a body.
// `table` supplies the addresses of external symbols for linkage.
// Throws if the reserved memory area is unavailable (EXPECTS) or the engine cannot be created.
jit_compiler::jit_compiler(std::unique_ptr<llvm::Module>&& _module, std::unordered_map<std::string, std::uintptr_t>&& table)
{
	EXPECTS(s_memory);

	std::string result;

	// Keep a raw pointer: ownership of the module is moved into the engine below
	const auto module_ptr = _module.get();

	// Initialization
	llvm::InitializeNativeTarget();
	llvm::InitializeNativeTargetAsmPrinter();
	LLVMLinkInMCJIT();

	m_engine.reset(llvm::EngineBuilder(std::move(_module))
		.setErrorStr(&result)
		.setMCJITMemoryManager(std::make_unique<MemoryManager>(std::move(table)))
		.setOptLevel(llvm::CodeGenOpt::Aggressive)
		.setRelocationModel(llvm::Reloc::PIC_)
		.setCodeModel((u64)s_memory <= 0x60000000 ? llvm::CodeModel::Medium : llvm::CodeModel::Large) // TODO
		.setMCPU(llvm::sys::getHostCPUName())
		.create());

	if (!m_engine)
	{
		throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result);
	}

	m_engine->setProcessAllSections(true); // ???
	m_engine->RegisterJITEventListener(&s_listener);
	m_engine->finalizeObject();

	for (auto& func : module_ptr->functions())
	{
		if (!func.empty())
		{
			const std::string& name = func.getName();

			// Register compiled function
			m_map[name] = m_engine->getFunctionAddress(name);
		}

		// Delete IR to lower memory consumption
		func.deleteBody();
	}

#ifdef _WIN32
	// Register .xdata UNWIND_INFO (.pdata section is empty for some reason)
	std::set<u64> func_set;

	for (const auto& pair : m_map)
	{
		func_set.emplace(pair.second);
	}

	// Nothing can be registered without at least one function and the EH frame data:
	// the code below would otherwise decrement end() of an empty set and read through a null pointer
	if (func_set.empty() || !s_unwind_info)
	{
		LOG_ERROR(GENERAL, "LLVM: UNWIND_INFO not registered (no functions or no .xdata)");
		return;
	}

	// Hack (cannot obtain last function size)
	func_set.emplace(::align(*--func_set.end() + 4096, 4096));

	const u64 base = (u64)s_memory;
	const u8* bits = s_unwind_info;

	s_unwind.clear();
	s_unwind.reserve(m_map.size());

	// Every address except the last one (the artificial end marker) starts a function
	for (auto it = func_set.begin(), end = --func_set.end(); it != end; ++it)
	{
		auto following = it;
		++following;

		const u64 addr = *it;
		const u64 next = *following;

		// Generate RUNTIME_FUNCTION record
		RUNTIME_FUNCTION uw;
		uw.BeginAddress = static_cast<u32>(addr - base);
		uw.EndAddress   = static_cast<u32>(next - base);
		uw.UnwindData   = static_cast<u32>((u64)bits - base);
		s_unwind.emplace_back(uw);

		// Parse .xdata UNWIND_INFO record
		const u8 flags = *bits++; // Version and flags
		const u8 prolog = *bits++; // Size of prolog
		const u8 count = *bits++; // Count of unwind codes
		const u8 frame = *bits++; // Frame Reg + Off
		bits += ::align(std::max<u8>(1, count), 2) * sizeof(u16); // UNWIND_CODE array

		if (flags != 1)
		{
			LOG_ERROR(GENERAL, "LLVM: unsupported UNWIND_INFO version/flags (0x%02x)", flags);
			break;
		}

		LOG_TRACE(GENERAL, "LLVM: .xdata at 0x%llx: function 0x%x..0x%x: p0x%02x, c0x%02x, f0x%02x", uw.UnwindData + base, uw.BeginAddress + base, uw.EndAddress + base, prolog, count, frame);
	}

	// The records must cover the registered EH frame area exactly
	if (s_unwind_info + s_unwind_size != bits)
	{
		LOG_FATAL(GENERAL, "LLVM: .xdata analysis failed! (0x%p != 0x%p)", s_unwind_info + s_unwind_size, bits);
	}
	else if (!RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base))
	{
		LOG_FATAL(GENERAL, "RtlAddFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError());
	}
	else
	{
		LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size);
	}
#endif
}
// Out-of-line destructor with an empty body: the members (m_engine, m_map) are destroyed implicitly
jit_compiler::~jit_compiler()
{
}
#endif

50
Utilities/JIT.h Normal file
View file

@ -0,0 +1,50 @@
#pragma once
#ifdef LLVM_AVAILABLE
#include <memory>
#include <string>
#include <unordered_map>
#include "types.h"
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
// Global LLVM context (defined in JIT.cpp, where it is marked as thread-unsafe)
extern llvm::LLVMContext g_llvm_ctx;
// Temporary compiler interface: owns the execution engine and a name -> address map
// of the functions it compiled
class jit_compiler final
{
	// Execution instance
	std::unique_ptr<llvm::ExecutionEngine> m_engine;

	// Compiled functions
	std::unordered_map<std::string, std::uintptr_t> m_map;

public:
	jit_compiler(std::unique_ptr<llvm::Module>&&, std::unordered_map<std::string, std::uintptr_t>&&);
	~jit_compiler();

	// Get compiled function address (0 if the name is not in the map)
	std::uintptr_t get(const std::string& name) const
	{
		const auto entry = m_map.find(name);

		return entry == m_map.end() ? 0 : entry->second;
	}
};
#endif

View file

@ -127,6 +127,8 @@ enum x64_op_t : u32
X64OP_NONE,
X64OP_LOAD, // obtain and put the value into x64 register
X64OP_LOAD_BE,
X64OP_LOAD_CMP,
X64OP_LOAD_TEST,
X64OP_STORE, // take the value from x64 register or an immediate and use it
X64OP_STORE_BE,
X64OP_MOVS,
@ -297,8 +299,9 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
switch (op2)
{
case 0x11:
case 0x29:
{
if (!repe && !repne && !oso) // MOVUPS xmm/m, xmm
if (!repe && !repne) // MOVUPS/MOVAPS/MOVUPD/MOVAPD xmm/m, xmm
{
out_op = X64OP_STORE;
out_reg = get_modRM_reg_xmm(code, rex);
@ -433,7 +436,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
case 4: out_op = X64OP_AND; break;
case 5: out_op = X64OP_SUB; break;
case 6: out_op = X64OP_XOR; break;
default: out_op = X64OP_NONE; break; // CMP
default: out_op = X64OP_LOAD_CMP; break;
}
out_reg = X64_IMM8;
@ -452,7 +455,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
case 4: out_op = X64OP_AND; break;
case 5: out_op = X64OP_SUB; break;
case 6: out_op = X64OP_XOR; break;
default: out_op = X64OP_NONE; break; // CMP
default: out_op = X64OP_LOAD_CMP; break;
}
out_reg = oso ? X64_IMM16 : X64_IMM32;
@ -471,7 +474,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
case 4: out_op = X64OP_AND; break;
case 5: out_op = X64OP_SUB; break;
case 6: out_op = X64OP_XOR; break;
default: out_op = X64OP_NONE; break; // CMP
default: out_op = X64OP_LOAD_CMP; break;
}
out_reg = X64_IMM8;
@ -611,6 +614,32 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
}
break;
}
case 0xf6:
{
switch (auto mod_code = get_modRM_reg(code, 0))
{
case 0: out_op = X64OP_LOAD_TEST; break;
default: out_op = X64OP_NONE; break; // TODO...
}
out_reg = X64_IMM8;
out_size = 1;
out_length += get_modRM_size(code) + 1;
return;
}
case 0xf7:
{
switch (auto mod_code = get_modRM_reg(code, 0))
{
case 0: out_op = X64OP_LOAD_TEST; break;
default: out_op = X64OP_NONE; break; // TODO...
}
out_reg = oso ? X64_IMM16 : X64_IMM32;
out_size = get_op_size(rex, oso);
out_length += get_modRM_size(code) + (oso ? 2 : 4);
return;
}
}
out_op = X64OP_NONE;
@ -990,9 +1019,43 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
{
case X64OP_LOAD:
case X64OP_LOAD_BE:
case X64OP_LOAD_CMP:
case X64OP_LOAD_TEST:
{
u32 value;
if (is_writing || !thread->read_reg(addr, value) || !put_x64_reg_value(context, reg, d_size, op == X64OP_LOAD ? se_storage<u32>::swap(value) : value))
if (is_writing || !thread->read_reg(addr, value))
{
return false;
}
if (op != X64OP_LOAD_BE)
{
value = se_storage<u32>::swap(value);
}
if (op == X64OP_LOAD_CMP)
{
u64 rvalue;
if (!get_x64_reg_value(context, reg, d_size, i_size, rvalue) || !set_x64_cmp_flags(context, d_size, value, rvalue))
{
return false;
}
break;
}
if (op == X64OP_LOAD_TEST)
{
u64 rvalue;
if (!get_x64_reg_value(context, reg, d_size, i_size, rvalue) || !set_x64_cmp_flags(context, d_size, value & rvalue, 0))
{
return false;
}
break;
}
if (!put_x64_reg_value(context, reg, d_size, value))
{
return false;
}
@ -1003,7 +1066,12 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
case X64OP_STORE_BE:
{
u64 reg_value;
if (!is_writing || !get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !thread->write_reg(addr, op == X64OP_STORE ? se_storage<u32>::swap((u32)reg_value) : (u32)reg_value))
if (!is_writing || !get_x64_reg_value(context, reg, d_size, i_size, reg_value))
{
return false;
}
if (!thread->write_reg(addr, op == X64OP_STORE ? se_storage<u32>::swap((u32)reg_value) : (u32)reg_value))
{
return false;
}

View file

@ -25,21 +25,21 @@ namespace memory_helper
return ret;
}
void commit_page_memory(void* pointer, size_t page_size)
void commit_page_memory(void* pointer, size_t size)
{
#ifdef _WIN32
VERIFY(VirtualAlloc((u8*)pointer, page_size, MEM_COMMIT, PAGE_READWRITE) != NULL);
VERIFY(VirtualAlloc(pointer, size, MEM_COMMIT, PAGE_READWRITE) != NULL);
#else
VERIFY(mprotect((u8*)pointer, page_size, PROT_READ | PROT_WRITE) != -1);
VERIFY(mprotect((void*)((u64)pointer & -4096), size, PROT_READ | PROT_WRITE) != -1);
#endif
}
void free_reserved_memory(void* pointer, size_t size)
{
#ifdef _WIN32
VERIFY(VirtualFree(pointer, 0, MEM_RELEASE) != 0);
VERIFY(VirtualFree(pointer, 0, MEM_DECOMMIT) != 0);
#else
VERIFY(munmap(pointer, size) == 0);
VERIFY(mprotect(pointer, size, PROT_NONE) != -1);
#endif
}
}

View file

@ -3,20 +3,20 @@
namespace memory_helper
{
/**
* Reserve size bytes of virtual memory and returns it.
* Reserve `size` bytes of virtual memory and returns it.
* The memory should be committed before usage.
*/
void* reserve_memory(size_t size);
void* reserve_memory(std::size_t size);
/**
* Commit page_size bytes of virtual memory starting at pointer.
* Commit `size` bytes of virtual memory starting at pointer.
* That is, back reserved memory with physical memory.
* pointer should belong to a range of reserved memory.
*/
void commit_page_memory(void* pointer, size_t page_size);
void commit_page_memory(void* pointer, std::size_t size);
/**
* Free memory alloced via reserve_memory.
* Decommit all memory committed via commit_page_memory.
*/
void free_reserved_memory(void* pointer, size_t size);
void free_reserved_memory(void* pointer, std::size_t size);
}

View file

@ -42,7 +42,7 @@ namespace utils
void *dynamic_library::get_impl(const std::string &name) const
{
#ifdef _WIN32
return GetProcAddress((HMODULE)m_handle, name.c_str());
return (void*)GetProcAddress((HMODULE)m_handle, name.c_str());
#else
return dlsym(m_handle, (char *)name.c_str());
#endif

View file

@ -146,6 +146,9 @@ else()
else()
llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize ipo x86codegen x86disassembler mcdisassembler)
endif()
if (NOT MSVC)
set_source_files_properties(${RPCS3_SRC_DIR}/../Utilities/JIT.cpp PROPERTIES COMPILE_FLAGS -fno-rtti)
endif()
endif()
link_directories(

View file

@ -68,9 +68,8 @@ cpu_thread::~cpu_thread()
{
}
cpu_thread::cpu_thread(cpu_type type, const std::string& name)
cpu_thread::cpu_thread(cpu_type type)
: type(type)
, name(name)
{
}

View file

@ -4,7 +4,7 @@
#include "../Utilities/BitSet.h"
// CPU Thread Type
enum class cpu_type : u32
enum class cpu_type : u8
{
ppu, // PPU Thread
spu, // SPU Thread
@ -12,7 +12,7 @@ enum class cpu_type : u32
};
// CPU Thread State flags
enum struct cpu_state : u32
enum struct cpu_state : u16
{
stop, // Thread not running (HLE, initial state)
exit, // Irreversible exit
@ -38,18 +38,17 @@ public:
virtual void on_stop() override;
virtual ~cpu_thread() override;
const std::string name;
const cpu_type type;
const id_value<> id{};
const cpu_type type;
cpu_thread(cpu_type type, const std::string& name);
cpu_thread(cpu_type type);
// Public recursive sleep state counter
atomic_t<u8> sleep_counter{};
// Public thread state
atomic_t<bitset_t<cpu_state>> state{ cpu_state::stop };
// Public recursive sleep state counter
atomic_t<u32> sleep_counter{};
// Object associated with sleep state, possibly synchronization primitive (mutex, semaphore, etc.)
atomic_t<void*> owner{};

View file

@ -13,6 +13,7 @@
logs::channel cellGcmSys("cellGcmSys", logs::level::notice);
extern s32 cellGcmCallback(vm::ptr<CellGcmContextData> context, u32 count);
extern void ppu_register_function_at(u32 addr, ppu_function_t ptr);
const u32 tiled_pitches[] = {
0x00000000, 0x00000200, 0x00000300, 0x00000400,
@ -384,6 +385,7 @@ s32 _cellGcmInitBody(vm::pptr<CellGcmContextData> context, u32 cmdSize, u32 ioSi
vm::write32(gcm_info.context_addr + 0x44, 0xabadcafe);
vm::write32(gcm_info.context_addr + 0x48, ppu_instructions::HACK(FIND_FUNC(cellGcmCallback)));
vm::write32(gcm_info.context_addr + 0x4c, ppu_instructions::BLR());
ppu_register_function_at(gcm_info.context_addr + 0x48, BIND_FUNC(cellGcmCallback));
vm::_ref<CellGcmContextData>(gcm_info.context_addr) = current_context;
context->set(gcm_info.context_addr);

View file

@ -2363,7 +2363,7 @@ s32 ppu_error_code::report(s32 error, const char* text)
{
if (auto func = static_cast<PPUThread*>(thread)->last_function)
{
LOG_ERROR(PPU, "Function '%s' failed with 0x%08x : %s", func, error, text);
LOG_ERROR(PPU, "'%s' failed with 0x%08x : %s", func, error, text);
}
else
{

View file

@ -4,7 +4,12 @@
using ppu_function_t = void(*)(PPUThread&);
#define BIND_FUNC(func) [](PPUThread& ppu){ ppu.last_function = #func; ppu_func_detail::do_call(ppu, func); }
#define BIND_FUNC(func) static_cast<ppu_function_t>([](PPUThread& ppu){\
const auto old_f = ppu.last_function;\
ppu.last_function = #func;\
ppu_func_detail::do_call(ppu, func);\
ppu.last_function = old_f;\
})
struct ppu_va_args_t
{

View file

@ -125,9 +125,22 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
// Function lookup table. Not supposed to grow after emulation start.
std::vector<ppu_function_t> g_ppu_function_cache;
// Function name cache in format %s.%s (module name, function name)
std::vector<std::string> g_ppu_function_names;
// Function NID cache for autopause. Autopause tool should probably be rewritten.
std::vector<u32> g_ppu_fnid_cache;
// Get the cached name for the function index.
// Indices outside of g_ppu_function_names produce a ".<index>" placeholder instead.
extern std::string ppu_get_module_function_name(u32 index)
{
	if (index >= g_ppu_function_names.size())
	{
		return fmt::format(".%u", index);
	}

	return g_ppu_function_names[index];
}
extern void ppu_execute_function(PPUThread& ppu, u32 index)
{
if (index < g_ppu_function_cache.size())
@ -136,22 +149,9 @@ extern void ppu_execute_function(PPUThread& ppu, u32 index)
if (debug::autopause::pause_function(g_ppu_fnid_cache[index]) && ppu.check_status()) throw cpu_state::ret;
if (const auto func = g_ppu_function_cache[index])
{
const auto previous_function = ppu.last_function; // TODO: use gsl::finally or something, but only if it's equally fast
try
{
func(ppu);
}
catch (...)
{
logs::HLE.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", ppu.last_function);
ppu.last_function = previous_function;
throw;
}
LOG_TRACE(HLE, "Function '%s' finished, r3=0x%llx", ppu.last_function, ppu.GPR[3]);
ppu.last_function = previous_function;
LOG_TRACE(HLE, "'%s' finished, r3=0x%llx", ppu_get_module_function_name(index), ppu.GPR[3]);
return;
}
}
@ -159,6 +159,16 @@ extern void ppu_execute_function(PPUThread& ppu, u32 index)
throw fmt::exception("Function not registered (index %u)" HERE, index);
}
// Get the function pointer stored in g_ppu_function_cache for the index (nullptr if out of range)
extern ppu_function_t ppu_get_function(u32 index)
{
	return index < g_ppu_function_cache.size() ? g_ppu_function_cache[index] : nullptr;
}
extern u32 ppu_generate_id(const char* name)
{
// Symbol name suffix
@ -312,7 +322,10 @@ static void ppu_initialize_modules()
// Reinitialize function cache
g_ppu_function_cache = ppu_function_manager::get();
g_ppu_fnid_cache = std::vector<u32>(g_ppu_function_cache.size());
g_ppu_function_names.clear();
g_ppu_function_names.resize(g_ppu_function_cache.size());
g_ppu_fnid_cache.clear();
g_ppu_fnid_cache.resize(g_ppu_function_cache.size());
// "Use" all the modules for correct linkage
for (auto& module : registered)
@ -322,6 +335,7 @@ static void ppu_initialize_modules()
for (auto& function : module->functions)
{
LOG_TRACE(LOADER, "** 0x%08X: %s", function.first, function.second.name);
g_ppu_function_names.at(function.second.index) = fmt::format("%s.%s", module->name, function.second.name);
g_ppu_fnid_cache.at(function.second.index) = function.first;
}
@ -1531,10 +1545,12 @@ void ppu_exec_loader::load() const
{
// TODO
const u32 index = ::size32(g_ppu_function_cache);
const std::string& fname = ppu_get_function_name(module.first, fnid);
g_ppu_function_cache.emplace_back();
g_ppu_function_names.emplace_back(fmt::format("%s.%s", module.first, fname));
g_ppu_fnid_cache.emplace_back(fnid);
LOG_ERROR(LOADER, "Unknown function '%s' in module '%s' (index %u)", ppu_get_function_name(module.first, fnid), module.first, index);
LOG_ERROR(LOADER, "Unknown function '%s' in module '%s' (index %u)", fname, module.first, index);
for (auto& import : entry.second.second)
{
@ -1544,11 +1560,11 @@ void ppu_exec_loader::load() const
if (!ppu_patch_import_stub(stub, index))
{
LOG_ERROR(LOADER, "Failed to inject code for function '%s' in module '%s' (0x%x)", ppu_get_function_name(module.first, fnid), module.first, stub);
LOG_ERROR(LOADER, "Failed to inject code for function '%s' in module '%s' (0x%x)", fname, module.first, stub);
}
else
{
LOG_NOTICE(LOADER, "Injected hack for function '%s' in module '%s' (*0x%x)", ppu_get_function_name(module.first, fnid), module.first, stub);
LOG_NOTICE(LOADER, "Injected hack for function '%s' in module '%s' (*0x%x)", fname, module.first, stub);
}
}

View file

@ -199,22 +199,9 @@ public:
// Call specified function directly if LLE is not available, call LLE equivalent in callback style otherwise
template<typename T, T Func, typename... Args, typename RT = std::result_of_t<T(Args...)>>
inline RT ppu_execute_function_or_callback(const char* name, PPUThread& ppu, Args&&... args)
{
const auto previous_function = ppu.last_function; // TODO
try
{
return Func(std::forward<Args>(args)...);
}
catch (...)
{
LOG_ERROR(PPU, "Function call '%s' aborted", ppu.last_function);
ppu.last_function = previous_function;
throw;
}
ppu.last_function = previous_function;
}
#define CALL_FUNC(ppu, func, ...) ppu_execute_function_or_callback<decltype(&func), &func>(#func, ppu, __VA_ARGS__)

View file

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "Utilities/Config.h"
#include "Utilities/VirtualMemory.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
#include "Emu/IdManager.h"
@ -8,6 +9,37 @@
#include "PPUAnalyser.h"
#include "PPUModule.h"
#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
#include "llvm/Support/FormattedStream.h"
#include "llvm/IR/LLVMContext.h"
//#include "llvm/IR/Dominators.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LegacyPassManager.h"
//#include "llvm/IR/Module.h"
//#include "llvm/IR/Function.h"
//#include "llvm/Analysis/Passes.h"
//#include "llvm/Analysis/BasicAliasAnalysis.h"
//#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
//#include "llvm/Analysis/LoopInfo.h"
//#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Vectorize.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "Utilities/JIT.h"
#include "PPUTranslator.h"
#endif
enum class ppu_decoder_type
{
precise,
@ -25,21 +57,20 @@ cfg::map_entry<ppu_decoder_type> g_cfg_ppu_decoder(cfg::root.core, "PPU Decoder"
const ppu_decoder<ppu_interpreter_precise> s_ppu_interpreter_precise;
const ppu_decoder<ppu_interpreter_fast> s_ppu_interpreter_fast;
struct ppu_addr_hash
const auto s_ppu_compiled = static_cast<ppu_function_t*>(memory_helper::reserve_memory(0x200000000));
extern void ppu_register_function_at(u32 addr, ppu_function_t ptr)
{
u32 operator()(u32 value) const
if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm)
{
return value / sizeof(32);
memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(ppu_function_t));
s_ppu_compiled[addr / 4] = ptr;
}
}
};
static std::unordered_map<u32, void(*)(), ppu_addr_hash> s_ppu_compiled;
std::string PPUThread::get_name() const
{
return fmt::format("PPU[0x%x] Thread (%s)", id, name);
return fmt::format("PPU[0x%x] Thread (%s)", id, m_name);
}
std::string PPUThread::dump() const
@ -96,21 +127,14 @@ void PPUThread::cpu_task()
{
//SetHostRoundingMode(FPSCR_RN_NEAR);
if (custom_task)
{
if (check_status()) return;
return custom_task(*this);
return custom_task ? custom_task(*this) : fast_call(pc, static_cast<u32>(GPR[2]));
}
void PPUThread::cpu_task_main()
{
if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm)
{
const auto found = s_ppu_compiled.find(pc);
if (found != s_ppu_compiled.end())
{
return found->second();
}
return s_ppu_compiled[pc / 4](*this);
}
g_tls_log_prefix = []
@ -269,7 +293,8 @@ PPUThread::~PPUThread()
}
PPUThread::PPUThread(const std::string& name)
: cpu_thread(cpu_type::ppu, name)
: cpu_thread(cpu_type::ppu)
, m_name(name)
{
}
@ -281,39 +306,54 @@ be_t<u64>* PPUThread::get_stack_arg(s32 i, u64 align)
void PPUThread::fast_call(u32 addr, u32 rtoc)
{
auto old_PC = pc;
auto old_stack = GPR[1];
auto old_rtoc = GPR[2];
auto old_LR = LR;
auto old_task = std::move(custom_task);
const auto old_PC = pc;
const auto old_stack = GPR[1];
const auto old_rtoc = GPR[2];
const auto old_LR = LR;
const auto old_task = std::move(custom_task);
const auto old_func = last_function;
pc = addr;
GPR[2] = rtoc;
LR = Emu.GetCPUThreadStop();
custom_task = nullptr;
last_function = nullptr;
try
{
cpu_task();
cpu_task_main();
if (GPR[1] != old_stack && !state.test(cpu_state::ret) && !state.test(cpu_state::exit)) // GPR[1] shouldn't change
{
throw fmt::exception("Stack inconsistency (addr=0x%x, rtoc=0x%x, SP=0x%llx, old=0x%llx)", addr, rtoc, GPR[1], old_stack);
}
}
catch (cpu_state _s)
{
state += _s;
if (_s != cpu_state::ret) throw;
}
catch (EmulationStopped)
{
if (last_function) LOG_WARNING(PPU, "'%s' aborted", last_function);
last_function = old_func;
throw;
}
catch (...)
{
if (last_function) LOG_ERROR(PPU, "'%s' aborted", last_function);
last_function = old_func;
throw;
}
state -= cpu_state::ret;
pc = old_PC;
if (GPR[1] != old_stack) // GPR[1] shouldn't change
{
throw EXCEPTION("Stack inconsistency (addr=0x%x, rtoc=0x%x, SP=0x%llx, old=0x%llx)", addr, rtoc, GPR[1], old_stack);
}
GPR[1] = old_stack;
GPR[2] = old_rtoc;
LR = old_LR;
custom_task = std::move(old_task);
last_function = old_func;
//if (custom_task)
//{
@ -322,59 +362,15 @@ void PPUThread::fast_call(u32 addr, u32 rtoc)
//}
}
#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
//#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
//#include "llvm/Support/Host.h"
#include "llvm/Support/FormattedStream.h"
//#include "llvm/Support/Debug.h"
//#include "llvm/CodeGen/CommandFlags.h"
//#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LLVMContext.h"
//#include "llvm/IR/Dominators.h"
#include "llvm/IR/Verifier.h"
//#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LegacyPassManager.h"
//#include "llvm/IR/Module.h"
//#include "llvm/IR/Function.h"
//#include "llvm/Analysis/Passes.h"
//#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
//#include "llvm/Analysis/LoopInfo.h"
//#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
//#include "llvm/Object/ObjectFile.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "PPUTranslator.h"
#ifdef _WIN32
#include <Windows.h>
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#endif
const ppu_decoder<ppu_itype> s_ppu_itype;
extern u64 get_timebased_time();
extern void ppu_execute_syscall(PPUThread& ppu, u64 code);
extern void ppu_execute_function(PPUThread& ppu, u32 index);
extern ppu_function_t ppu_get_syscall(u64 code);
extern std::string ppu_get_syscall_name(u64 code);
extern ppu_function_t ppu_get_function(u32 index);
extern std::string ppu_get_module_function_name(u32 index);
extern __m128 sse_exp2_ps(__m128 A);
extern __m128 sse_log2_ps(__m128 A);
@ -386,107 +382,16 @@ extern __m128i sse_cellbe_lvrx(u64 addr);
extern void sse_cellbe_stvlx(u64 addr, __m128i a);
extern void sse_cellbe_stvrx(u64 addr, __m128i a);
struct Listener final : llvm::JITEventListener
[[noreturn]] static void ppu_trap(u64 addr)
{
// Writes the raw contents of every emitted object to "LLVM.obj" in the config directory.
// The second argument (load info) is not used.
virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
    const llvm::StringRef object_bytes = obj.getData();

    // The file is opened with fs::rewrite and closed when `dump` goes out of scope
    fs::file dump(fs::get_config_dir() + "LLVM.obj", fs::rewrite);
    dump.write(object_bytes.data(), object_bytes.size());
}
};
static Listener s_listener;
// Memory size: 512 MB
static const u64 s_memory_size = 0x20000000;
// Try to reserve a portion of virtual memory in the first 2 GB address space, if possible.
static void* const s_memory = []() -> void*
{
#ifdef _WIN32
for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000)
{
if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS))
{
return (void*)addr;
}
}
return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS);
#else
return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
}();
// EH frames
static u8* s_unwind_info;
static u64 s_unwind_size;
#ifdef _WIN32
// Custom .pdata section replacement
static std::vector<RUNTIME_FUNCTION> s_unwind;
#endif
struct MemoryManager final : llvm::RTDyldMemoryManager
{
// Returns the current CPU thread as a PPUThread pointer. The addr argument is unused
// (only referenced by the commented-out trace call).
static PPUThread* context(u64 addr)
{
    //trace(addr);
    const auto current = get_current_cpu_thread();
    return static_cast<PPUThread*>(current);
}
// Logs the trap address at error level and then throws a formatted exception carrying
// the same message. Never returns.
[[noreturn]] static void trap(u64 addr)
{
LOG_ERROR(PPU, "Trap! (0x%llx)", addr);
throw fmt::exception("Trap! (0x%llx)", addr);
}
static void trace(u64 addr)
static void ppu_trace(u64 addr)
{
LOG_NOTICE(PPU, "Trace: 0x%llx", addr);
}
// Executes the HLE function with the given index on the current PPU thread.
// Afterwards, if any thread state flag is set and check_status() reports true,
// cpu_state::ret is thrown (marked as temporary in the original code).
static void hack(u32 index)
{
    const auto thread = static_cast<PPUThread*>(get_current_cpu_thread());

    ppu_execute_function(*thread, index);

    if (!thread->state.load())
    {
        return;
    }

    if (thread->check_status())
    {
        throw cpu_state::ret; // Temporarily
    }
}
// Executes the syscall with the given code on the current PPU thread.
// Afterwards, if any thread state flag is set and check_status() reports true,
// cpu_state::ret is thrown (marked as temporary in the original code).
static void syscall(u64 code)
{
    const auto thread = static_cast<PPUThread*>(get_current_cpu_thread());

    ppu_execute_syscall(*thread, code);

    if (!thread->state.load())
    {
        return;
    }

    if (thread->check_status())
    {
        throw cpu_state::ret; // Temporarily
    }
}
// Returns the value of get_timebased_time() truncated to 32 bits
static u32 tbl()
{
    const u64 timebase = get_timebased_time();
    return static_cast<u32>(timebase);
}
// Dispatches a call to the guest address `addr`:
//  1. if a compiled function is registered for the address, it is invoked;
//  2. otherwise, if the instruction at addr decodes as HACK and the next one equals BLR,
//     the HLE function encoded in the low 26 bits of the opcode is executed;
//  3. otherwise trap(addr) is raised.
static void call(u32 addr)
{
    const auto compiled = s_ppu_compiled.find(addr);

    if (compiled != s_ppu_compiled.end())
    {
        compiled->second();
        return;
    }

    const auto opcode = vm::read32(addr).value();

    // Allow HLE callbacks without compiling them
    const bool is_hle_stub = s_ppu_itype.decode(opcode) == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR();

    if (!is_hle_stub)
    {
        trap(addr);
    }

    hack(opcode & 0x3ffffff);
}
static __m128 sse_rcp_ps(__m128 A)
{
return _mm_rcp_ps(A);
@ -509,27 +414,27 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
return A;
}
static u32 lwarx(u32 addr)
static u32 ppu_lwarx(u32 addr)
{
be_t<u32> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
}
static u64 ldarx(u32 addr)
static u64 ppu_ldarx(u32 addr)
{
be_t<u64> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
}
static bool stwcx(u32 addr, u32 reg_value)
static bool ppu_stwcx(u32 addr, u32 reg_value)
{
const be_t<u32> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
}
static bool stdcx(u32 addr, u64 reg_value)
static bool ppu_stdcx(u32 addr, u64 reg_value)
{
const be_t<u64> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
@ -537,11 +442,17 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
// Returns the carry-out of the 64-bit addition a + b + c, where c is the incoming carry.
static bool adde_carry(u64 a, u64 b, bool c)
{
#ifdef _MSC_VER
    // _addcarry_u64 stores the sum through its last argument, so it needs a valid destination
    unsigned __int64 sum;
    return _addcarry_u64(c, a, b, &sum) != 0;
#else
    bool result;
    // addb $0xff sets CF when c is non-zero, adcq adds a into b with that carry, setb captures the resulting CF
    __asm__("addb $0xff, %[c] \n adcq %[a], %[b] \n setb %[result]" : [a] "+&r" (a), [b] "+&r" (b), [c] "+&r" (c), [result] "=r" (result));
    return result;
#endif
}
// Interpreter call for simple vector instructions
static __m128i vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c)
static __m128i ppu_vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.VR[21].vi = _a;
@ -558,157 +469,34 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
return ppu.VR[20].vi;
}
// Interpreter call for simple vector instructions with immediate
static __m128i veciop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, __m128i _b)
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& funcs, u32 entry)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.VR[22].vi = _b;
op.vd = 20;
op.vb = 22;
func(ppu, op);
return ppu.VR[20].vi;
if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty())
{
return;
}
// Interpreter call for FP instructions
static f64 fpop(decltype(&ppu_interpreter::UNK) func, f64 _a, f64 _b, f64 _c)
std::unordered_map<std::string, std::uintptr_t> link_table
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.FPR[21] = _a;
ppu.FPR[22] = _b;
ppu.FPR[23] = _c;
ppu_opcode_t op{};
op.frd = 20;
op.fra = 21;
op.frb = 22;
op.frc = 23;
func(ppu, op);
return ppu.FPR[20];
}
// Interpreter call for GPR instructions writing result to RA.
// Runs `func` on the current PPU thread with GPR[op.rs] temporarily replaced by _s and
// returns the value left in GPR[op.ra]. GPR[op.ra] and GPR[op.rs] are restored afterwards.
static u64 aimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
// Save the registers that are overwritten below
const u64 a = ppu.GPR[op.ra];
const u64 s = ppu.GPR[op.rs];
ppu.GPR[op.rs] = _s;
func(ppu, op);
// Fetch the result before restoring the saved values
const u64 r = ppu.GPR[op.ra];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rs] = s;
return r;
}
// Interpreter call for GPR instructions writing result to RA (two source operands).
// Runs `func` on the current PPU thread with GPR[op.rs] = _s and GPR[op.rb] = _b and
// returns the value left in GPR[op.ra]. GPR[op.ra], GPR[op.rs] and GPR[op.rb] are restored afterwards.
static u64 aimmbop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _b)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
// Save the registers that are overwritten below
const u64 a = ppu.GPR[op.ra];
const u64 s = ppu.GPR[op.rs];
const u64 b = ppu.GPR[op.rb];
// rb is written after rs, so _b wins if both fields name the same register
ppu.GPR[op.rs] = _s;
ppu.GPR[op.rb] = _b;
func(ppu, op);
// Fetch the result before restoring the saved values
const u64 r = ppu.GPR[op.ra];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rs] = s;
ppu.GPR[op.rb] = b;
return r;
}
// Interpreter call for GPR instructions writing result to RA (destructive).
// Here RA is also an input: runs `func` on the current PPU thread with GPR[op.rs] = _s and
// GPR[op.ra] = _a and returns the value left in GPR[op.ra]. Both registers are restored afterwards.
static u64 aaimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _a)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
// Save the registers that are overwritten below
const u64 s = ppu.GPR[op.rs];
const u64 a = ppu.GPR[op.ra];
// ra is written after rs, so _a wins if both fields name the same register
ppu.GPR[op.rs] = _s;
ppu.GPR[op.ra] = _a;
func(ppu, op);
// Fetch the result before restoring the saved values
const u64 r = ppu.GPR[op.ra];
ppu.GPR[op.rs] = s;
ppu.GPR[op.ra] = a;
return r;
}
// Interpreter call for GPR instructions reading RA and writing result to RD.
// Runs `func` on the current PPU thread with GPR[op.ra] temporarily replaced by _a and
// returns the value left in GPR[op.rd]. GPR[op.ra] and GPR[op.rd] are restored afterwards.
static u64 immaop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
// Save the registers that are overwritten below
const u64 a = ppu.GPR[op.ra];
const u64 d = ppu.GPR[op.rd];
ppu.GPR[op.ra] = _a;
func(ppu, op);
// Fetch the result before restoring the saved values
const u64 r = ppu.GPR[op.rd];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rd] = d;
return r;
}
// Interpreter call for GPR instructions reading RA and RB and writing result to RD.
// Runs `func` on the current PPU thread with GPR[op.ra] = _a and GPR[op.rb] = _b and
// returns the value left in GPR[op.rd]. GPR[op.ra], GPR[op.rb] and GPR[op.rd] are restored afterwards.
static u64 immabop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a, u64 _b)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
// Save the registers that are overwritten below
const u64 a = ppu.GPR[op.ra];
const u64 b = ppu.GPR[op.rb];
const u64 d = ppu.GPR[op.rd];
// rb is written after ra, so _b wins if both fields name the same register
ppu.GPR[op.ra] = _a;
ppu.GPR[op.rb] = _b;
func(ppu, op);
// Fetch the result before restoring the saved values
const u64 r = ppu.GPR[op.rd];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rb] = b;
ppu.GPR[op.rd] = d;
return r;
}
// No operation on a specific u64 value (silly optimization barrier):
// returns its argument unchanged.
static u64 nop64(u64 value)
{
return value;
}
std::unordered_map<std::string, u64> table
{
{ "__memory", (u64)vm::base(0) },
{ "__context", (u64)&context },
{ "__trap", (u64)&trap },
{ "__trace", (u64)&trace },
{ "__hlecall", (u64)&hack },
{ "__syscall", (u64)&syscall },
{ "__get_tbl", (u64)&tbl },
{ "__call", (u64)&call },
{ "__lwarx", (u64)&lwarx },
{ "__ldarx", (u64)&ldarx },
{ "__stwcx", (u64)&stwcx },
{ "__stdcx", (u64)&stdcx },
{ "__memory", (u64)vm::g_base_addr },
{ "__memptr", (u64)&vm::g_base_addr },
{ "__trap", (u64)&ppu_trap },
{ "__trace", (u64)&ppu_trace },
{ "__hlecall", (u64)&ppu_execute_function },
{ "__syscall", (u64)&ppu_execute_syscall },
{ "__get_tbl", (u64)&get_timebased_time },
{ "__call", (u64)s_ppu_compiled },
{ "__lwarx", (u64)&ppu_lwarx },
{ "__ldarx", (u64)&ppu_ldarx },
{ "__stwcx", (u64)&ppu_stwcx },
{ "__stdcx", (u64)&ppu_stdcx },
{ "__vec3op", (u64)&ppu_vec3op },
{ "__adde_get_ca", (u64)&adde_carry },
{ "__vexptefp", (u64)&sse_exp2_ps },
{ "__vlogefp", (u64)&sse_log2_ps },
{ "__vperm", (u64)&sse_altivec_vperm },
{ "__vrefp", (u64)&sse_rcp_ps },
{ "__vrsqrtefp", (u64)&sse_rsqrt_ps },
{ "__vec3op", (u64)&vec3op },
{ "__veciop", (u64)&veciop },
{ "__aimmop", (u64)&aimmop },
{ "__aimmbop", (u64)&aimmbop },
{ "__aaimmop", (u64)&aaimmop },
{ "__immaop", (u64)&immaop },
{ "__immabop", (u64)&immabop },
{ "__fpop", (u64)&fpop },
{ "__nop64", (u64)&nop64 },
{ "__lvsl", (u64)&sse_altivec_lvsl },
{ "__lvsr", (u64)&sse_altivec_lvsr },
{ "__lvlx", (u64)&sse_cellbe_lvlx },
@ -719,150 +507,30 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
{ "__frsqrte", (u64)&sse_rsqrt_ss },
};
// Resolves a symbol name to an address for the JIT linker.
// Lookup order: the base RTDyldMemoryManager first (a hit is logged at error level),
// then the local `table`. Unknown symbols are logged as fatal and resolved to trap().
virtual u64 getSymbolAddress(const std::string& name) override
{
    const u64 default_addr = RTDyldMemoryManager::getSymbolAddress(name);

    if (default_addr)
    {
        LOG_ERROR(GENERAL, "LLVM: Linkage requested %s -> 0x%016llx", name, default_addr);
        return default_addr;
    }

    const auto entry = table.find(name);

    if (entry == table.end())
    {
        LOG_FATAL(GENERAL, "LLVM: Linkage failed for %s", name);
        return (u64)trap;
    }

    return entry->second;
}
// Hands out the next chunk of the reserved region for a code section and makes it
// readable, writable and executable. The cursor m_next is advanced to the next
// 4096-byte boundary past the section; the `align` and `sec_id` arguments are not used.
// Returns nullptr if the pages cannot be committed/protected.
virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
{
    // StringRef is not guaranteed to be null-terminated, so make an owned copy for "%s" logging
    const std::string section = sec_name.str();

    // Simple allocation (TODO)
    const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096);

#ifdef _WIN32
    if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE))
#else
    if (::mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC))
#endif
    {
        LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s'", section);
        return nullptr;
    }

    LOG_SUCCESS(GENERAL, "LLVM: Code section '%s' allocated -> 0x%p", section, ptr);
    return (u8*)ptr;
}
// Hands out the next chunk of the reserved region for a data section and makes it
// readable and writable. The cursor m_next is advanced to the next 4096-byte boundary
// past the section; the `align`, `sec_id` and `is_ro` arguments are not used.
// Returns nullptr if the pages cannot be committed/protected.
virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
{
    // StringRef is not guaranteed to be null-terminated, so make an owned copy for "%s" logging
    const std::string section = sec_name.str();

    // Simple allocation (TODO)
    const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096);

#ifdef _WIN32
    if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE))
#else
    if (::mprotect(ptr, size, PROT_READ | PROT_WRITE))
#endif
    {
        LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s'", section);
        return nullptr;
    }

    LOG_SUCCESS(GENERAL, "LLVM: Data section '%s' allocated -> 0x%p", section, ptr);
    return (u8*)ptr;
}
// Finalization hook: currently changes no page permissions and always returns false.
// NOTE(review): in LLVM's memory manager interface a false return appears to mean
// "no error" - confirm against the LLVM version in use.
virtual bool finalizeMemory(std::string* = nullptr) override
{
// TODO: make sections read-only when necessary
return false;
}
// Records the location and size of the EH frame data in s_unwind_info / s_unwind_size,
// then forwards the registration to the base class.
virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
    s_unwind_size = size;
    s_unwind_info = addr;

    RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
}
// Logs an error (this call is not expected to happen) and forwards to the base class.
virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
    // Not expected
    LOG_ERROR(GENERAL, "deregisterEHFrames() called");

    RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size);
}
// Releases what the memory manager set up. Failures are logged as fatal but not propagated.
// Windows: removes the function table built from s_unwind and decommits the reserved region.
// Other platforms: resets the whole reserved region to PROT_NONE.
~MemoryManager()
{
#ifdef _WIN32
// Remove the RUNTIME_FUNCTION table (the log message prints s_unwind_info, not s_unwind.data())
if (!RtlDeleteFunctionTable(s_unwind.data()))
{
LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError());
}
// MEM_DECOMMIT with size 0: decommit only, the address range is not released here
if (!VirtualFree(s_memory, 0, MEM_DECOMMIT))
{
LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError());
}
#else
// Drop all access rights on the reserved region
if (::mprotect(s_memory, s_memory_size, PROT_NONE))
{
LOG_FATAL(GENERAL, "mprotect(0x%p) failed! Error %d", s_memory, errno);
}
// TODO: unregister EH frames if necessary
#endif
}
private:
void* m_next = s_memory;
};
llvm::LLVMContext g_context;
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& funcs, u32 entry)
{
if (!s_memory)
{
throw std::runtime_error("LLVM: Memory not allocated, report to the developers." HERE);
}
if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty())
{
return;
}
#ifdef LLVM_AVAILABLE
using namespace llvm;
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
LLVMLinkInMCJIT();
// Initialization
const auto _pi8 = Type::getInt8PtrTy(g_context);
const auto _void = Type::getVoidTy(g_context);
const auto _func = FunctionType::get(Type::getVoidTy(g_context), false);
// Create LLVM module
std::unique_ptr<Module> module = std::make_unique<Module>(name, g_context);
std::unique_ptr<Module> module = std::make_unique<Module>(name, g_llvm_ctx);
// Initialize target
module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
// Initialize translator
std::unique_ptr<PPUTranslator> translator = std::make_unique<PPUTranslator>(g_context, module.get(), 0, entry);
std::unique_ptr<PPUTranslator> translator = std::make_unique<PPUTranslator>(g_llvm_ctx, module.get(), 0, entry);
// Define some types
const auto _void = Type::getVoidTy(g_llvm_ctx);
const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false);
// Initialize function list
for (const auto& info : funcs)
{
if (info.second)
{
translator->AddFunction(info.first, cast<Function>(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func)));
const auto f = cast<Function>(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func));
f->addAttribute(1, Attribute::NoAlias);
translator->AddFunction(info.first, f);
}
translator->AddBlockInfo(info.first);
@ -877,18 +545,15 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
pm.add(createTailCallEliminationPass());
pm.add(createReassociatePass());
pm.add(createInstructionCombiningPass());
//pm.add(new DominatorTreeWrapperPass());
//pm.add(createInstructionCombiningPass());
//pm.add(new MemoryDependenceAnalysis());
//pm.add(createBasicAAWrapperPass());
pm.add(new MemoryDependenceAnalysis());
pm.add(createLICMPass());
pm.add(createLoopInstSimplifyPass());
pm.add(createGVNPass());
pm.add(createDeadStoreEliminationPass());
//pm.add(createGVNPass());
//pm.add(createBBVectorizePass());
//pm.add(new LoopInfo());
//pm.add(new ScalarEvolution());
pm.add(createSCCPPass());
//pm.addPass(new SyscallAnalysisPass()); // Requires constant propagation
pm.add(createInstructionCombiningPass());
pm.add(createInstructionSimplifierPass());
pm.add(createAggressiveDCEPass());
pm.add(createCFGSimplificationPass());
//pm.add(createLintPass()); // Check
@ -898,30 +563,63 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
{
if (info.second)
{
pm.run(*translator->TranslateToIR(info.first, info.first + info.second, vm::_ptr<u32>(info.first)));
const auto func = translator->TranslateToIR(info.first, info.first + info.second, vm::_ptr<u32>(info.first));
// Run optimization passes
pm.run(*func);
const auto _syscall = module->getFunction("__syscall");
const auto _hlecall = module->getFunction("__hlecall");
for (auto i = inst_begin(*func), end = inst_end(*func); i != end;)
{
const auto inst = &*i++;
if (const auto ci = dyn_cast<CallInst>(inst))
{
const auto cif = ci->getCalledFunction();
const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr;
if (cif == _syscall && op1 && isa<ConstantInt>(op1))
{
// Try to determine syscall using the value from r11 (requires constant propagation)
const u64 index = cast<ConstantInt>(op1)->getZExtValue();
if (const auto ptr = ppu_get_syscall(index))
{
const auto n = ppu_get_syscall_name(index);
const auto f = cast<Function>(module->getOrInsertFunction(n, _func));
link_table.emplace(n, reinterpret_cast<std::uintptr_t>(ptr));
// Call the syscall directly
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
}
}
//static auto s_current = &PPUTranslator::UNK;
if (cif == _hlecall && op1 && isa<ConstantInt>(op1))
{
const u32 index = static_cast<u32>(cast<ConstantInt>(op1)->getZExtValue());
//for (const auto& info : s_test)
//{
// const u64 pseudo_addr = (u64)&info.first + INT64_MIN;
if (const auto ptr = ppu_get_function(index))
{
const auto n = ppu_get_module_function_name(index);
const auto f = cast<Function>(module->getOrInsertFunction(n, _func));
link_table.emplace(n, reinterpret_cast<std::uintptr_t>(ptr));
// s_current = info.second;
// const auto func = translator->TranslateToIR(pseudo_addr, pseudo_addr, nullptr, [](PPUTranslator* _this)
// {
// (_this->*s_current)(op);
// _this->ReturnFromFunction();
// });
// pm.run(*func);
//}
// Call the function directly
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
}
}
}
}
}
}
legacy::PassManager mpm;
// Remove unused functions, structs, global variables, etc
mpm.add(createStripDeadPrototypesPass());
//mpm.add(createFunctionInliningPass());
mpm.run(*module);
std::string result;
@ -944,98 +642,30 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
Module* module_ptr = module.get();
std::shared_ptr<ExecutionEngine> engine(EngineBuilder(std::move(module))
.setErrorStr(&result)
.setMCJITMemoryManager(std::make_unique<MemoryManager>())
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setRelocationModel(Reloc::PIC_)
.setCodeModel((u64)s_memory <= 0x60000000 ? CodeModel::Medium : CodeModel::Large)
.setMCPU(sys::getHostCPUName())
.create());
const auto jit = fxm::make<jit_compiler>(std::move(module), std::move(link_table));
if (!engine)
if (!jit)
{
throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result);
LOG_FATAL(PPU, "LLVM: Multiple modules are not yet supported");
return;
}
engine->setProcessAllSections(true);
//engine->setVerifyModules(true);
engine->RegisterJITEventListener(&s_listener);
engine->finalizeObject();
memory_helper::free_reserved_memory(s_ppu_compiled, 0x200000000); // TODO
s_ppu_compiled.clear();
// Get function addresses
// Get and install function addresses
for (const auto& info : funcs)
{
const u32 addr = info.first;
if (info.second)
{
const std::uintptr_t link = engine->getFunctionAddress(fmt::format("__sub_%x", info.first));
s_ppu_compiled.emplace(info.first, (void(*)())link);
const std::uintptr_t link = jit->get(fmt::format("__sub_%x", addr));
memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(ppu_function_t));
s_ppu_compiled[addr / 4] = (ppu_function_t)link;
LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second);
LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", addr, link, addr, info.second);
}
}
// Delete IR to lower memory consumption
for (auto& func : module_ptr->functions())
{
func.deleteBody();
}
#ifdef _WIN32
// Register .xdata UNWIND_INFO (.pdata section is empty for some reason)
std::set<u64> func_set;
for (const auto& pair : s_ppu_compiled)
{
// Get addresses
func_set.emplace((u64)pair.second);
}
func_set.emplace(::align(*--func_set.end() + 4096, 4096));
const u64 base = (u64)s_memory;
const u8* bits = s_unwind_info;
s_unwind.clear();
s_unwind.reserve(s_ppu_compiled.size());
for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++)
{
const u64 addr = *it;
const u64 next = *func_set.upper_bound(addr);
// Generate RUNTIME_FUNCTION record
RUNTIME_FUNCTION uw;
uw.BeginAddress = static_cast<u32>(addr - base);
uw.EndAddress = static_cast<u32>(next - base);
uw.UnwindData = static_cast<u32>((u64)bits - base);
s_unwind.emplace_back(uw);
// Parse .xdata record
VERIFY(*bits++ == 1); // Version and flags
bits++; // Size of prolog
const u8 count = *bits++; // Count of unwind codes
bits++; // Frame Reg + Off
bits += ::align(count, 2) * sizeof(u16); // UNWIND_CODE array
while (!*bits && bits < s_unwind_info + s_unwind_size) bits++; // Skip strange zero padding (???)
}
VERIFY(bits == s_unwind_info + s_unwind_size);
VERIFY(RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base));
LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size);
#endif
fxm::import<ExecutionEngine>(WRAP_EXPR(engine));
LOG_SUCCESS(PPU, "LLVM: Compilation finished (%s)", sys::getHostCPUName().data());
}
#else
// Fallback definition placed in the #else branch: does nothing with its arguments.
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& funcs, u32 entry)
{
}
#endif
}

View file

@ -11,6 +11,7 @@ public:
virtual std::string dump() const override;
virtual void cpu_init() override;
virtual void cpu_task() override;
virtual void cpu_task_main();
virtual bool handle_interrupt() override;
virtual ~PPUThread() override;
@ -74,6 +75,8 @@ public:
bool is_joinable = true;
bool is_joining = false;
const std::string m_name; // Thread name
std::function<void(PPUThread&)> custom_task;
// Function name can be stored here. Used to print the last called function.

View file

@ -82,12 +82,20 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, u64 base, u64
thread_struct.insert(thread_struct.end(), 32, GetType<bool>()); // CR[0..31]
m_thread_type = StructType::create(m_context, thread_struct, "context_t");
// Callable
m_call = new GlobalVariable(*module, ArrayType::get(FunctionType::get(GetType<void>(), {m_thread_type->getPointerTo()}, false)->getPointerTo(), 0x40000000), true, GlobalValue::ExternalLinkage, 0, "__call");
}
// Destructor with an empty body: no explicit cleanup is performed here.
PPUTranslator::~PPUTranslator()
{
}
// Returns the thread context struct type stored in m_thread_type.
Type* PPUTranslator::GetContextType()
{
return m_thread_type;
}
void PPUTranslator::AddFunction(u64 addr, Function* func, FunctionType* type)
{
if (!m_func_types.emplace(addr, type).second || !m_func_list.emplace(addr, func).second)
@ -114,7 +122,8 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t<u32>*
m_ir = &builder;
/* Create context variables */
m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(start_addr));
//m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(start_addr));
m_thread = &*m_function->getArgumentList().begin();
// Non-volatile registers with special meaning (TODO)
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp");
@ -259,8 +268,7 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t<u32>*
}
m_ir->SetInsertPoint(_default);
Call(GetType<void>(), "__call", _ctr);
m_ir->CreateRetVoid();
CallFunction(0, true, _ctr);
}
//for (auto i = inst_begin(*m_function), end = inst_end(*m_function); i != end;)
@ -315,27 +323,23 @@ void PPUTranslator::CallFunction(u64 target, bool tail, Value* indirect)
const auto callee_type = func ? m_func_types[target] : nullptr;
// Prepare function arguments
std::vector<Value*> args;
if (!callee_type)
if (func)
{
// Prepare args for untyped function
m_ir->CreateCall(func, {m_thread});
}
else
{
const auto addr = indirect ? indirect : (Value*)m_ir->getInt64(target);
const auto pos = m_ir->CreateLShr(addr, 2, "", true);
const auto ptr = m_ir->CreateGEP(m_call, {m_ir->getInt64(0), pos});
m_ir->CreateCall(m_ir->CreateLoad(ptr), {m_thread});
}
// Call the function
const auto result = func ? m_ir->CreateCall(func, args) : Call(GetType<void>(), "__call", indirect ? indirect : m_ir->getInt64(target));
if (!tail)
{
UndefineVolatileRegisters();
}
if (!callee_type)
{
// Get result from untyped function
}
if (tail)
{
m_ir->CreateRetVoid();
@ -1746,13 +1750,13 @@ void PPUTranslator::BC(ppu_opcode_t op)
void PPUTranslator::HACK(ppu_opcode_t op)
{
Call(GetType<void>(), "__hlecall", m_ir->getInt32(op.opcode & 0x3ffffff));
Call(GetType<void>(), "__hlecall", m_thread, m_ir->getInt32(op.opcode & 0x3ffffff));
UndefineVolatileRegisters();
}
void PPUTranslator::SC(ppu_opcode_t op)
{
Call(GetType<void>(), fmt::format(op.lev == 0 ? "__syscall" : "__lv%ucall", +op.lev), m_ir->CreateLoad(m_gpr[11]));
Call(GetType<void>(), fmt::format(op.lev == 0 ? "__syscall" : "__lv%ucall", +op.lev), m_thread, m_ir->CreateLoad(m_gpr[11]));
UndefineVolatileRegisters();
}

View file

@ -1,5 +1,7 @@
#pragma once
#ifdef LLVM_AVAILABLE
#include <unordered_map>
#include <map>
#include <unordered_set>
@ -152,9 +154,12 @@ class PPUTranslator final //: public CPUTranslator
// Memory base
llvm::Value* m_base;
// Thread context (obtained by __context)
// Thread context
llvm::Value* m_thread;
// Callable functions
llvm::Value* m_call;
// Thread context struct
llvm::StructType* m_thread_type;
@ -433,6 +438,9 @@ public:
PPUTranslator(llvm::LLVMContext& context, llvm::Module* module, u64 base, u64 entry);
~PPUTranslator();
// Get thread context struct type
llvm::Type* GetContextType();
// Add function
void AddFunction(u64 addr, llvm::Function* func, llvm::FunctionType* type = nullptr);
@ -827,3 +835,5 @@ public:
void UNK(ppu_opcode_t op);
};
#endif

View file

@ -2186,7 +2186,7 @@ void spu_recompiler::BR(spu_opcode_t op)
c->mov(*addr, target | 0x2000000);
//c->cmp(asmjit::host::dword_ptr(*ls, m_pos), 0x32); // compare instruction opcode with BR-to-self
//c->je(labels[target / 4]);
c->lock().or_(SPU_OFF_32(state), make_bitset(cpu_state::stop, cpu_state::ret)._value());
c->lock().or_(SPU_OFF_16(state), make_bitset(cpu_state::stop, cpu_state::ret)._value());
c->jmp(*end);
c->unuse(*addr);
return;

View file

@ -126,7 +126,7 @@ spu_imm_table_t::spu_imm_table_t()
std::string SPUThread::get_name() const
{
return fmt::format("%sSPU[0x%x] Thread (%s)", offset > RAW_SPU_BASE_ADDR ? "Raw" : "", id, name);
return fmt::format("%sSPU[0x%x] Thread (%s)", offset > RAW_SPU_BASE_ADDR ? "Raw" : "", id, m_name);
}
std::string SPUThread::dump() const
@ -240,14 +240,16 @@ SPUThread::~SPUThread()
}
SPUThread::SPUThread(const std::string& name)
: cpu_thread(cpu_type::spu, name)
: cpu_thread(cpu_type::spu)
, m_name(name)
, index(0)
, offset(0)
{
}
SPUThread::SPUThread(const std::string& name, u32 index)
: cpu_thread(cpu_type::spu, name)
: cpu_thread(cpu_type::spu)
, m_name(name)
, index(index)
, offset(vm::alloc(0x40000, vm::main))
{

View file

@ -553,6 +553,8 @@ public:
const u32 index; // SPU index
const u32 offset; // SPU LS offset
const std::string m_name; // Thread name
std::function<void(SPUThread&)> custom_task;
std::exception_ptr pending_exception;

View file

@ -910,40 +910,36 @@ std::array<ppu_function_t, 1024> g_ppu_syscall_table
extern void ppu_execute_syscall(PPUThread& ppu, u64 code)
{
if (code >= g_ppu_syscall_table.size())
if (code < g_ppu_syscall_table.size())
{
throw fmt::exception("Invalid syscall number (%llu)", code);
}
// If autopause occurs, check_status() will hold the thread until it is unpaused.
if (debug::autopause::pause_syscall(code) && ppu.check_status())
{
throw cpu_state::ret;
}
if (debug::autopause::pause_syscall(code) && ppu.check_status()) throw cpu_state::ret;
const auto previous_function = ppu.last_function; // TODO: use gsl::finally or something
try
{
if (auto func = g_ppu_syscall_table[code])
{
func(ppu);
LOG_TRACE(PPU, "Syscall '%s' (%llu) finished, r3=0x%llx", ppu_get_syscall_name(code), code, ppu.GPR[3]);
}
else
{
LOG_TODO(HLE, "Unimplemented syscall %s -> CELL_OK", ppu_get_syscall_name(code));
ppu.GPR[3] = 0;
}
}
catch (...)
{
logs::PPU.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Syscall '%s' (%llu) aborted", ppu_get_syscall_name(code), code);
ppu.last_function = previous_function;
throw;
return;
}
LOG_TRACE(PPU, "Syscall '%s' (%llu) finished, r3=0x%llx", ppu_get_syscall_name(code), code, ppu.GPR[3]);
ppu.last_function = previous_function;
throw fmt::exception("Invalid syscall number (%llu)", code);
}
// Looks up the handler stored in g_ppu_syscall_table for the given syscall code.
// Returns nullptr when the code is outside the table; an in-range entry is returned as stored.
extern ppu_function_t ppu_get_syscall(u64 code)
{
    const bool in_range = code < g_ppu_syscall_table.size();

    return in_range ? g_ppu_syscall_table[code] : nullptr;
}
DECLARE(lv2_lock_t::mutex);

View file

@ -271,6 +271,7 @@ s32 sys_timer_sleep(u32 sleep_time)
std::this_thread::sleep_for(std::chrono::microseconds(useconds - passed));
}
CHECK_EMU_STATUS;
return CELL_OK;
}
@ -294,5 +295,6 @@ s32 sys_timer_usleep(const u64 sleep_time)
std::this_thread::sleep_for(std::chrono::microseconds(sleep_time - passed));
}
CHECK_EMU_STATUS;
return CELL_OK;
}

View file

@ -89,9 +89,11 @@ void idm::clear()
// Call recorded finalization functions for all IDs
for (std::size_t i = 0; i < g_map.size(); i++)
{
const auto on_stop = id_manager::typeinfo::get()[i].on_stop;
for (auto& id : g_map[i])
{
id_manager::typeinfo::get()[i].on_stop(id.second.get());
on_stop(id.second.get());
}
g_map[i].clear();

View file

@ -37,6 +37,8 @@ namespace id_manager
{
static inline void func(T*)
{
// Forbid forward declarations
static constexpr auto size = sizeof(std::conditional_t<std::is_void<T>::value, void*, T>);
}
};
@ -45,7 +47,7 @@ namespace id_manager
{
static inline void func(T* ptr)
{
ptr->on_init();
if (ptr) ptr->on_init();
}
};
@ -55,6 +57,8 @@ namespace id_manager
{
static inline void func(T*)
{
// Forbid forward declarations
static constexpr auto size = sizeof(std::conditional_t<std::is_void<T>::value, void*, T>);
}
};
@ -63,7 +67,7 @@ namespace id_manager
{
static inline void func(T* ptr)
{
ptr->on_stop();
if (ptr) ptr->on_stop();
}
};
@ -97,9 +101,6 @@ namespace id_manager
template<typename T>
static inline void update()
{
// Forbid forward declarations
static constexpr auto size = sizeof(std::conditional_t<std::is_void<T>::value, void*, T>);
auto& info = access()[get_index<T>()];
info.on_init = [](void* ptr) { return_ id_manager::on_init<T>::func(static_cast<T*>(ptr)); };
@ -111,12 +112,6 @@ namespace id_manager
{
return access();
}
// Return (by value) the on_stop callback recorded in the type table for T.
template<typename T>
static inline auto get_stop()
{
	const auto& entry = access()[get_index<T>()];
	return entry.on_stop;
}
};
template<typename T>
@ -257,6 +252,7 @@ public:
if (auto pair = create_id<T>(WRAP_EXPR(std::make_shared<Make>(std::forward<Args>(args)...))))
{
id_manager::on_init<T>::func(static_cast<T*>(pair->second.get()));
id_manager::on_stop<T>::func(nullptr);
return{ pair->second, static_cast<T*>(pair->second.get()) };
}
@ -270,6 +266,7 @@ public:
if (auto pair = create_id<T>(WRAP_EXPR(std::make_shared<Make>(std::forward<Args>(args)...))))
{
id_manager::on_init<T>::func(static_cast<T*>(pair->second.get()));
id_manager::on_stop<T>::func(nullptr);
return pair->first;
}
@ -283,6 +280,7 @@ public:
if (auto pair = create_id<T>(WRAP_EXPR(ptr)))
{
id_manager::on_init<T>::func(static_cast<T*>(pair->second.get()));
id_manager::on_stop<T>::func(nullptr);
return pair->first;
}
@ -296,6 +294,7 @@ public:
if (auto pair = create_id<T>(std::forward<F>(provider)))
{
id_manager::on_init<T>::func(static_cast<T*>(pair->second.get()));
id_manager::on_stop<T>::func(nullptr);
return { pair->second, static_cast<T*>(pair->second.get()) };
}
@ -389,7 +388,7 @@ public:
if (LIKELY(ptr))
{
id_manager::typeinfo::get_stop<T>()(static_cast<T*>(ptr.get()));
id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
}
return ptr.operator bool();
@ -403,7 +402,7 @@ public:
if (LIKELY(ptr))
{
id_manager::typeinfo::get_stop<T>()(static_cast<T*>(ptr.get()));
id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
}
return{ ptr, static_cast<T*>(ptr.get()) };
@ -429,7 +428,7 @@ public:
g_map[get_type<T>()].erase(id);
}
id_manager::typeinfo::get_stop<T>()(static_cast<T*>(ptr.get()));
id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
return{ ptr, static_cast<T*>(ptr.get()) };
}
@ -479,6 +478,7 @@ public:
if (ptr)
{
id_manager::on_init<T>::func(ptr.get());
id_manager::on_stop<T>::func(nullptr);
}
return ptr;
@ -531,6 +531,7 @@ public:
if (ptr)
{
id_manager::on_init<T>::func(ptr.get());
id_manager::on_stop<T>::func(nullptr);
}
return ptr;
@ -585,6 +586,7 @@ public:
}
id_manager::on_init<T>::func(ptr.get());
id_manager::on_stop<T>::func(nullptr);
return ptr;
}
@ -616,7 +618,7 @@ public:
if (ptr)
{
id_manager::typeinfo::get_stop<T>()(static_cast<T*>(ptr.get()));
id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
}
return ptr.operator bool();
@ -630,7 +632,7 @@ public:
if (ptr)
{
id_manager::typeinfo::get_stop<T>()(static_cast<T*>(ptr.get()));
id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
}
return{ ptr, static_cast<T*>(ptr.get()) };

View file

@ -4,7 +4,12 @@
using arm_function_t = void(*)(ARMv7Thread&);
// Wrap `func` in a capture-less lambda converted to arm_function_t.
// While the call is in progress cpu.last_function holds the stringified name of
// `func`; the previous value is put back after do_call returns normally.
// If do_call throws, last_function is left pointing at #func.
#define BIND_FUNC(func) static_cast<arm_function_t>([](ARMv7Thread& cpu){\
	const auto old_f = cpu.last_function;\
	cpu.last_function = #func;\
	arm_func_detail::do_call(cpu, func);\
	cpu.last_function = old_f;\
})
struct arm_va_args_t
{

View file

@ -79,27 +79,26 @@ extern std::string arm_get_variable_name(const std::string& module, u32 vnid);
// Function lookup table. Not supposed to grow after emulation start.
std::vector<arm_function_t> g_arm_function_cache;
std::vector<std::string> g_arm_function_names;
// Return the name stored in g_arm_function_names for `index`.
// Indices beyond the end of the table yield the placeholder ".<index>".
extern std::string arm_get_module_function_name(u32 index)
{
	const bool has_name = index < g_arm_function_names.size();

	if (!has_name)
	{
		return fmt::format(".%u", index);
	}

	return g_arm_function_names[index];
}
extern void arm_execute_function(ARMv7Thread& cpu, u32 index)
{
if (index < g_arm_function_cache.size())
{
if (const auto func = g_arm_function_cache[index])
{
const auto previous_function = cpu.last_function; // TODO: use gsl::finally or something
try
{
func(cpu);
}
catch (...)
{
logs::ARMv7.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", cpu.last_function);
cpu.last_function = previous_function;
throw;
}
LOG_TRACE(ARMv7, "Function '%s' finished, r0=0x%x", cpu.last_function, cpu.GPR[0]);
cpu.last_function = previous_function;
LOG_TRACE(ARMv7, "Function '%s' finished, r0=0x%x", arm_get_module_function_name(index), cpu.GPR[0]);
return;
}
}
@ -220,6 +219,8 @@ static void arm_initialize_modules()
// Reinitialize function cache
g_arm_function_cache = arm_function_manager::get();
g_arm_function_names.clear();
g_arm_function_names.resize(g_arm_function_cache.size());
// "Use" all the modules for correct linkage
for (auto& module : registered)
@ -229,6 +230,7 @@ static void arm_initialize_modules()
for (auto& function : module->functions)
{
LOG_TRACE(LOADER, "** 0x%08X: %s", function.first, function.second.name);
g_arm_function_names.at(function.second.index) = fmt::format("%s.%s", module->name, function.second.name);
}
for (auto& variable : module->variables)
@ -555,6 +557,7 @@ void arm_exec_loader::load() const
// TODO
index = ::size32(g_arm_function_cache);
g_arm_function_cache.emplace_back();
g_arm_function_names.emplace_back(fmt::format("%s.%s", module_name, fname));
LOG_ERROR(LOADER, "** Unknown function '%s' in module '%s' (*0x%x) -> index %u", fname, module_name, faddr, index);
}

View file

@ -73,7 +73,7 @@ void armv7_free_tls(u32 thread)
// Build the display string for this thread from its id (hex) and m_name.
std::string ARMv7Thread::get_name() const
{
	return fmt::format("ARMv7[0x%x] Thread (%s)", id, m_name);
}
std::string ARMv7Thread::dump() const
@ -126,13 +126,11 @@ extern thread_local std::string(*g_tls_log_prefix)();
// Thread entry point: run custom_task when one is installed, otherwise
// execute from the current PC through fast_call().
void ARMv7Thread::cpu_task()
{
	return custom_task ? custom_task(*this) : fast_call(PC);
}
void ARMv7Thread::cpu_task_main()
{
g_tls_log_prefix = []
{
const auto cpu = static_cast<ARMv7Thread*>(get_current_cpu_thread());
@ -185,40 +183,56 @@ ARMv7Thread::~ARMv7Thread()
}
// Construct an ARM cpu_thread; the given name is stored in m_name
// (it is read back by get_name()).
ARMv7Thread::ARMv7Thread(const std::string& name)
	: cpu_thread(cpu_type::arm)
	, m_name(name)
{
}
// Run cpu_task_main() starting at `addr` as a nested call on this thread.
//
// PC, SP, LR, custom_task and last_function are saved on entry and restored
// after a normal return. LR is set to Emu.GetCPUThreadStop() for the duration
// of the call (presumably the address that terminates the nested run - confirm).
//
// Throws fmt::exception when SP differs from its entry value and neither
// cpu_state::ret nor cpu_state::exit is set. Any cpu_state other than ret,
// EmulationStopped, and every other exception are rethrown to the caller;
// on the latter two paths only last_function is restored.
void ARMv7Thread::fast_call(u32 addr)
{
	const auto old_PC = PC;
	const auto old_SP = SP;
	const auto old_LR = LR;
	auto old_task = std::move(custom_task); // non-const so it can be moved back below
	const auto old_func = last_function;

	PC = addr;
	LR = Emu.GetCPUThreadStop();
	custom_task = nullptr;
	last_function = nullptr;

	try
	{
		// cpu_task() forwards to fast_call() when no custom task is set,
		// so the main loop has to be entered directly here.
		cpu_task_main();

		if (SP != old_SP && !state.test(cpu_state::ret) && !state.test(cpu_state::exit)) // SP shouldn't change
		{
			throw fmt::exception("Stack inconsistency (addr=0x%x, SP=0x%x, old=0x%x)", addr, SP, old_SP);
		}
	}
	catch (cpu_state _s)
	{
		// Record the thrown state flag; only `ret` ends the nested call normally
		state += _s;
		if (_s != cpu_state::ret) throw;
	}
	catch (EmulationStopped)
	{
		if (last_function) LOG_WARNING(ARMv7, "'%s' aborted", last_function);
		last_function = old_func;
		throw;
	}
	catch (...)
	{
		if (last_function) LOG_ERROR(ARMv7, "'%s' aborted", last_function);
		last_function = old_func;
		throw;
	}

	state -= cpu_state::ret;

	PC = old_PC;
	SP = old_SP;
	LR = old_LR;
	custom_task = std::move(old_task);
	last_function = old_func;
}

View file

@ -18,6 +18,7 @@ public:
virtual std::string dump() const override;
virtual void cpu_init() override;
virtual void cpu_task() override;
virtual void cpu_task_main();
virtual ~ARMv7Thread() override;
ARMv7Thread(const std::string& name);
@ -133,6 +134,8 @@ public:
u32 stack_addr = 0;
u32 stack_size = 0;
const std::string m_name;
std::function<void(ARMv7Thread&)> custom_task;
const char* last_function = nullptr;

View file

@ -64,36 +64,19 @@
<AdditionalIncludeDirectories>..\llvm\include;..\llvm_build\include;</AdditionalIncludeDirectories>
</ClCompile>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug - MemLeak|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug - MemLeak|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug - LLVM|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug - LLVM|x64'">Updating git-version.h</Message>
<Command>%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message>Updating git-version.h</Message>
</PreBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\ps3emu_api\ps3emu_api.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug - LLVM|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug - MemLeak|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\AutoPause.cpp" />
<ClCompile Include="..\Utilities\dynamic_library.cpp" />
<ClCompile Include="..\Utilities\JIT.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\Log.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
@ -404,6 +387,7 @@
<ClInclude Include="..\Utilities\event.h" />
<ClInclude Include="..\Utilities\geometry.h" />
<ClInclude Include="..\Utilities\GSL.h" />
<ClInclude Include="..\Utilities\JIT.h" />
<ClInclude Include="..\Utilities\lockless.h" />
<ClInclude Include="..\Utilities\sync.h" />
<ClInclude Include="..\Utilities\Platform.h" />

View file

@ -872,6 +872,15 @@
<ClCompile Include="..\ps3emu_api\ps3emu_api.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Utilities\version.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="rpcs3_version.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Utilities\JIT.cpp">
<Filter>Utilities</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -1666,5 +1675,14 @@
<ClInclude Include="..\ps3emu_api\ps3emu_api.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Utilities\version.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="rpcs3_version.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Utilities\JIT.h">
<Filter>Utilities</Filter>
</ClInclude>
</ItemGroup>
</Project>