PPU LLVM: Precompile all executable (PRX, MSELF, overlay) code at startup (#9680)

* Precompile LLVM cache at startup of games, like the GUI "Create PPU Cache" option.
* Allow OVL (overlay) precompilation as well (used by certain games).

Co-authored-by: Megamouse <studienricky89@googlemail.com>
Co-authored-by: Nekotekina <nekotekina@gmail.com>
This commit is contained in:
Eladash 2021-01-30 15:08:22 +02:00 committed by GitHub
parent bb2cc196a6
commit e3b3b0cda7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 288 additions and 46 deletions

View file

@ -3,6 +3,7 @@
#include "Utilities/bin_patch.h"
#include "Utilities/StrUtil.h"
#include "Utilities/address_range.h"
#include "Crypto/sha1.h"
#include "Crypto/unself.h"
#include "Loader/ELF.h"
@ -30,7 +31,7 @@ extern std::string ppu_get_function_name(const std::string& _module, u32 fnid);
extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid);
extern void ppu_register_range(u32 addr, u32 size);
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr);
extern void ppu_initialize(const ppu_module& info);
extern bool ppu_initialize(const ppu_module& info, bool = false);
extern void ppu_initialize();
extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32);
@ -1648,10 +1649,8 @@ void ppu_load_exec(const ppu_exec_object& elf)
}
}
std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const std::string& path)
std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object& elf, const std::string& path)
{
const auto ovlm = idm::make_ptr<lv2_obj, lv2_overlay>();
// Access linkage information object
const auto link = g_fxo->get<ppu_linkage_info>();
@ -1659,6 +1658,25 @@ std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const
sha1_context sha;
sha1_starts(&sha);
// Check if it is an overlay executable first
for (const auto& prog : elf.progs)
{
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
{
using addr_range = utils::address_range;
const addr_range r = addr_range::start_length(::narrow<u32>(prog.p_vaddr), ::narrow<u32>(prog.p_memsz));
if (!r.valid() || !r.inside(addr_range::start_length(0x30000000, 0x10000000)))
{
// TODO: Check error and if there's a better way to error check
return {nullptr, CELL_ENOEXEC};
}
}
}
const auto ovlm = std::make_shared<lv2_overlay>();
// Allocate memory at fixed positions
for (const auto& prog : elf.progs)
{
@ -1682,7 +1700,18 @@ std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const
fmt::throw_exception("Invalid binary size (0x%llx, memsz=0x%x)", prog.bin.size(), size);
if (!vm::get(vm::any, 0x30000000)->falloc(addr, size))
fmt::throw_exception("vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
{
ppu_loader.error("ppu_load_overlay(): vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
// Revert previous allocations
for (const auto& seg : ovlm->segs)
{
ensure(vm::dealloc(seg.addr));
}
// TODO: Check error code, maybe disallow more than one overlay instance completely
return {nullptr, CELL_EBUSY};
}
// Copy segment data, hash it
std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size());
@ -1845,5 +1874,6 @@ std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const
ovlm->name = path.substr(path.find_last_of('/') + 1);
ovlm->path = path;
return ovlm;
idm::import_existing<lv2_obj, lv2_overlay>(ovlm);
return {std::move(ovlm), {}};
}

View file

@ -18,6 +18,8 @@
#include "SPURecompiler.h"
#include "lv2/sys_sync.h"
#include "lv2/sys_prx.h"
#include "lv2/sys_overlay.h"
#include "lv2/sys_process.h"
#include "lv2/sys_memory.h"
#include "Emu/GDB.h"
@ -117,8 +119,9 @@ const ppu_decoder<ppu_itype> g_ppu_itype;
extern void ppu_initialize();
extern void ppu_finalize(const ppu_module& info);
extern void ppu_initialize(const ppu_module& info);
extern bool ppu_initialize(const ppu_module& info, bool = false);
static void ppu_initialize2(class jit_compiler& jit, const ppu_module& module_part, const std::string& cache_path, const std::string& obj_name);
extern std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, const std::string& path);
extern void ppu_unload_prx(const lv2_prx&);
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&);
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
@ -788,7 +791,7 @@ void ppu_thread::cpu_task()
}
case ppu_cmd::initialize:
{
cmd_pop(), ppu_initialize();
cmd_pop(), ppu_initialize(), spu_cache::initialize();
break;
}
case ppu_cmd::sleep:
@ -2064,19 +2067,27 @@ extern void ppu_finalize(const ppu_module& info)
#endif
}
extern void ppu_precompile(std::vector<std::string>& dir_queue)
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<lv2_prx*>* loaded_prx)
{
// Remove duplicates
std::sort(dir_queue.begin(), dir_queue.end());
dir_queue.erase(std::unique(dir_queue.begin(), dir_queue.end()), dir_queue.end());
const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/");
// Map fixed address executables area, fake overlay support
const bool had_ovl = !vm::map(0x3000'0000, 0x1000'0000, 0x200).operator bool();
const u32 ppc_seg = std::exchange(g_ps3_process_info.ppc_seg, 0x3);
std::vector<std::pair<std::string, u64>> file_queue;
file_queue.reserve(2000);
// Initialize progress dialog
g_progr = "Scanning directories for SPRX libraries...";
// Find all .sprx files recursively (TODO: process .mself files)
for (usz i = 0; i < dir_queue.size(); i++)
{
if (Emu.IsStopped())
{
file_queue.clear();
break;
}
@ -2086,6 +2097,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
{
if (Emu.IsStopped())
{
file_queue.clear();
break;
}
@ -2099,17 +2111,72 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
continue;
}
std::string upper = fmt::to_upper(entry.name);
// Check .sprx filename
if (fmt::to_upper(entry.name).ends_with(".SPRX"))
if (upper.ends_with(".SPRX"))
{
// Skip already loaded modules or HLEd ones
if (dir_queue[i] == firmware_sprx_path)
{
bool ignore = false;
if (loaded_prx)
{
for (auto* obj : *loaded_prx)
{
if (obj->name == entry.name)
{
ignore = true;
break;
}
}
if (ignore)
{
continue;
}
}
if (g_cfg.core.libraries_control.get_set().count(entry.name + ":lle"))
{
// Force LLE
ignore = false;
}
else if (g_cfg.core.libraries_control.get_set().count(entry.name + ":hle"))
{
// Force HLE
ignore = true;
}
else
{
extern const std::map<std::string_view, int> g_prx_list;
// Use list
ignore = g_prx_list.count(entry.name) && g_prx_list.at(entry.name) != 0;
}
if (ignore)
{
continue;
}
}
// Get full path
file_queue.emplace_back(dir_queue[i] + entry.name, 0);
g_progr_ftotal++;
continue;
}
// Check .self filename
if (upper.ends_with(".SELF"))
{
// Get full path
file_queue.emplace_back(dir_queue[i] + entry.name, 0);
continue;
}
// Check .mself filename
if (fmt::to_upper(entry.name).ends_with(".MSELF"))
if (upper.ends_with(".MSELF"))
{
if (fs::file mself{dir_queue[i] + entry.name})
{
@ -2125,11 +2192,20 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
{
std::string name = rec.name;
if (fmt::to_upper(name).ends_with(".SPRX"))
upper = fmt::to_upper(name);
if (upper.ends_with(".SPRX"))
{
// .sprx inside .mself found
file_queue.emplace_back(dir_queue[i] + entry.name, rec.off);
g_progr_ftotal++;
continue;
}
if (upper.ends_with(".SELF"))
{
// .self inside .mself found
file_queue.emplace_back(dir_queue[i] + entry.name, rec.off);
continue;
}
}
else
@ -2146,22 +2222,35 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
g_progr = "Compiling PPU modules";
g_progr_ftotal += file_queue.size();
atomic_t<usz> fnext = 0;
shared_mutex sprx_mtx;
shared_mutex sprx_mtx, ovl_mtx;
named_thread_group workers("SPRX Worker ", utils::get_thread_count(), [&]
named_thread_group workers("SPRX Worker ", std::min<u32>(utils::get_thread_count(), ::size32(file_queue)), [&]
{
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++)
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone++)
{
std::string path = std::as_const(file_queue)[func_i].first;
if (Emu.IsStopped())
{
continue;
}
ppu_log.notice("Trying to load SPRX: %s", path);
auto [path, offset] = std::as_const(file_queue)[func_i];
// Load MSELF or SPRX
ppu_log.notice("Trying to load: %s", path);
// Load MSELF, SPRX or SELF
fs::file src{path};
if (u64 off = file_queue[func_i].second)
if (!src)
{
ppu_log.error("Failed to open '%s' (%s)", path, fs::g_tls_error);
continue;
}
if (u64 off = offset)
{
// Adjust offset for MSELF
src.reset(std::make_unique<file_view>(std::move(src), off));
@ -2173,9 +2262,15 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
// Some files may fail to decrypt due to the lack of klic
src = decrypt_self(std::move(src));
const ppu_prx_object obj = src;
if (!src)
{
ppu_log.error("Failed to decrypt '%s'", path);
continue;
}
if (obj == elf_error::ok)
elf_error prx_err{}, ovl_err{};
if (const ppu_prx_object obj = src; (prx_err = obj, obj == elf_error::ok))
{
std::unique_lock lock(sprx_mtx);
@ -2188,19 +2283,64 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
ppu_unload_prx(*prx);
lock.unlock();
ppu_finalize(*prx);
g_progr_fdone++;
continue;
}
// Log error
prx_err = elf_error::header_type;
}
if (const ppu_exec_object obj = src; (ovl_err = obj, obj == elf_error::ok))
{
while (ovl_err == elf_error::ok)
{
// Only one thread compiles OVL atm, other can compile PRX cuncurrently
std::unique_lock lock(ovl_mtx);
auto [ovlm, error] = ppu_load_overlay(obj, path);
if (error)
{
// Abort
ovl_err = elf_error::header_type;
break;
}
ppu_initialize(*ovlm);
for (auto& seg : ovlm->segs)
{
vm::dealloc(seg.addr);
}
lock.unlock();
idm::remove<lv2_obj, lv2_overlay>(idm::last_id());
ppu_finalize(*ovlm);
break;
}
if (ovl_err == elf_error::ok)
{
continue;
}
}
ppu_log.error("Failed to load SPRX '%s' (%s)", path, obj.get_error());
g_progr_fdone++;
ppu_log.error("Failed to precompile '%s' (prx: %s, ovl: %s)", path, prx_err, ovl_err);
continue;
}
});
// Join every thread
workers.join();
// Revert changes
if (!had_ovl)
{
vm::unmap(0x3000'0000);
}
g_ps3_process_info.ppc_seg = ppc_seg;
}
extern void ppu_initialize()
@ -2216,11 +2356,13 @@ extern void ppu_initialize()
{
return;
}
bool compile_main = false;
// Initialize main module
// Check main module cache
if (!_main->segs.empty())
{
ppu_initialize(*_main);
compile_main = ppu_initialize(*_main, true);
}
std::vector<lv2_prx*> prx_list;
@ -2230,20 +2372,68 @@ extern void ppu_initialize()
prx_list.emplace_back(&prx);
});
// If empty we have no indication for cache state, check everything
bool compile_fw = prx_list.empty();
// Check preloaded libraries cache
for (auto ptr : prx_list)
{
compile_fw |= ppu_initialize(*ptr, true);
}
std::vector<std::string> dir_queue;
if (compile_fw)
{
const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/");
dir_queue.emplace_back(firmware_sprx_path);
}
// Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO
if (compile_main && !Emu.GetTitleID().empty())
{
dir_queue.emplace_back(vfs::get(Emu.GetDir()) + '/');
if (const std::string dev_bdvd = vfs::get("/dev_bdvd/PS3_GAME"); !dev_bdvd.empty())
{
dir_queue.emplace_back(dev_bdvd + '/');
}
}
ppu_precompile(dir_queue, &prx_list);
if (Emu.IsStopped())
{
return;
}
// Initialize main module cache
if (!_main->segs.empty())
{
ppu_initialize(*_main);
}
// Initialize preloaded libraries
for (auto ptr : prx_list)
{
if (Emu.IsStopped())
{
return;
}
ppu_initialize(*ptr);
}
// Initialize SPU cache
spu_cache::initialize();
}
extern void ppu_initialize(const ppu_module& info)
bool ppu_initialize(const ppu_module& info, bool check_only)
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
{
if (check_only)
{
return false;
}
// Temporarily
s_ppu_toc = g_fxo->get<std::unordered_map<u32, u32>>();
@ -2261,7 +2451,7 @@ extern void ppu_initialize(const ppu_module& info)
}
}
return;
return false;
}
// Link table
@ -2375,6 +2565,8 @@ extern void ppu_initialize(const ppu_module& info)
// Sync variable to acquire workloads
atomic_t<u32> work_cv = 0;
bool compiled_new = false;
while (jit_mod.vars.empty() && fpos < info.funcs.size())
{
// Initialize compiler instance
@ -2581,7 +2773,7 @@ extern void ppu_initialize(const ppu_module& info)
// Check object file
if (jit_compiler::check(cache_path + obj_name))
{
if (!jit)
if (!jit && !check_only)
{
ppu_log.success("LLVM: Module exists: %s", obj_name);
continue;
@ -2590,6 +2782,14 @@ extern void ppu_initialize(const ppu_module& info)
continue;
}
// Remember, used in ppu_initialize(void)
compiled_new = true;
if (check_only)
{
return true;
}
// Adjust information (is_compiled)
link_workload.back().second = true;
@ -2651,7 +2851,7 @@ extern void ppu_initialize(const ppu_module& info)
if (Emu.IsStopped() || !get_current_cpu_thread())
{
return;
return compiled_new;
}
for (auto [obj_name, is_compiled] : link_workload)
@ -2672,7 +2872,7 @@ extern void ppu_initialize(const ppu_module& info)
if (Emu.IsStopped() || !get_current_cpu_thread())
{
return;
return compiled_new;
}
// Jit can be null if the loop doesn't ever enter.
@ -2742,6 +2942,8 @@ extern void ppu_initialize(const ppu_module& info)
}
}
}
return compiled_new;
#else
fmt::throw_exception("LLVM is not available in this build.");
#endif

View file

@ -12,9 +12,9 @@
#include "sys_overlay.h"
#include "sys_fs.h"
extern std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object&, const std::string& path);
extern std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, const std::string& path);
extern void ppu_initialize(const ppu_module&);
extern bool ppu_initialize(const ppu_module&, bool = false);
extern void ppu_finalize(const ppu_module&);
LOG_CHANNEL(sys_overlay);
@ -42,7 +42,12 @@ static error_code overlay_load_module(vm::ptr<u32> ovlmid, const std::string& vp
return {CELL_ENOEXEC, obj.operator elf_error()};
}
const auto ovlm = ppu_load_overlay(obj, vfs::get(vpath));
const auto [ovlm, error] = ppu_load_overlay(obj, vfs::get(vpath));
if (error)
{
return error;
}
ppu_initialize(*ovlm);

View file

@ -17,7 +17,7 @@
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&);
extern void ppu_unload_prx(const lv2_prx& prx);
extern void ppu_initialize(const ppu_module&);
extern bool ppu_initialize(const ppu_module&, bool = false);
extern void ppu_finalize(const ppu_module&);
LOG_CHANNEL(sys_prx);

View file

@ -66,8 +66,8 @@ atomic_t<u64> g_watchdog_hold_ctr{0};
extern void ppu_load_exec(const ppu_exec_object&);
extern void spu_load_exec(const spu_exec_object&);
extern void ppu_precompile(std::vector<std::string>& dir_queue);
extern void ppu_initialize(const ppu_module&);
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<lv2_prx*>* loaded_prx);
extern bool ppu_initialize(const ppu_module&, bool = false);
extern void ppu_finalize(const ppu_module&);
extern void ppu_unload_prx(const lv2_prx&);
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&);
@ -1150,7 +1150,12 @@ game_boot_result Emulator::Load(const std::string& title_id, bool add_only, bool
ensure(vm::falloc(0x10000, 0xf0000, vm::main));
}
ppu_precompile(dir_queue);
if (Emu.IsStopped())
{
return;
}
ppu_precompile(dir_queue, nullptr);
// Exit "process"
Emu.CallAfter([]