diff --git a/rpcs3/Crypto/decrypt_binaries.cpp b/rpcs3/Crypto/decrypt_binaries.cpp index a87db5c0ec..6205ac6d4f 100644 --- a/rpcs3/Crypto/decrypt_binaries.cpp +++ b/rpcs3/Crypto/decrypt_binaries.cpp @@ -96,7 +96,7 @@ usz decrypt_binaries_t::decrypt(std::string_view klic_input) case "SCE\0"_u32: { // First KLIC is no KLIC - elf_file = decrypt_self(std::move(elf_file), key_it != 0 ? reinterpret_cast(&m_klics[key_it]) : nullptr); + elf_file = decrypt_self(elf_file, key_it != 0 ? reinterpret_cast(&m_klics[key_it]) : nullptr); if (!elf_file) { diff --git a/rpcs3/Crypto/key_vault.cpp b/rpcs3/Crypto/key_vault.cpp index ab99514b27..b9222cd63e 100644 --- a/rpcs3/Crypto/key_vault.cpp +++ b/rpcs3/Crypto/key_vault.cpp @@ -20,6 +20,7 @@ SELF_KEY::SELF_KEY(u64 ver_start, u64 ver_end, u16 rev, u32 type, const std::str KeyVault::KeyVault() { + std::memcpy(klicensee_key, NP_KLIC_FREE, sizeof(klicensee_key)); } void KeyVault::LoadSelfLV0Keys() @@ -751,15 +752,14 @@ SELF_KEY KeyVault::FindSelfKey(u32 type, u16 revision, u64 version) return key; } -void KeyVault::SetKlicenseeKey(u8* key) +void KeyVault::SetKlicenseeKey(const u8* key) { - klicensee_key = std::make_unique(0x10); - memcpy(klicensee_key.get(), key, 0x10); + std::memcpy(klicensee_key, key, 0x10); } -u8* KeyVault::GetKlicenseeKey() const +const u8* KeyVault::GetKlicenseeKey() const { - return klicensee_key.get(); + return klicensee_key; } void rap_to_rif(unsigned char* rap, unsigned char* rif) diff --git a/rpcs3/Crypto/key_vault.h b/rpcs3/Crypto/key_vault.h index 5fc19d1659..ac5ba084b9 100644 --- a/rpcs3/Crypto/key_vault.h +++ b/rpcs3/Crypto/key_vault.h @@ -319,13 +319,13 @@ class KeyVault std::vector sk_LDR_arr{}; std::vector sk_UNK7_arr{}; std::vector sk_NPDRM_arr{}; - std::unique_ptr klicensee_key{}; + u8 klicensee_key[16]{}; public: KeyVault(); SELF_KEY FindSelfKey(u32 type, u16 revision, u64 version); - void SetKlicenseeKey(u8* key); - u8* GetKlicenseeKey() const; + void SetKlicenseeKey(const u8* key); + const u8* GetKlicenseeKey() const; private: void LoadSelfLV0Keys(); diff --git a/rpcs3/Crypto/unself.cpp b/rpcs3/Crypto/unself.cpp index 66000c4f12..3855dcd62f 100644 --- a/rpcs3/Crypto/unself.cpp +++ b/rpcs3/Crypto/unself.cpp @@ -1045,11 +1045,8 @@ bool SELFDecrypter::DecryptNPDRM(u8 *metadata, u32 metadata_size) } else if (npd->license == 3) // Free license. { - // Use klicensee if available. - if (key_v.GetKlicenseeKey()) - memcpy(npdrm_key, key_v.GetKlicenseeKey(), 0x10); - else - memcpy(npdrm_key, NP_KLIC_FREE, 0x10); + // Use klicensee if available. (may be set to NP_KLIC_FREE if none is set) + std::memcpy(npdrm_key, key_v.GetKlicenseeKey(), 0x10); } else { @@ -1085,7 +1082,7 @@ const NPD_HEADER* SELFDecrypter::GetNPDHeader() const return nullptr; } -bool SELFDecrypter::LoadMetadata(u8* klic_key) +bool SELFDecrypter::LoadMetadata(const u8* klic_key) { aes_context aes; const auto metadata_info = std::make_unique(sizeof(meta_info)); @@ -1319,11 +1316,11 @@ static bool IsDebugSelf(const fs::file& f) return false; } -static bool CheckDebugSelf(fs::file& s) +static fs::file CheckDebugSelf(const fs::file& s) { if (s.size() < 0x18) { - return false; + return {}; } // Get the key version. @@ -1352,15 +1349,14 @@ static bool CheckDebugSelf(fs::file& s) e.write(buf, size); } - s = std::move(e); - return true; + return e; } // Leave the file untouched. - return false; + return {}; } -fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* out_info, bool require_encrypted) +fs::file decrypt_self(const fs::file& elf_or_self, const u8* klic_key, SelfAdditionalInfo* out_info) { if (out_info) { @@ -1377,10 +1373,10 @@ fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* ou // Check SELF header first. Check for a debug SELF. if (elf_or_self.size() >= 4 && elf_or_self.read() == "SCE\0"_u32) { - if (CheckDebugSelf(elf_or_self)) + if (fs::file res = CheckDebugSelf(elf_or_self)) { // TODO: Decrypt - return elf_or_self; + return res; } // Check the ELF file class (32 or 64 bit). @@ -1399,14 +1395,14 @@ fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* ou // Load and decrypt the SELF file metadata. if (!self_dec.LoadMetadata(klic_key)) { - self_log.error("Failed to load SELF file metadata!"); + (klic_key ? self_log.notice : self_log.error)("Failed to load SELF file metadata!"); return fs::file{}; } // Decrypt the SELF file data. if (!self_dec.DecryptData()) { - self_log.error("Failed to decrypt SELF file data!"); + (klic_key ? self_log.notice : self_log.error)("Failed to decrypt SELF file data!"); return fs::file{}; } @@ -1414,12 +1410,7 @@ fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* ou return self_dec.MakeElf(isElf32); } - if (require_encrypted) - { - return {}; - } - - return elf_or_self; + return {}; } bool verify_npdrm_self_headers(const fs::file& self, u8* klic_key, NPD_HEADER* npd_out) diff --git a/rpcs3/Crypto/unself.h b/rpcs3/Crypto/unself.h index afa3416095..1279bc961d 100644 --- a/rpcs3/Crypto/unself.h +++ b/rpcs3/Crypto/unself.h @@ -476,7 +476,7 @@ public: fs::file MakeElf(bool isElf32); bool LoadHeaders(bool isElf32, SelfAdditionalInfo* out_info = nullptr); void ShowHeaders(bool isElf32); - bool LoadMetadata(u8* klic_key); + bool LoadMetadata(const u8* klic_key); bool DecryptData(); bool DecryptNPDRM(u8 *metadata, u32 metadata_size); const NPD_HEADER* GetNPDHeader() const; @@ -559,7 +559,7 @@ private: } }; -fs::file decrypt_self(fs::file elf_or_self, u8* klic_key = nullptr, SelfAdditionalInfo* additional_info = nullptr, bool require_encrypted = false); +fs::file decrypt_self(const fs::file& elf_or_self, const u8* klic_key = nullptr, SelfAdditionalInfo* additional_info = nullptr); bool verify_npdrm_self_headers(const fs::file& self, u8* klic_key = nullptr, NPD_HEADER* npd_out = nullptr); bool get_npdrm_self_header(const fs::file& self, NPD_HEADER& npd); diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index a936eb6853..808c31f8a5 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -12,7 +12,7 @@ LOG_CHANNEL(ppu_validator); -const ppu_decoder s_ppu_itype; +extern const ppu_decoder g_ppu_itype; template<> void fmt_class_string::format(std::string& out, u64 arg) @@ -535,6 +535,7 @@ static constexpr struct const_tag{} is_const; static constexpr struct range_tag{} is_range; static constexpr struct min_value_tag{} minv; static constexpr struct max_value_tag{} maxv; +static constexpr struct sign_bit_tag{} sign_bitv; static constexpr struct load_addr_tag{} load_addrv; struct reg_state_t @@ -548,13 +549,13 @@ struct reg_state_t // Check if state is a constant value bool operator()(const_tag) const { - return value_range == 1 && bit_range == 0; + return !is_loaded && value_range == 1 && bit_range == 0; } // Check if state is a ranged value bool operator()(range_tag) const { - return bit_range == 0; + return !is_loaded && bit_range == 0; } // Get minimum bound @@ -569,6 +570,11 @@ struct reg_state_t return value_range ? (ge_than | bit_range) + value_range : 0; } + u64 operator()(sign_bit_tag) const + { + return value_range == 0 || (bit_range >> 63) || (ge_than + value_range - 1) >> 63 != (ge_than >> 63) ? u64{umax} : (ge_than >> 63); + } + u64 operator()(load_addr_tag) const { return is_loaded ? ge_than : 0; @@ -922,7 +928,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con { const ppu_opcode_t op{+range[index]}; - switch (s_ppu_itype.decode(op.opcode)) + switch (g_ppu_itype.decode(op.opcode)) { case ppu_itype::UNK: { @@ -962,7 +968,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con // Register new function auto add_func = [&](u32 addr, u32 toc, u32 caller) -> ppu_function_ext& { - if (addr < start || addr >= end || s_ppu_itype.decode(*get_ptr(addr)) == ppu_itype::UNK) + if (addr < start || addr >= end || g_ppu_itype.decode(*get_ptr(addr)) == ppu_itype::UNK) { if (!fmap.contains(addr)) { @@ -1337,7 +1343,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con const u32 iaddr = _ptr.addr(); const ppu_opcode_t op{*ptr}; - const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + const ppu_itype::type type = g_ppu_itype.decode(op.opcode); if ((type == ppu_itype::B || type == ppu_itype::BC) && op.lk && (!op.aa || verify_ref(iaddr))) { @@ -1365,25 +1371,68 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con u32 addr = 0; u32 size = 0; u32 parent_block_idx = umax; - u64 mapped_registers_mask = 0; - u64 moved_registers_mask = 0; + ppua_reg_mask_t mapped_registers_mask{0}; + ppua_reg_mask_t moved_registers_mask{0}; }; // Block analysis workload std::vector block_queue_storage; + bool is_function_caller_analysis = false; + // Main loop (func_queue may grow) - for (usz i = 0; i < func_queue.size(); i++) + for (usz i = 0; i <= func_queue.size(); i++) { + if (i == func_queue.size()) + { + if (is_function_caller_analysis) + { + break; + } + + // Add callers of imported functions to be analyzed + std::set added; + + for (const auto& [stub_addr, _] : stub_addr_to_constant_state_of_registers) + { + auto it = fmap.upper_bound(stub_addr); + + if (it == fmap.begin()) + { + continue; + } + + auto stub_func = std::prev(it); + + for (u32 caller : stub_func->second.callers) + { + ppu_function_ext& func = ::at32(fmap, caller); + + if (func.attr.none_of(ppu_attr::no_size) && !func.blocks.empty() && !added.contains(caller)) + { + added.emplace(caller); + func_queue.emplace_back(::at32(fmap, caller)); + } + } + } + + if (added.empty()) + { + break; + } + + is_function_caller_analysis = true; + } + if (check_aborted && check_aborted()) { return false; } - ppu_function_ext& func = func_queue[i].get(); + ppu_function_ext& func = func_queue[i]; // Fixup TOCs - if (func.toc && func.toc != umax) + if (!is_function_caller_analysis && func.toc && func.toc != umax) { // Fixup callers for (u32 addr : func.callers) @@ -1407,7 +1456,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con const u32 iaddr = addr; const ppu_opcode_t op{get_ref(iaddr)}; - const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + const ppu_itype::type type = g_ppu_itype.decode(op.opcode); if (type == ppu_itype::B || type == ppu_itype::BC) { @@ -1453,7 +1502,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con } } - if (func.blocks.empty()) + if (!is_function_caller_analysis && func.blocks.empty()) { // Special function analysis const vm::cptr _ptr = vm::cast(func.addr); @@ -1760,7 +1809,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con if (parent_block != umax) { // Inherit loaded registers mask (lazily) - block.mapped_registers_mask = ::at32(block_queue, parent_block).mapped_registers_mask; + block.mapped_registers_mask.mask = ::at32(block_queue, parent_block).mapped_registers_mask.mask; } return static_cast(block_queue.size() - 1); @@ -1769,6 +1818,15 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con return umax; }; + std::map preserve_blocks; + + if (is_function_caller_analysis) + { + preserve_blocks = std::move(func.blocks); + func.blocks.clear(); + func.blocks.emplace(preserve_blocks.begin()->first, 0); + } + for (auto& block : func.blocks) { if (!block.second && block.first < func_end) @@ -1813,7 +1871,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con auto is_reg_mapped = [&](u32 index) { - return !!(block_queue[j].mapped_registers_mask & (u64{1} << index)); + return !!(block_queue[j].mapped_registers_mask.mask & (u64{1} << index)); }; reg_state_t dummy_state{}; @@ -1824,7 +1882,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con const usz reg_mask = u64{1} << index; - if (~block->moved_registers_mask & reg_mask) + if (~block->moved_registers_mask.mask & reg_mask) { if ((j + 1) * 64 >= reg_state_storage.size()) { @@ -1836,11 +1894,11 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con usz begin_block = umax; // Try searching for register origin - if (block->mapped_registers_mask & reg_mask) + if (block->mapped_registers_mask.mask & reg_mask) { for (u32 i = block->parent_block_idx; i != umax; i = block_queue[i].parent_block_idx) { - if (~block_queue[i].moved_registers_mask & reg_mask) + if (~block_queue[i].moved_registers_mask.mask & reg_mask) { continue; } @@ -1860,8 +1918,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con reg_state_storage[64 * j + index] = make_unknown_reg_state(); } - block->mapped_registers_mask |= reg_mask; - block->moved_registers_mask |= reg_mask; + block->mapped_registers_mask.mask |= reg_mask; + block->moved_registers_mask.mask |= reg_mask; } return reg_state_storage[64 * j + index]; @@ -1877,8 +1935,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con reg_state_storage[64 * block_index + index] = rhs; const usz reg_mask = u64{1} << index; - block_queue[block_index].mapped_registers_mask |= reg_mask; - block_queue[block_index].moved_registers_mask |= reg_mask; + block_queue[block_index].mapped_registers_mask.mask |= reg_mask; + block_queue[block_index].moved_registers_mask.mask |= reg_mask; }; const auto unmap_reg = [&](u32 index) @@ -1887,8 +1945,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con const usz reg_mask = u64{1} << index; - block->mapped_registers_mask &= ~reg_mask; - block->moved_registers_mask &= ~reg_mask; + block->mapped_registers_mask.mask &= ~reg_mask; + block->moved_registers_mask.mask &= ~reg_mask; }; enum : u32 @@ -1907,7 +1965,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con { const u32 iaddr = _ptr.addr(); const ppu_opcode_t op{*advance(_ptr, ptr, 1)}; - const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + const ppu_itype::type type = g_ppu_itype.decode(op.opcode); switch (type) { @@ -1935,13 +1993,55 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con const bool is_call = op.lk && target != iaddr && target != _ptr.addr() && _ptr.addr() < func_end; const auto pfunc = is_call ? &add_func(target, 0, 0) : nullptr; - if (pfunc && pfunc->blocks.empty()) + if (pfunc && pfunc->blocks.empty() && !is_function_caller_analysis) { // Postpone analysis (no info) postpone_analysis = true; break; } + if (is_function_caller_analysis && is_call && !(pfunc->attr & ppu_attr::no_return)) + { + while (is_function_caller_analysis) + { + // Verify that it is the call to the imported function (may be more than one) + const auto it = stub_addr_to_constant_state_of_registers.lower_bound(target); + + if (it == stub_addr_to_constant_state_of_registers.end()) + { + break; + } + + const auto next_func = fmap.upper_bound(it->first); + + if (next_func == fmap.begin()) + { + break; + } + + const auto stub_func = std::prev(next_func); + + if (stub_func->first == target) + { + // It is + // Now, mine register state + // Currently only of R3 + + if (is_reg_mapped(3)) + { + const reg_state_t& value = get_reg(3); + + if (value(is_const)) + { + it->second.emplace_back(ppua_reg_mask_t{ 1u << 3 }, value(minv) ); + } + } + } + + break; + } + } + // Add next block if necessary if ((is_call && !(pfunc->attr & ppu_attr::no_return)) || (type == ppu_itype::BC && (op.bo & 0x14) != 0x14)) { @@ -1993,7 +2093,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con store_block_reg(next_idx, lhs_cr_state, lhs_state); store_block_reg(next_idx, rhs_cr_state, rhs_state); - const u64 reg_mask = block_queue[j].mapped_registers_mask; + const u64 reg_mask = block_queue[j].mapped_registers_mask.mask; for (u32 bit = std::countr_zero(reg_mask); bit < 64 && reg_mask & (u64{1} << bit); bit += 1, bit = std::countr_zero(reg_mask >> (bit % 64)) + bit) @@ -2024,7 +2124,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con else if (is_call || target < func.addr || target >= func_end) { // Add function call (including obvious tail call) - add_func(target, 0, 0); + add_func(target, 0, func.addr); } else { @@ -2291,6 +2391,65 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con continue; } + case ppu_itype::LWZ: + { + const bool is_load_from_toc = (is_function_caller_analysis && op.ra == 2u && func.toc && func.toc != umax); + + if (is_load_from_toc || is_reg_mapped(op.rd) || is_reg_mapped(op.ra)) + { + const reg_state_t ra = get_reg(op.ra); + auto& rd = get_reg(op.rd); + + rd = {}; + rd.tag = reg_tag_allocator++; + rd.is_loaded = true; + + reg_state_t const_offs{}; + const_offs.load_const(op.simm16); + + reg_state_t toc_offset{}; + toc_offset.load_const(func.toc); + + const reg_state_t& off_ra = is_load_from_toc ? toc_offset : ra; + + rd.ge_than = const_offs(minv); + + const bool is_negative = const_offs(sign_bitv) == 1u; + + const bool is_offset_test_ok = is_negative + ? (0 - const_offs(minv) <= off_ra(minv) && off_ra(minv) + const_offs(minv) < segs_end) + : (off_ra(minv) < segs_end && const_offs(minv) < segs_end - off_ra(minv)); + + if (off_ra(minv) < off_ra(maxv) && is_offset_test_ok) + { + rd.ge_than += off_ra(minv); + + const bool is_range_end_test_ok = is_negative + ? (off_ra(maxv) + const_offs(minv) <= segs_end) + : (off_ra(maxv) - 1 < segs_end - 1 && const_offs(minv) <= segs_end - off_ra(maxv)); + + if (is_range_end_test_ok) + { + rd.value_range = off_ra.value_range; + } + } + + if (is_load_from_toc) + { + if (rd.value_range == 1) + { + // Try to load a constant value from data segment + if (auto val_ptr = get_ptr(static_cast(rd.ge_than))) + { + rd = {}; + rd.load_const(*val_ptr); + } + } + } + } + + continue; + } case ppu_itype::LWZX: case ppu_itype::LDX: // TODO: Confirm if LDX can appear in jumptable branching (probably in LV1 applications such as ps2_emu) { @@ -2311,6 +2470,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con // Register possible jumptable offset auto& rd = get_reg(op.rd); rd = {}; + rd.tag = reg_tag_allocator++; rd.is_loaded = true; const reg_state_t& const_reg = is_ra ? ra : rb; @@ -2451,6 +2611,19 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con } } + if (!preserve_blocks.empty()) + { + ensure(func.blocks.size() == preserve_blocks.size()); + + for (auto fit = func.blocks.begin(), pit = preserve_blocks.begin(); fit != func.blocks.end(); fit++, pit++) + { + // Ensure block addresses match + ensure(fit->first == pit->first); + } + + func.blocks = std::move(preserve_blocks); + } + if (postpone_analysis) { // Block aborted: abort function, postpone @@ -2501,7 +2674,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con { const u32 iaddr = _ptr.addr(); const ppu_opcode_t op{get_ref(_ptr++)}; - const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + const ppu_itype::type type = g_ppu_itype.decode(op.opcode); if (type == ppu_itype::B || type == ppu_itype::BC) { @@ -2574,7 +2747,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con { const u32 addr = _ptr.addr(); const ppu_opcode_t op{get_ref(_ptr++)}; - const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + const ppu_itype::type type = g_ppu_itype.decode(op.opcode); if (type == ppu_itype::UNK) { @@ -2813,7 +2986,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con { const ppu_opcode_t op{get_ref(i_pos)}; - switch (auto type = s_ppu_itype.decode(op.opcode)) + switch (auto type = g_ppu_itype.decode(op.opcode)) { case ppu_itype::UNK: case ppu_itype::ECIWX: @@ -2884,7 +3057,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con } const ppu_opcode_t test_op{get_ref(target)}; - const auto type0 = s_ppu_itype.decode(test_op.opcode); + const auto type0 = g_ppu_itype.decode(test_op.opcode); if (type0 == ppu_itype::UNK) { @@ -2906,7 +3079,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con break; } - const auto type1 = s_ppu_itype.decode(get_ref(target + 4)); + const auto type1 = g_ppu_itype.decode(get_ref(target + 4)); if (type1 == ppu_itype::UNK) { diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 5d9053847d..693ad8c7ea 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -106,6 +106,11 @@ struct ppu_segment void* ptr{}; }; +struct ppua_reg_mask_t +{ + u64 mask; +}; + // PPU Module Information template struct ppu_module : public Type @@ -138,6 +143,8 @@ struct ppu_module : public Type ppu_module* parent = nullptr; // For compilation: refers to original structure (is whole, not partitioned) std::pair local_bounds{0, u32{umax}}; // Module addresses range std::shared_ptr> jit_bounds; // JIT instance modules addresses range + std::unordered_map imports; // Imports information for release upon unload (TODO: OVL implementation!) + std::map>> stub_addr_to_constant_state_of_registers; // Tells possible constant states of registers of functions bool is_relocatable = false; // Is code relocatable(?) template diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index d95737ba50..2c3b958e87 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -71,6 +71,25 @@ extern u32 ppu_generate_id(std::string_view name) return result; } +static void select_from_nids_scenpdrm_addrs(std::map>>& result, const std::unordered_map& fnid_to_use_addr) +{ + static const u32 fnids_list[] = + { + ppu_generate_id("sceNpDrmProcessExitSpawn"), + ppu_generate_id("sceNpDrmProcessExitSpawn2"), + ppu_generate_id("sceNpDrmIsAvailable"), + ppu_generate_id("sceNpDrmIsAvailable2"), + }; + + for (const auto& [nid, use] : fnid_to_use_addr) + { + if (std::count(std::begin(fnids_list), std::end(fnids_list), nid)) + { + result.emplace(use, 0); + } + } +} + ppu_static_module::ppu_static_module(const char* name) : name(name) { @@ -157,9 +176,6 @@ struct ppu_linkage_info // FNID -> (export; [imports...]) std::map functions{}; std::map variables{}; - - // Obsolete - bool imported = false; }; // Module map @@ -940,9 +956,12 @@ static auto ppu_load_exports(const ppu_module& _module, ppu_linkage_inf return result; } -static auto ppu_load_imports(const ppu_module& _module, std::vector& relocs, ppu_linkage_info* link, u32 imports_start, u32 imports_end) +using import_result_t = std::pair, std::unordered_map>; + +static import_result_t ppu_load_imports(const ppu_module& _module, std::vector& relocs, ppu_linkage_info* link, u32 imports_start, u32 imports_end) { - std::unordered_map result; + import_result_t result; + auto& [import_table, nid_to_use_addr] = result; std::lock_guard lock(link->mutex); @@ -976,12 +995,18 @@ static auto ppu_load_imports(const ppu_module& _module, std::vector 0x%x", module_name, ppu_get_function_name(module_name, fnid), fnid, fstub); // Function linkage info - auto& flink = link->modules[module_name].functions[fnid]; + auto& flink = mlink.functions[fnid]; // Add new import - result.emplace(faddr, &flink); + import_table.emplace(faddr, &flink); flink.imports.emplace(faddr); - mlink.imported = true; + + // Check address + // TODO: The address of use should be extracted from analyser instead + if (fstub && fstub >= _module.segs[0].addr && fstub <= _module.segs[0].addr + _module.segs[0].size) + { + nid_to_use_addr.emplace(fnid, fstub); + } // Link address (special HLE function by default) const u32 link_addr = flink.export_addr ? flink.export_addr : g_fxo->get().addr; @@ -992,7 +1017,7 @@ static auto ppu_load_imports(const ppu_module& _module, std::vector(fnids, i + lib.num_func) : 0) { - result.emplace(frefs, &flink); + import_table.emplace(frefs, &flink); flink.frefss.emplace(frefs); ppu_patch_refs(_module, &relocs, frefs, link_addr); } @@ -1010,12 +1035,11 @@ static auto ppu_load_imports(const ppu_module& _module, std::vectormodules[module_name].variables[vnid]; + auto& vlink = mlink.variables[vnid]; // Add new import - result.emplace(vref, &vlink); + import_table.emplace(vref, &vlink); vlink.imports.emplace(vref); - mlink.imported = true; // Link if available ppu_patch_refs(_module, &relocs, vref, vlink.export_addr); @@ -1838,10 +1862,13 @@ shared_ptr ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c ppu_loader.warning("Library %s (rtoc=0x%x):", lib_name, lib_info->toc); + std::unordered_map nid_to_use_addr; ppu_linkage_info dummy{}; prx->specials = ppu_load_exports(*prx, virtual_load ? &dummy : &link, prx->exports_start, prx->exports_end, true, &exported_funcs); - prx->imports = ppu_load_imports(*prx, prx->relocs, virtual_load ? &dummy : &link, lib_info->imports_start, lib_info->imports_end); + + std::tie(prx->imports, nid_to_use_addr) = ppu_load_imports(*prx, prx->relocs, virtual_load ? &dummy : &link, lib_info->imports_start, lib_info->imports_end); + select_from_nids_scenpdrm_addrs(prx->stub_addr_to_constant_state_of_registers, nid_to_use_addr); if (virtual_load) { @@ -2450,10 +2477,13 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str return false; } + std::unordered_map nid_to_use_addr; ppu_linkage_info dummy{}; ppu_load_exports(_main, virtual_load ? &dummy : &link, proc_prx_param.libent_start, proc_prx_param.libent_end); - ppu_load_imports(_main, _main.relocs, virtual_load ? &dummy : &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); + + std::tie(std::ignore, nid_to_use_addr) = ppu_load_imports(_main, _main.relocs, virtual_load ? &dummy : &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); + select_from_nids_scenpdrm_addrs(_main.stub_addr_to_constant_state_of_registers, nid_to_use_addr); std::stable_sort(_main.relocs.begin(), _main.relocs.end()); } @@ -3061,10 +3091,14 @@ std::pair, CellError> ppu_load_overlay(const ppu_exec_ob fmt::throw_exception("Bad magic! (0x%x)", proc_prx_param.magic); } + std::unordered_map nid_to_use_addr; ppu_linkage_info dummy{}; ppu_load_exports(*ovlm, virtual_load ? &dummy : &link, proc_prx_param.libent_start, proc_prx_param.libent_end); - ppu_load_imports(*ovlm, ovlm->relocs, virtual_load ? &dummy : &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); + + std::tie(std::ignore, nid_to_use_addr) = ppu_load_imports(*ovlm, ovlm->relocs, virtual_load ? &dummy : &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); + select_from_nids_scenpdrm_addrs(ovlm->stub_addr_to_constant_state_of_registers, nid_to_use_addr); + } break; } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 65467b168b..63835db35f 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -4162,6 +4162,52 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue)); const u32 cpu_thread_limit = utils::get_thread_count() > 8u ? std::max(utils::get_thread_count(), 2) - 1 : utils::get_thread_count(); // One LLVM thread less + std::vector decrypt_klics; + + if (loaded_modules) + { + for (auto mod : *loaded_modules) + { + for (const auto& [stub, data_vec] : mod->stub_addr_to_constant_state_of_registers) + { + if (decrypt_klics.size() >= 4u) + { + break; + } + + for (const auto& [reg_mask, constant_value] : data_vec) + { + if (decrypt_klics.size() >= 4u) + { + break; + } + + if (constant_value > u32{umax}) + { + continue; + } + + // R3 - first argument + if (reg_mask.mask & (1u << 3)) + { + // Sizeof KLIC + if (auto klic_ptr = mod->get_ptr(static_cast(constant_value), 16)) + { + // Try to read from that address + if (const u128 klic_value = read_from_ptr(klic_ptr)) + { + if (!std::count_if(decrypt_klics.begin(), decrypt_klics.end(), FN(std::memcmp(&x, &klic_value, 16) == 0))) + { + decrypt_klics.emplace_back(klic_value); + } + } + } + } + } + } + } + } + named_thread_group workers("SPRX Worker ", std::min(software_thread_limit, cpu_thread_limit), [&] { #ifdef __APPLE__ @@ -4211,8 +4257,23 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector decrypt_klics.size()) + { + src.close(); + break; + } + + // Some files may fail to decrypt due to the lack of klic + u128 key = i == decrypt_klics.size() ? u128{} : decrypt_klics[i]; + + if (auto result = decrypt_self(src, i == decrypt_klics.size() ? nullptr : reinterpret_cast(&key))) + { + src = std::move(result); + break; + } + } if (!src) { @@ -4380,8 +4441,23 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector decrypt_klics.size()) + { + src.close(); + break; + } + + // Some files may fail to decrypt due to the lack of klic + u128 key = i == decrypt_klics.size() ? u128{} : decrypt_klics[i]; + + if (auto result = decrypt_self(src, i == decrypt_klics.size() ? nullptr : reinterpret_cast(&key))) + { + src = std::move(result); + break; + } + } if (!src) { @@ -4484,6 +4560,7 @@ extern void ppu_initialize() } std::vector*> module_list; + module_list.emplace_back(&g_fxo->get>()); const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/"); diff --git a/rpcs3/Emu/Cell/lv2/sys_overlay.cpp b/rpcs3/Emu/Cell/lv2/sys_overlay.cpp index 66481ed280..33475a8553 100644 --- a/rpcs3/Emu/Cell/lv2/sys_overlay.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_overlay.cpp @@ -36,7 +36,7 @@ static error_code overlay_load_module(vm::ptr ovlmid, const std::string& vp u128 klic = g_fxo->get().last_key(); - src = decrypt_self(std::move(src), reinterpret_cast(&klic), nullptr, true); + src = decrypt_self(std::move(src), reinterpret_cast(&klic)); if (!src) { diff --git a/rpcs3/Emu/Cell/lv2/sys_prx.cpp b/rpcs3/Emu/Cell/lv2/sys_prx.cpp index 21a4f55000..046d19c48e 100644 --- a/rpcs3/Emu/Cell/lv2/sys_prx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_prx.cpp @@ -265,7 +265,7 @@ static error_code prx_load_module(const std::string& vpath, u64 flags, vm::ptrget().last_key(); - src = decrypt_self(std::move(src), reinterpret_cast(&klic), nullptr, true); + src = decrypt_self(std::move(src), reinterpret_cast(&klic)); if (!src) { diff --git a/rpcs3/Emu/Cell/lv2/sys_prx.h b/rpcs3/Emu/Cell/lv2/sys_prx.h index 610ed68145..a78542dcbd 100644 --- a/rpcs3/Emu/Cell/lv2/sys_prx.h +++ b/rpcs3/Emu/Cell/lv2/sys_prx.h @@ -192,7 +192,6 @@ struct lv2_prx final : ppu_module shared_mutex mutex; std::unordered_map specials; - std::unordered_map imports; vm::ptr argv)> start = vm::null; vm::ptr argv)> stop = vm::null; diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 9f72e47c80..0f03311a82 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -1747,7 +1747,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, fs::file src{path}; - src = decrypt_self(std::move(src)); + src = decrypt_self(src); const ppu_exec_object obj = src; @@ -1764,6 +1764,8 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, g_fxo->init("SPRX Loader"sv, [this, dir_queue]() mutable { + std::vector*> mod_list; + if (auto& _main = *ensure(g_fxo->try_get>()); !_main.path.empty()) { if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_patches, std::vector{}, [](){ return Emu.IsStopped(); })) @@ -1773,6 +1775,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, Emu.ConfigurePPUCache(); ppu_initialize(_main); + mod_list.emplace_back(&_main); } if (Emu.IsStopped()) @@ -1780,7 +1783,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, return; } - ppu_precompile(dir_queue, nullptr); + ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list); if (Emu.IsStopped()) { @@ -2238,7 +2241,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, { // Decrypt SELF had_been_decrypted = true; - elf_file = decrypt_self(std::move(elf_file), klic.empty() ? nullptr : reinterpret_cast(&klic[0]), &g_ps3_process_info.self_info); + elf_file = decrypt_self(elf_file, klic.empty() ? nullptr : reinterpret_cast(&klic[0]), &g_ps3_process_info.self_info); } else { diff --git a/rpcs3/util/serialization.hpp b/rpcs3/util/serialization.hpp index 866fd7c0f4..41f6e560af 100644 --- a/rpcs3/util/serialization.hpp +++ b/rpcs3/util/serialization.hpp @@ -566,14 +566,14 @@ public: } else if constexpr (TupleAlike) { - constexpr usz tup_size = c_tup_size; + constexpr int tup_size = c_tup_size; static_assert(tup_size == 2 || tup_size == 4, "Unimplemented tuple serialization!"); - using first_t = std::remove_cvref_t(0, tup_size - 1)>(std::declval()))>; - using second_t = std::remove_cvref_t(1, tup_size - 1)>(std::declval()))>; - using third_t = std::remove_cvref_t(2, tup_size - 1)>(std::declval()))>; - using fourth_t = std::remove_cvref_t(3, tup_size - 1)>(std::declval()))>; + using first_t = typename std::tuple_element::type; + using second_t = typename std::tuple_element::type; + using third_t = typename std::tuple_element::type; + using fourth_t = typename std::tuple_element::type; first_t first = this->operator first_t();