diff --git a/rpcs3/Emu/Cell/Modules/cellPad.cpp b/rpcs3/Emu/Cell/Modules/cellPad.cpp index 2c3f0f695b..107be92a9d 100644 --- a/rpcs3/Emu/Cell/Modules/cellPad.cpp +++ b/rpcs3/Emu/Cell/Modules/cellPad.cpp @@ -67,6 +67,8 @@ void pad_info::save(utils::serial& ar) USING_SERIALIZATION_VERSION(sys_io); ar(max_connect, port_setting); + + sys_io_serialize(ar); } extern void send_sys_io_connect_event(u32 index, u32 state); diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index 79ff56fb62..844cd18fb3 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -1390,9 +1390,9 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b block.second = _ptr.addr() - block.first; break; } - else if (type == ppu_itype::TW || type == ppu_itype::TWI || type == ppu_itype::TD || type == ppu_itype::TDI) + else if (type & ppu_itype::trap) { - if (op.opcode != ppu_instructions::TRAP()) + if (op.bo != 31) { add_block(_ptr.addr()); } @@ -1618,6 +1618,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b end = 0; } + u32 per_instruction_bytes = 0; + for (auto&& [_, func] : as_rvalue(fmap)) { if (func.attr & ppu_attr::no_size && entry) @@ -1636,6 +1638,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b block.attr = ppu_attr::no_size; } + per_instruction_bytes += utils::sub_saturate(lim, func.addr); continue; } @@ -1716,11 +1719,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b u32 exp = start; u32 lim = end; - // Start with full scan (disabled for PRX for now) - if (entry) - { - block_queue.emplace_back(exp, lim); - } + // Start with full scan + block_queue.emplace_back(exp, lim); // Add entries from patches (on per-instruction basis) for (u32 addr : applied) @@ -1754,14 +1754,17 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b { u32 i_pos = exp; + u32 block_edges[16]; + u32 
edge_count = 0; + bool is_good = true; bool is_fallback = true; for (; i_pos < lim; i_pos += 4) { - const u32 opc = get_ref(i_pos); + const ppu_opcode_t op{get_ref(i_pos)}; - switch (auto type = s_ppu_itype.decode(opc)) + switch (auto type = s_ppu_itype.decode(op.opcode)) { case ppu_itype::UNK: case ppu_itype::ECIWX: @@ -1771,10 +1774,20 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b is_good = false; break; } - case ppu_itype::TD: case ppu_itype::TDI: - case ppu_itype::TW: case ppu_itype::TWI: + { + if (op.ra == 1u || op.ra == 13u || op.ra == 2u) + { + // Non-user registers, checking them against a constant value makes no sense + is_good = false; + break; + } + + [[fallthrough]]; + } + case ppu_itype::TD: + case ppu_itype::TW: case ppu_itype::B: case ppu_itype::BC: { @@ -1785,14 +1798,14 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b if (type == ppu_itype::B || type == ppu_itype::BC) { - if (entry == 0 && ppu_opcode_t{opc}.aa) + if (entry == 0 && op.aa) { // Ignore absolute branches in PIC (PRX) is_good = false; break; } - const u32 target = (opc & 2 ? 0 : i_pos) + (type == ppu_itype::B ? +ppu_opcode_t{opc}.bt24 : +ppu_opcode_t{opc}.bt14); + const u32 target = (op.aa ? 0 : i_pos) + (type == ppu_itype::B ? 
+op.bt24 : +op.bt14); if (target < segs[0].addr || target >= segs[0].addr + segs[0].size) { @@ -1801,9 +1814,43 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b break; } + const ppu_opcode_t test_op{get_ref(target)}; + const auto type0 = s_ppu_itype.decode(test_op.opcode); + + if (type0 == ppu_itype::UNK) + { + is_good = false; + break; + } + + // Test another instruction just in case (testing more is unlikely to improve results by much) + if (!(type0 & ppu_itype::branch)) + { + if (target + 4 >= segs[0].addr + segs[0].size) + { + is_good = false; + break; + } + + const auto type1 = s_ppu_itype.decode(get_ref(target + 4)); + + if (type1 == ppu_itype::UNK) + { + is_good = false; + break; + } + } + else if (u32 target0 = (test_op.aa ? 0 : target) + (type == ppu_itype::B ? +test_op.bt24 : +test_op.bt14); + target0 < segs[0].addr || target0 >= segs[0].addr + segs[0].size) + { + // Sanity check + is_good = false; + break; + } + if (target != i_pos && !fmap.contains(target)) { - if (block_set.count(target) == 0) + if (block_set.count(target) == 0 && std::count(block_edges, block_edges + edge_count, target) == 0) { ppu_log.trace("Block target found: 0x%x (i_pos=0x%x)", target, i_pos); block_queue.emplace_back(target, 0); @@ -1818,27 +1865,38 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b case ppu_itype::BCLR: case ppu_itype::SC: { - if (type == ppu_itype::SC && opc != ppu_instructions::SC(0)) + if (type == ppu_itype::SC && op.opcode != ppu_instructions::SC(0)) { // Strict garbage filter is_good = false; break; } - if (type == ppu_itype::BCCTR && opc & 0xe000) + if (type == ppu_itype::BCCTR && op.opcode & 0xe000) { // Garbage filter is_good = false; break; } - if (type == ppu_itype::BCLR && opc & 0xe000) + if (type == ppu_itype::BCLR && op.opcode & 0xe000) { // Garbage filter is_good = false; break; } + if ((type & ppu_itype::branch && op.lk) || type & ppu_itype::trap || type == ppu_itype::BC) + { + // 
if farther instructions are valid: register all blocks + // Otherwise, register none (all or nothing) + if (edge_count < std::size(block_edges)) + { + block_edges[edge_count++] = i_pos + 4; + continue; + } + } + // Good block terminator found, add single block break; } @@ -1869,17 +1927,23 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b if (is_good) { - auto& block = fmap[exp]; - - if (!block.addr) + for (u32 it = 0, prev_addr = exp; it <= edge_count; it++) { - block.addr = exp; - block.size = i_pos - exp; - ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size); + const u32 block_end = it < edge_count ? block_edges[it] : i_pos; + const u32 block_begin = std::exchange(prev_addr, block_end); - if (get_limit(exp) == end) + auto& block = fmap[block_begin]; + + if (!block.addr) { - block.attr += ppu_attr::no_size; + block.addr = block_begin; + block.size = block_end - block_begin; + ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size); + + if (get_limit(block_begin) == end) + { + block.attr += ppu_attr::no_size; + } } } } @@ -1902,9 +1966,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b // Convert map to vector (destructive) for (auto&& [_, block] : as_rvalue(std::move(fmap))) { - if (block.attr & ppu_attr::no_size && block.size > 4 && entry) + if (block.attr & ppu_attr::no_size && block.size > 4) { - // Disabled for PRX for now ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size); for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4) @@ -1916,12 +1979,19 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b i.attr = ppu_attr::no_size; } + per_instruction_bytes += block.size; continue; } funcs.emplace_back(std::move(block)); } + if (per_instruction_bytes) + { + const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= 
utils::aligned_div(funcs.size(), 128); + (error ? ppu_log.error : ppu_log.notice)("%d instructions will be compiled on per-instruction basis in total", per_instruction_bytes / 4); + } + ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size()); return true; } diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index ef02faa530..ccbd743f92 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -263,6 +263,7 @@ struct ppu_pattern_matrix struct ppu_itype { static constexpr struct branch_tag{} branch{}; // Branch Instructions + static constexpr struct trap_tag{} trap{}; // Trap Instructions enum type { @@ -425,8 +426,6 @@ struct ppu_itype VUPKLSB, VUPKLSH, VXOR, - TDI, - TWI, MULLI, SUBFIC, CMPLI, @@ -461,7 +460,6 @@ struct ppu_itype RLDCL, RLDCR, CMP, - TW, LVSL, LVEBX, SUBFC, @@ -488,7 +486,6 @@ struct ppu_itype LWZUX, CNTLZD, ANDC, - TD, LVEWX, MULHD, MULHW, @@ -784,6 +781,11 @@ struct ppu_itype BC, BCLR, BCCTR, // branch_tag last + + TD, // trap_tag first + TW, + TDI, + TWI, // trap_tag last }; // Enable address-of operator for ppu_decoder<> @@ -796,6 +798,11 @@ struct ppu_itype { return value >= B && value <= BCCTR; } + + friend constexpr bool operator &(type value, trap_tag) + { + return value >= TD && value <= TWI; + } }; struct ppu_iname diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 4df1749bcd..ff8f3c1c83 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -1356,12 +1356,6 @@ std::shared_ptr ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo // Initialize executable code if necessary if (prog.p_flags & 0x1 && !virtual_load) { - if (ar) - { - // Disable analysis optimization for savestates (it's not compatible with savestate with patches applied) - end = std::max(end, utils::align(addr + mem_size, 0x10000)); - } - ppu_register_range(addr, mem_size); } } @@ -1651,6 +1645,36 @@ std::shared_ptr ppu_load_prx(const 
ppu_prx_object& elf, bool virtual_lo } } + // Disabled for PRX for now (problematic and does not seem to have any benefit) + end = 0; + + if (!applied.empty() || ar) + { + // Compare memory changes in memory after executable code sections end + if (end >= prx->segs[0].addr && end < prx->segs[0].addr + prx->segs[0].size) + { + for (const auto& prog : elf.progs) + { + // Find the first segment + if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz) + { + std::basic_string_view elf_memory{prog.bin.data(), prog.bin.size()}; + elf_memory.remove_prefix(end - prx->segs[0].addr); + + if (elf_memory != std::basic_string_view{&prx->get_ref(end), elf_memory.size()}) + { + // There are changes, disable analysis optimization + ppu_loader.notice("Disabling analysis optimization due to memory changes from original file"); + + end = 0; + } + + break; + } + } + } + } + // Embedded SPU elf patching for (const auto& seg : prx->segs) { @@ -1910,12 +1934,6 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str // Initialize executable code if necessary if (prog.p_flags & 0x1 && !virtual_load) { - if (already_loaded && ar) - { - // Disable analysis optimization for savestates (it's not compatible with savestate with patches applied) - end = std::max(end, utils::align(addr + size, 0x10000)); - } - ppu_register_range(addr, size); } } @@ -1969,6 +1987,33 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str applied += g_fxo->get().apply(Emu.GetTitleID() + '-' + hash, [&](u32 addr, u32 size) { return _main.get_ptr(addr, size); }); } + if (!applied.empty() || ar) + { + // Compare memory changes in memory after executable code sections end + if (end >= _main.segs[0].addr && end < _main.segs[0].addr + _main.segs[0].size) + { + for (const auto& prog : elf.progs) + { + // Find the first segment + if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz) + { + std::basic_string_view elf_memory{prog.bin.data(), prog.bin.size()}; + 
elf_memory.remove_prefix(end - _main.segs[0].addr); + + if (elf_memory != std::basic_string_view{&_main.get_ref(end), elf_memory.size()}) + { + // There are changes, disable analysis optimization + ppu_loader.notice("Disabling analysis optimization due to memory changes from original file"); + + end = 0; + } + + break; + } + } + } + } + if (applied.empty()) { ppu_loader.warning("PPU executable hash: %s", hash); @@ -2574,12 +2619,6 @@ std::pair, CellError> ppu_load_overlay(const ppu_ex // Initialize executable code if necessary if (prog.p_flags & 0x1 && !virtual_load) { - if (ar) - { - // Disable analysis optimization for savestates (it's not compatible with savestate with patches applied) - end = std::max(end, utils::align(addr + size, 0x10000)); - } - ppu_register_range(addr, size); } } @@ -2631,6 +2670,33 @@ std::pair, CellError> ppu_load_overlay(const ppu_ex applied += g_fxo->get().apply(Emu.GetTitleID() + '-' + hash, [ovlm](u32 addr, u32 size) { return ovlm->get_ptr(addr, size); }); } + if (!applied.empty() || ar) + { + // Compare memory changes in memory after executable code sections end + if (end >= ovlm->segs[0].addr && end < ovlm->segs[0].addr + ovlm->segs[0].size) + { + for (const auto& prog : elf.progs) + { + // Find the first segment + if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz) + { + std::basic_string_view elf_memory{prog.bin.data(), prog.bin.size()}; + elf_memory.remove_prefix(end - ovlm->segs[0].addr); + + if (elf_memory != std::basic_string_view{&ovlm->get_ref(end), elf_memory.size()}) + { + // There are changes, disable analysis optimization + ppu_loader.notice("Disabling analysis optimization due to memory changes from original file"); + + end = 0; + } + + break; + } + } + } + } + // Embedded SPU elf patching for (const auto& seg : ovlm->segs) {