diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 167e389f29..649feab13c 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -140,6 +140,22 @@ FORCE_INLINE auto ppu_feed_data(ppu_thread& ppu, u64 addr) return value; } +// Push called address to custom call history for debugging +inline u32 ppu_record_call(ppu_thread& ppu, u32 new_cia, ppu_opcode_t op) +{ + if (!op.lk) + { + return new_cia; + } + + if (auto& history = ppu.call_history; !history.data.empty()) + { + history.data[history.index++ % ppu.call_history_max_size] = new_cia; + } + + return new_cia; +} + // Compare 16 packed unsigned bytes (greater than) inline __m128i sse_cmpgt_epu8(__m128i A, __m128i B) { @@ -3085,6 +3101,10 @@ bool ppu_interpreter::BC(ppu_thread& ppu, ppu_opcode_t op) if (ctr_ok && cond_ok) { + // Provide additional information by using the origin of the call + // Because this is a fixed target branch there's no abiguity about it + ppu_record_call(ppu, ppu.cia, op); + ppu.cia = (op.aa ? 0 : ppu.cia) + op.bt14; return false; } @@ -3107,6 +3127,10 @@ bool ppu_interpreter::SC(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::B(ppu_thread& ppu, ppu_opcode_t op) { + // Provide additional information by using the origin of the call + // Because this is a fixed target branch there's no abiguity about it + ppu_record_call(ppu, ppu.cia, op); + const u32 link = ppu.cia + 4; ppu.cia = (op.aa ? 0 : ppu.cia) + op.bt24; if (op.lk) ppu.lr = link; @@ -3137,7 +3161,7 @@ bool ppu_interpreter::BCLR(ppu_thread& ppu, ppu_opcode_t op) if (ctr_ok && cond_ok) { - ppu.cia = target; + ppu.cia = ppu_record_call(ppu, target, op); return false; } else @@ -3206,7 +3230,7 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op) if (op.bo & 0x10 || ppu.cr[op.bi] == ((op.bo & 0x8) != 0)) { - ppu.cia = static_cast(ppu.ctr) & ~3; + ppu.cia = ppu_record_call(ppu, static_cast(ppu.ctr) & ~3, op); return false; } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index a6b5469fa3..49a0601520 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -118,6 +118,17 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + const auto& history = get_object(arg); + + for (u64 count = 0, idx = history.index - 1; idx != umax && count < ppu_thread::call_history_max_size; count++, idx--) + { + fmt::append(out, "\n(%u) 0x%08x", count, history.data[idx % ppu_thread::call_history_max_size]); + } +} + const ppu_decoder g_ppu_interpreter_precise; const ppu_decoder g_ppu_interpreter_fast; const ppu_decoder g_ppu_itype; @@ -885,6 +896,22 @@ std::string ppu_thread::dump_misc() const return ret; } +std::string ppu_thread::dump_all() const +{ + std::string ret = cpu_thread::dump_all(); + + if (!call_history.data.empty()) + { + ret += + "\nCalling History:" + "\n================"; + + fmt::append(ret, "%s", call_history); + } + + return ret; +} + extern thread_local std::string(*g_tls_log_prefix)(); void ppu_thread::cpu_task() @@ -1152,6 +1179,11 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3 { state += cpu_flag::memory; } + + if (g_cfg.core.ppu_call_history) + { + call_history.data.resize(call_history_max_size); + } } ppu_thread::thread_name_t::operator std::string() const diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 825f934f91..9fcbae7895 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -134,6 +134,7 @@ public: virtual std::string dump_callstack() const override; virtual std::vector> dump_callstack_list() const override; virtual std::string dump_misc() const override; + virtual std::string dump_all() const override; virtual void cpu_task() override final; virtual void cpu_sleep() override; virtual void cpu_on_stop() override; @@ -296,6 +297,14 @@ public: u32 dbg_step_pc = 0; + struct call_history_t + { + std::vector data; + u64 index = 0; + } call_history; + + static constexpr u32 call_history_max_size = 4096; + // For named_thread ctor const struct thread_name_t { diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index ce8fa8d888..109d2ccc97 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1676,11 +1676,6 @@ void spu_thread::cleanup() // Deallocate local storage ensure(vm::dealloc(vm_offset(), vm::spu, &shm)); - if (g_cfg.core.mfc_debug) - { - utils::memory_decommit(vm::g_stat_addr + vm_offset(), SPU_LS_SIZE); - } - // Deallocate RawSPU ID if (get_type() >= spu_type::raw) { @@ -1713,11 +1708,6 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u , shm(std::make_shared(SPU_LS_SIZE)) , ls([&]() { - if (g_cfg.core.mfc_debug) - { - utils::memory_commit(vm::g_stat_addr + vm_offset(), SPU_LS_SIZE); - } - if (!group) { ensure(vm::get(vm::spu)->falloc(vm_offset(), SPU_LS_SIZE, &shm, 0x200)); @@ -1791,6 +1781,11 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u cpu_init(); } + if (g_cfg.core.mfc_debug) + { + mfc_history.resize(max_mfc_dump_idx); + } + range_lock = vm::alloc_range_lock(); } @@ -2124,7 +2119,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* { if (g_cfg.core.mfc_debug && _this) { - auto& dump = reinterpret_cast(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; dump.cmd = args; dump.cmd.eah = _this->pc; std::memcpy(dump.data, is_get ? dst : src, std::min(args.size, 128)); @@ -2322,7 +2317,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* if (g_cfg.core.mfc_debug && _this) { - auto& dump = reinterpret_cast(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; dump.cmd = args; dump.cmd.eah = _this->pc; std::memcpy(dump.data, is_get ? dst : src, std::min(args.size, 128)); @@ -2469,7 +2464,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* if (g_cfg.core.mfc_debug && _this) { - auto& dump = reinterpret_cast(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; dump.cmd = args; dump.cmd.eah = _this->pc; std::memcpy(dump.data, is_get ? dst : src, std::min(args.size, 128)); @@ -2538,7 +2533,7 @@ plain_access: if (g_cfg.core.mfc_debug && _this) { - auto& dump = reinterpret_cast(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; dump.cmd = args; dump.cmd.eah = _this->pc; std::memcpy(dump.data, is_get ? dst : src, std::min(args.size, 128)); @@ -3289,7 +3284,7 @@ bool spu_thread::process_mfc_cmd() if (g_cfg.core.mfc_debug) { - auto& dump = reinterpret_cast(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = mfc_history[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; dump.cmd = ch_mfc_cmd; dump.cmd.eah = pc; std::memcpy(dump.data, rdata, 128); @@ -3314,7 +3309,7 @@ bool spu_thread::process_mfc_cmd() if (dump) { - auto& dump = reinterpret_cast(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx]; dump.cmd = ch_mfc_cmd; dump.cmd.eah = pc; dump.cmd.tag = static_cast(ch_atomic_stat.get_value()); // Use tag as atomic status @@ -3327,7 +3322,7 @@ bool spu_thread::process_mfc_cmd() { if (g_cfg.core.mfc_debug) { - auto& dump = reinterpret_cast(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx]; dump.cmd = ch_mfc_cmd; dump.cmd.eah = pc; std::memcpy(dump.data, _ptr(ch_mfc_cmd.lsa & 0x3ff80), 128); @@ -3341,7 +3336,7 @@ bool spu_thread::process_mfc_cmd() { if (g_cfg.core.mfc_debug) { - auto& dump = reinterpret_cast(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx]; dump.cmd = ch_mfc_cmd; dump.cmd.eah = pc; std::memcpy(dump.data, _ptr(ch_mfc_cmd.lsa & 0x3ff80), 128); @@ -3426,7 +3421,7 @@ bool spu_thread::process_mfc_cmd() //if (g_cfg.core.mfc_debug) //{ // TODO: This needs a disambiguator with list elements dumping - // auto& dump = reinterpret_cast(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx]; + // auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx]; // dump.cmd = ch_mfc_cmd; // dump.cmd.eah = pc; // std::memcpy(dump.data, _ptr(ch_mfc_cmd.eah & 0x3fff0), std::min(ch_mfc_cmd.size, 128)); diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 2607f7d5d3..d718671c4a 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -770,8 +770,9 @@ public: u64 last_fail = 0; u64 last_succ = 0; + std::vector mfc_history; u64 mfc_dump_idx = 0; - static constexpr u32 max_mfc_dump_idx = SPU_LS_SIZE / sizeof(mfc_cmd_dump); + static constexpr u32 max_mfc_dump_idx = 2048; std::array stack_mirror; // Return address information diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index ce48ebdfbc..bec139a528 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -47,6 +47,13 @@ bool ppu_thread_exit(ppu_thread& ppu) dct.used -= ppu.stack_size; } + if (ppu.call_history.index) + { + std::string str = fmt::format("%s", ppu.call_history); + ppu.call_history.index = 0; + ppu_log.notice("Calling history: %s", str); + } + return false; } diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 7a5c113e45..7b051a6dbe 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -22,6 +22,7 @@ struct cfg_root : cfg::node cfg::_enum ppu_decoder{ this, "PPU Decoder", ppu_decoder_type::llvm }; cfg::_int<1, 8> ppu_threads{ this, "PPU Threads", 2 }; // Amount of PPU threads running simultaneously (must be 2) cfg::_bool ppu_debug{ this, "PPU Debug" }; + cfg::_bool ppu_call_history{ this, "PPU Calling History" }; // Enable PPU calling history recording cfg::_bool llvm_logs{ this, "Save LLVM logs" }; cfg::string llvm_cpu{ this, "Use LLVM CPU" }; cfg::_int<0, 1024> llvm_threads{ this, "Max LLVM Compile Threads", 0 }; diff --git a/rpcs3/rpcs3qt/debugger_frame.cpp b/rpcs3/rpcs3qt/debugger_frame.cpp index 6ea49240ee..7c4c09557f 100644 --- a/rpcs3/rpcs3qt/debugger_frame.cpp +++ b/rpcs3/rpcs3qt/debugger_frame.cpp @@ -298,6 +298,7 @@ void debugger_frame::keyPressEvent(QKeyEvent* event) "\nKeys Ctrl+B: Open breakpoints settings." "\nKeys Alt+S: Capture SPU images of selected SPU." "\nKey D: SPU MFC commands logger, MFC debug setting must be enabled." + "\nKey D: Also PPU calling history logger, interpreter and non-zero call history size must be used." "\nKey E: Instruction Editor: click on the instruction you want to modify, then press E." "\nKey F: Dedicated floating point mode switch for SPU threads." "\nKey R: Registers Editor for selected thread." @@ -365,10 +366,9 @@ void debugger_frame::keyPressEvent(QKeyEvent* event) if (event->isAutoRepeat()) return; - if (cpu->id_type() == 2 && g_cfg.core.mfc_debug && !cpu->is_stopped()) + auto get_max_allowed = [&](QString title, QString description, u32 limit) -> u32 { - input_dialog dlg(4, "", tr("Max MFC cmds logged"), - tr("Decimal only, max allowed is 1820."), "0", this); + input_dialog dlg(4, "", title, description.arg(limit), QString::number(limit), this); QFont mono = QFontDatabase::systemFont(QFontDatabase::FixedFont); mono.setPointSize(8); @@ -383,7 +383,7 @@ void debugger_frame::keyPressEvent(QKeyEvent* event) { bool ok = false; const u32 dummy = changed.toUInt(&ok, 10); - ok = ok && dummy && dummy <= spu_thread::max_mfc_dump_idx; + ok = ok && dummy && dummy <= limit; dlg.set_button_enabled(QDialogButtonBox::StandardButton::Ok, ok); if (ok) @@ -397,30 +397,71 @@ void debugger_frame::keyPressEvent(QKeyEvent* event) max = 0; } - const auto spu = static_cast(cpu); + return max; + }; - const auto ptr = reinterpret_cast(vm::g_stat_addr + spu->vm_offset()); + auto copy_overlapping_list = [&] (u64& index, u64 max, const std::vector& in, std::vector& out, bool& emptied) + { + const u64 current_pos = index % in.size(); + const u64 last_elements = std::min(current_pos, max); + const u64 overlapped_old_elements = std::min(index, max) - last_elements; + + out.resize(overlapped_old_elements + last_elements); + + // Save list contents (only the relavant parts) + std::copy(in.end() - overlapped_old_elements, in.end(), out.begin()); + std::copy_n(in.begin() + current_pos - last_elements, last_elements, out.begin() + overlapped_old_elements); + + // Check if max elements to log is larger/equal to current list size + if ((emptied = index && max >= index)) + { + // Empty list when possible (further calls' history logging will not log any call before this) + index = 0; + } + }; + + + if (cpu->id_type() == 2 && g_cfg.core.mfc_debug) + { + const u32 max = get_max_allowed(tr("Max MFC cmds logged"), tr("Decimal only, max allowed is %0."), spu_thread::max_mfc_dump_idx); + + // Preallocate in order to save execution time when inside suspend_all. + std::vector copy(max); + + bool emptied = false; + + cpu_thread::suspend_all(nullptr, {}, [&] + { + const auto spu = static_cast(cpu); + copy_overlapping_list(spu->mfc_dump_idx, max, spu->mfc_history, copy, emptied); + }); std::string ret; - for (u64 count = 0, idx = spu->mfc_dump_idx - 1; idx != umax && count < max; count++, idx--) + u32 i = 0; + for (auto it = copy.rbegin(); it != copy.rend(); it++, i++) { - auto dump = ptr[idx % spu_thread::max_mfc_dump_idx]; + auto& dump = *it; const u32 pc = std::exchange(dump.cmd.eah, 0); - fmt::append(ret, "\n(%d) PC 0x%05x: [%s]", count, pc, dump.cmd); + fmt::append(ret, "\n(%d) PC 0x%05x: [%s]", i, pc, dump.cmd); if (dump.cmd.cmd == MFC_PUTLLC_CMD) { fmt::append(ret, " %s", dump.cmd.tag == MFC_PUTLLC_SUCCESS ? "(passed)" : "(failed)"); } - const auto data = reinterpret_cast*>(dump.data); + auto load = [&](usz index) + { + be_t data{}; + std::memcpy(&data, dump.data + index * sizeof(data), sizeof(data)); + return data; + }; for (usz i = 0; i < utils::aligned_div(std::min(dump.cmd.size, 128), 4); i += 4) { - fmt::append(ret, "\n[0x%02x] %08x %08x %08x %08x", i * sizeof(data[0]) - , data[i + 0], data[i + 1], data[i + 2], data[i + 3]); + fmt::append(ret, "\n[0x%02x] %08x %08x %08x %08x", i * sizeof(be_t) + , load(i + 0), load(i + 1), load(i + 2), load(i + 3)); } } @@ -429,8 +470,49 @@ void debugger_frame::keyPressEvent(QKeyEvent* event) ret = "No MFC commands have been logged"; } - spu_log.warning("SPU MFC dump of '%s': %s", spu->get_name(), ret); + if (emptied) + { + ret += "\nPrevious MFC history has been emptied!"; + } + + spu_log.success("SPU MFC dump of '%s': %s", cpu->get_name(), ret); } + else if (cpu->id_type() == 1 && g_cfg.core.ppu_call_history) + { + const u32 max = get_max_allowed(tr("Max PPU calls logged"), tr("Decimal only, max allowed is %0."), ppu_thread::call_history_max_size); + + // Preallocate in order to save execution time when inside suspend_all. + std::vector copy(max); + + bool emptied = false; + + cpu_thread::suspend_all(nullptr, {}, [&] + { + auto& list = static_cast(cpu)->call_history; + copy_overlapping_list(list.index, max, list.data, copy, emptied); + }); + + std::string ret; + + u32 i = 0; + for (auto it = copy.rbegin(); it != copy.rend(); it++, i++) + { + fmt::append(ret, "\n(%u) 0x%08x", i, *it); + } + + if (ret.empty()) + { + ret = "No PPU calls have been logged"; + } + + if (emptied) + { + ret += "\nPrevious call history has been emptied!"; + } + + ppu_log.success("PPU calling history dump of '%s': %s", cpu->get_name(), ret); + } + return; } case Qt::Key_E: