SPU debugger: Implement MFC journal

* Allow to dump up to 1820 commands with up 128 bytes of data each, using key D with the debugger.
This commit is contained in:
Eladash 2021-02-26 11:20:25 +02:00 committed by Ivan
parent d0edd44a69
commit 004ebfdaee
13 changed files with 222 additions and 20 deletions

View file

@ -95,3 +95,10 @@ struct alignas(16) spu_mfc_cmd
u32 eal;
u32 eah;
};
struct mfc_cmd_dump
{
spu_mfc_cmd cmd;
alignas(16) u8 data[128];
};

View file

@ -392,6 +392,11 @@ extern void ppu_register_range(u32 addr, u32 size)
utils::memory_commit(&ppu_ref(addr), u64{size} * 2, utils::protection::rw);
vm::page_protect(addr, size, 0, vm::page_executable);
if (g_cfg.core.ppu_debug)
{
utils::memory_commit(vm::g_stat_addr + addr, size);
}
const u64 fallback = reinterpret_cast<uptr>(ppu_fallback);
const u64 seg_base = addr;

View file

@ -5948,7 +5948,9 @@ public:
const auto mmio = llvm::BasicBlock::Create(m_context, "", m_function);
const auto copy = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpUGE(eal.value, m_ir->getInt32(0xe0000000)), mmio, copy, m_md_unlikely);
// Always use interpreter function for MFC debug option
m_ir->CreateCondBr(m_ir->CreateICmpUGE(eal.value, m_ir->getInt32(g_cfg.core.mfc_debug ? 0 : 0xe0000000)), mmio, copy, m_md_unlikely);
m_ir->SetInsertPoint(mmio);
m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread);

View file

@ -1675,10 +1675,13 @@ struct raw_spu_cleanup
void spu_thread::cleanup()
{
const u32 addr = group ? SPU_FAKE_BASE_ADDR + SPU_LS_SIZE * (id & 0xffffff) : RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index;
// Deallocate local storage
ensure(vm::dealloc(addr, vm::spu, &shm));
ensure(vm::dealloc(vm_offset(), vm::spu, &shm));
if (g_cfg.core.mfc_debug)
{
utils::memory_decommit(vm::g_stat_addr + vm_offset(), SPU_LS_SIZE);
}
// Deallocate RawSPU ID
if (get_type() >= spu_type::raw)
@ -1707,18 +1710,24 @@ spu_thread::~spu_thread()
spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u32 lv2_id, bool is_isolated, u32 option)
: cpu_thread(idm::last_id())
, group(group)
, index(index)
, shm(std::make_shared<utils::shm>(SPU_LS_SIZE))
, ls([&]()
{
if (g_cfg.core.mfc_debug)
{
utils::memory_commit(vm::g_stat_addr + vm_offset(), SPU_LS_SIZE);
}
if (!group)
{
ensure(vm::get(vm::spu)->falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, SPU_LS_SIZE, &shm));
ensure(vm::get(vm::spu)->falloc(vm_offset(), SPU_LS_SIZE, &shm));
}
else
{
// 0x1000 indicates falloc to allocate page with no access rights in base memory
ensure(vm::get(vm::spu)->falloc(SPU_FAKE_BASE_ADDR + SPU_LS_SIZE * (cpu_thread::id & 0xffffff), SPU_LS_SIZE, &shm, 0x1000));
ensure(vm::get(vm::spu)->falloc(vm_offset(), SPU_LS_SIZE, &shm, 0x1000));
}
// Try to guess free area
@ -1756,7 +1765,6 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u
fmt::throw_exception("Failed to map SPU LS memory");
}())
, thread_type(group ? spu_type::threaded : is_isolated ? spu_type::isolated : spu_type::raw)
, group(group)
, option(option)
, lv2_id(lv2_id)
, spu_tname(make_single<std::string>(name))
@ -1898,6 +1906,11 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
// SPU Thread Group MMIO (LS and SNR) and RawSPU MMIO
if (_this && eal >= RAW_SPU_BASE_ADDR)
{
if (g_cfg.core.mfc_debug && _this)
{
// TODO
}
const u32 index = (eal - SYS_SPU_THREAD_BASE_LOW) / SYS_SPU_THREAD_OFFSET; // thread number in group
const u32 offset = (eal - SYS_SPU_THREAD_BASE_LOW) % SYS_SPU_THREAD_OFFSET; // LS offset or MMIO register
@ -2111,6 +2124,14 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
if (size == size0)
{
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
}
return;
}
}
@ -2300,6 +2321,15 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
}
//atomic_fence_seq_cst();
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
}
return;
}
else
@ -2439,6 +2469,14 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
}
}
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
}
return;
}
@ -2499,6 +2537,14 @@ plain_access:
break;
}
}
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
}
}
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
@ -3242,11 +3288,23 @@ bool spu_thread::process_mfc_cmd()
mov_rdata(_ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80), rdata);
ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
if (g_cfg.core.mfc_debug)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
std::memcpy(dump.data, rdata, 128);
}
return true;
}
case MFC_PUTLLC_CMD:
{
// Avoid logging useless commands if there is no reservation
const bool dump = g_cfg.core.mfc_debug && raddr;
if (do_putllc(ch_mfc_cmd))
{
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
@ -3256,16 +3314,41 @@ bool spu_thread::process_mfc_cmd()
ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
}
if (dump)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
dump.cmd.tag = static_cast<u32>(ch_atomic_stat.get_value()); // Use tag as atomic status
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
}
return !test_stopped();
}
case MFC_PUTLLUC_CMD:
{
if (g_cfg.core.mfc_debug)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
}
do_putlluc(ch_mfc_cmd);
ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
return !test_stopped();
}
case MFC_PUTQLLUC_CMD:
{
if (g_cfg.core.mfc_debug)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
}
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
@ -3342,6 +3425,15 @@ bool spu_thread::process_mfc_cmd()
auto& cmd = mfc_queue[mfc_size];
cmd = ch_mfc_cmd;
//if (g_cfg.core.mfc_debug)
//{
// TODO: This needs a disambiguator with list elements dumping
// auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
// dump.cmd = ch_mfc_cmd;
// dump.cmd.eah = pc;
// std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.eah & 0x3fff0), std::min<u32>(ch_mfc_cmd.size, 128));
//}
if (do_dma_check(cmd)) [[likely]]
{
if (!cmd.size || do_list_transfer(cmd)) [[likely]]

View file

@ -739,13 +739,14 @@ public:
spu_channel exit_status{}; // Threaded SPU exit status (not a channel, but the interface fits)
atomic_t<u32> last_exit_status; // Value to be written in exit_status after checking group termination
private:
lv2_spu_group* const group; // SPU Thread Group (only safe to access in the spu thread itself)
public:
const u32 index; // SPU index
std::shared_ptr<utils::shm> shm; // SPU memory
const std::add_pointer_t<u8> ls; // SPU LS pointer
const spu_type thread_type;
private:
lv2_spu_group* const group; // SPU Thread Group (only safe to access in the spu thread itself)
public:
const u32 option; // sys_spu_thread_initialize option
const u32 lv2_id; // The actual id that is used by syscalls
@ -769,6 +770,9 @@ public:
u64 last_fail = 0;
u64 last_succ = 0;
u64 mfc_dump_idx = 0;
static constexpr u32 max_mfc_dump_idx = SPU_LS_SIZE / sizeof(mfc_cmd_dump);
std::array<v128, 0x4000> stack_mirror; // Return address information
const char* current_func{}; // Current STOP or RDCH blocking function
@ -820,6 +824,11 @@ public:
return thread_type;
}
u32 vm_offset() const
{
return group ? SPU_FAKE_BASE_ADDR + SPU_LS_SIZE * (id & 0xffffff) : RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index;
}
// Returns true if reservation existed but was just discovered to be lost
// It is safe to use on any address, even if not directly accessed by SPU (so it's slower)
bool reservation_check(u32 addr, const decltype(rdata)& data);

View file

@ -740,13 +740,13 @@ namespace vm
if (flags & page_executable)
{
// TODO
// TODO (dead code)
utils::memory_commit(g_exec_addr + addr * 2, size * 2);
}
if (g_cfg.core.ppu_debug)
{
utils::memory_commit(g_stat_addr + addr, size);
if (g_cfg.core.ppu_debug)
{
utils::memory_commit(g_stat_addr + addr, size);
}
}
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
@ -926,11 +926,11 @@ namespace vm
if (is_exec)
{
utils::memory_decommit(g_exec_addr + addr * 2, size * 2);
}
if (g_cfg.core.ppu_debug)
{
utils::memory_decommit(g_stat_addr + addr, size);
if (g_cfg.core.ppu_debug)
{
utils::memory_decommit(g_stat_addr + addr, size);
}
}
// Unlock

View file

@ -1779,7 +1779,7 @@ void Emulator::Resume()
for (u32 i = 0x10000; i < 0x20000000;)
{
if (vm::check_addr(i))
if (vm::check_addr(i, vm::page_executable))
{
if (auto& data = *reinterpret_cast<be_t<u32>*>(vm::g_stat_addr + i))
{

View file

@ -39,6 +39,7 @@ struct cfg_root : cfg::node
cfg::_bool lower_spu_priority{ this, "Lower SPU thread priority" };
cfg::_bool spu_getllar_polling_detection{ this, "SPU GETLLAR polling detection", false, true };
cfg::_bool spu_debug{ this, "SPU Debug" };
cfg::_bool mfc_debug{ this, "MFC Debug" };
cfg::_int<0, 6> preferred_spu_threads{ this, "Preferred SPU Threads", 0, true }; // Number of hardware threads dedicated to heavy simultaneous spu tasks
cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free
cfg::_bool spu_loop_detection{ this, "SPU loop detection", true, true }; // Try to detect wait loops and trigger thread yield

View file

@ -7,6 +7,7 @@
#include "breakpoint_list.h"
#include "breakpoint_handler.h"
#include "call_stack_list.h"
#include "input_dialog.h"
#include "qt_utils.h"
#include "Emu/System.h"
@ -28,6 +29,7 @@
#include <QMenu>
#include <QVBoxLayout>
#include <QTimer>
#include <charconv>
#include "util/asm.hpp"
@ -259,6 +261,7 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
QLabel* l = new QLabel(tr(
"Keys Ctrl+G: Go to typed address."
"\nKeys Alt+S: Capture SPU images of selected SPU."
"\nKey D: SPU MFC commands logger, MFC debug setting must be enabled."
"\nKey E: Instruction Editor: click on the instruction you want to modify, then press E."
"\nKey F: Dedicated floating point mode switch for SPU threads."
"\nKey R: Registers Editor for selected thread."
@ -311,6 +314,76 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
{
switch (event->key())
{
case Qt::Key_D:
{
if (cpu->id_type() == 2 && g_cfg.core.mfc_debug && !cpu->is_stopped())
{
input_dialog dlg(4, "", tr("Max MFC cmds logged"),
tr("Decimal only, max allowed is 1820."), "0", this);
QFont mono = QFontDatabase::systemFont(QFontDatabase::FixedFont);
mono.setPointSize(8);
dlg.set_input_font(mono, false);
dlg.set_clear_button_enabled(false);
dlg.set_button_enabled(QDialogButtonBox::StandardButton::Ok, false);
dlg.set_validator(new QRegExpValidator(QRegExp("^[1-9][0-9]*$")));
u32 max = 0;
connect(&dlg, &input_dialog::text_changed, [&](const QString& changed)
{
bool ok = false;
const u32 dummy = changed.toUInt(&ok, 10);
ok = ok && dummy && dummy <= spu_thread::max_mfc_dump_idx;
dlg.set_button_enabled(QDialogButtonBox::StandardButton::Ok, ok);
if (ok)
{
max = dummy;
}
});
if (dlg.exec() != QDialog::Accepted)
{
max = 0;
}
auto spu = static_cast<spu_thread*>(cpu);
auto ptr = reinterpret_cast<const mfc_cmd_dump*>(vm::g_stat_addr + spu->vm_offset());
std::string ret;
for (u64 count = 0, idx = spu->mfc_dump_idx - 1; idx != umax && count < max; count++, idx--)
{
auto dump = ptr[idx % spu_thread::max_mfc_dump_idx];
const u32 pc = std::exchange(dump.cmd.eah, 0);
fmt::append(ret, "\n(%d) PC 0x%05x: [%s]", count, pc, dump.cmd);
if (dump.cmd.cmd == MFC_PUTLLC_CMD)
{
fmt::append(ret, " %s", dump.cmd.tag == MFC_PUTLLC_SUCCESS ? "(passed)" : "(failed)");
}
const auto data = reinterpret_cast<const be_t<u32>*>(dump.data);
for (usz i = 0; i < utils::aligned_div(std::min<u32>(dump.cmd.size, 128), 4); i += 4)
{
fmt::append(ret, "\n[0x%02x] %08x %08x %08x %08x", i * sizeof(data[0])
, data[i + 0], data[i + 1], data[i + 2], data[i + 3]);
}
}
if (ret.empty())
{
ret = "No MFC commands have been logged";
}
spu_log.warning("SPU MFC dump of '%s': %s", spu->get_name(), ret);
}
return;
}
case Qt::Key_E:
{
if (cpu->id_type() == 1 || cpu->id_type() == 2)

View file

@ -17,6 +17,7 @@ enum class emu_settings_type
PreferredSPUThreads,
PPUDebug,
SPUDebug,
MFCDebug,
MaxLLVMThreads,
EnableTSX,
AccurateGETLLAR,
@ -163,6 +164,7 @@ static const QMap<emu_settings_type, cfg_location> settings_location =
{ emu_settings_type::PreferredSPUThreads, { "Core", "Preferred SPU Threads"}},
{ emu_settings_type::PPUDebug, { "Core", "PPU Debug"}},
{ emu_settings_type::SPUDebug, { "Core", "SPU Debug"}},
{ emu_settings_type::MFCDebug, { "Core", "MFC Debug"}},
{ emu_settings_type::MaxLLVMThreads, { "Core", "Max LLVM Compile Threads"}},
{ emu_settings_type::EnableTSX, { "Core", "Enable TSX"}},
{ emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},

View file

@ -1745,6 +1745,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->spuDebug, emu_settings_type::SPUDebug);
SubscribeTooltip(ui->spuDebug, tooltips.settings.spu_debug);
m_emu_settings->EnhanceCheckBox(ui->mfcDebug, emu_settings_type::MFCDebug);
SubscribeTooltip(ui->mfcDebug, tooltips.settings.mfc_debug);
m_emu_settings->EnhanceCheckBox(ui->setDAZandFTZ, emu_settings_type::SetDAZandFTZ);
SubscribeTooltip(ui->setDAZandFTZ, tooltips.settings.set_daz_and_ftz);

View file

@ -3453,6 +3453,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="mfcDebug">
<property name="text">
<string>MFC Debug</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="setDAZandFTZ">
<property name="text">

View file

@ -74,6 +74,7 @@ public:
const QString ppu_debug = tr("Creates PPU logs.\nOnly useful to developers.\nNever use this.");
const QString spu_debug = tr("Creates SPU logs.\nOnly useful to developers.\nNever use this.");
const QString mfc_debug = tr("Creates MFC logs.\nOnly useful to developers.\nNever use this.");
const QString set_daz_and_ftz = tr("Sets special MXCSR flags to debug errors in SSE operations.\nOnly used in PPU thread when it's not precise.\nOnly useful to developers.\nNever use this.");
const QString accurate_getllar = tr("Accurately processes SPU MFC_GETLLAR operation.");
const QString accurate_spu_dma = tr("Accurately processes SPU DMA operations.");