final touches and review notes

This commit is contained in:
psucien 2024-12-14 20:44:13 +01:00
commit 9c304b9af8
8 changed files with 95 additions and 53 deletions

View file

@ -15,7 +15,6 @@
using namespace DebugStateType; using namespace DebugStateType;
DebugStateImpl& DebugState = *Common::Singleton<DebugStateImpl>::Instance(); DebugStateImpl& DebugState = *Common::Singleton<DebugStateImpl>::Instance();
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
static ThreadID ThisThreadID() { static ThreadID ThisThreadID() {
#ifdef _WIN32 #ifdef _WIN32
@ -143,39 +142,59 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
frame.queues.push_back(std::move(dump)); frame.queues.push_back(std::move(dump));
} }
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute) { std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
std::scoped_lock lock{frame_dump_list_mutex};
const auto it = waiting_reg_dumps.find(header_addr); const auto it = waiting_reg_dumps.find(header_addr);
if (it == waiting_reg_dumps.end()) { if (it == waiting_reg_dumps.end()) {
return; return std::nullopt;
} }
auto& frame = *it->second; auto& frame = *it->second;
waiting_reg_dumps.erase(it); waiting_reg_dumps.erase(it);
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr)); waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
auto& dump = frame.regs[header_addr - base_addr]; return &frame.regs[header_addr - base_addr];
dump.regs = liverpool->regs; }
if (is_compute) {
dump.is_compute = true; void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
auto& cs = dump.regs.cs_program; const AmdGpu::Liverpool::Regs& regs) {
cs = liverpool->GetCsRegs(); std::scoped_lock lock{frame_dump_list_mutex};
dump.cs_data = PipelineComputerProgramDump{
.cs_program = cs, auto dump = GetRegDump(base_addr, header_addr);
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()}, if (!dump) {
}; return;
} else { }
(*dump)->regs = regs;
for (int i = 0; i < RegDump::MaxShaderStages; i++) { for (int i = 0; i < RegDump::MaxShaderStages; i++) {
if (dump.regs.stage_enable.IsStageEnabled(i)) { if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
auto stage = dump.regs.ProgramForStage(i); auto stage = (*dump)->regs.ProgramForStage(i);
if (stage->address_lo != 0) { if (stage->address_lo != 0) {
auto code = stage->Code(); auto code = stage->Code();
dump.stages[i] = PipelineShaderProgramDump{ (*dump)->stages[i] = PipelineShaderProgramDump{
.user_data = *stage, .user_data = *stage,
.code = std::vector<u32>{code.begin(), code.end()}, .code = std::vector<u32>{code.begin(), code.end()},
}; };
} }
} }
} }
}
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
const CsState& cs_state) {
std::scoped_lock lock{frame_dump_list_mutex};
auto dump = GetRegDump(base_addr, header_addr);
if (!dump) {
return;
} }
(*dump)->is_compute = true;
auto& cs = (*dump)->regs.cs_program;
cs = cs_state;
(*dump)->cs_data = PipelineComputerProgramDump{
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
};
} }
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,

View file

@ -202,12 +202,18 @@ public:
void PushQueueDump(QueueDump dump); void PushQueueDump(QueueDump dump);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute = false); void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs);
using CsState = AmdGpu::Liverpool::ComputeProgram;
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
void CollectShader(const std::string& name, Shader::LogicalStage l_stage, void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv, vk::ShaderModule module, std::span<const u32> spv,
std::span<const u32> raw_code, std::span<const u32> patch_spv, std::span<const u32> raw_code, std::span<const u32> patch_spv,
bool is_patched); bool is_patched);
private:
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
}; };
} // namespace DebugStateType } // namespace DebugStateType

View file

@ -375,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} }
case PM4ItOpcode::SetShReg: { case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto set_size = (count - 1) * sizeof(u32);
if (set_data->reg_offset >= 0x200 &&
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
ASSERT(set_size <= sizeof(ComputeProgram));
auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32)); set_size);
}
break; break;
} }
case PM4ItOpcode::SetUconfigReg: { case PM4ItOpcode::SetUconfigReg: {
@ -398,7 +408,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header)); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -415,7 +425,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index_off->index_count; regs.num_indices = draw_index_off->index_count;
regs.draw_initiator = draw_index_off->draw_initiator; regs.draw_initiator = draw_index_off->draw_initiator;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header)); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -431,7 +441,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header)); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -447,7 +457,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndirectArgs); const auto size = sizeof(DrawIndirectArgs);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header)); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -464,7 +474,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndexedIndirectArgs); const auto size = sizeof(DrawIndexedIndirectArgs);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header)); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -481,7 +491,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto offset = draw_index_indirect->data_offset; const auto offset = draw_index_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header)); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -503,7 +513,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
cs_program.dim_z = dispatch_direct->dim_z; cs_program.dim_z = dispatch_direct->dim_z;
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true); DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
} }
if (rasterizer && (cs_program.dispatch_initiator & 1)) { if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -522,7 +533,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true); DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
} }
if (rasterizer && (cs_program.dispatch_initiator & 1)) { if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
@ -782,8 +794,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
if (set_data->reg_offset >= 0x200 && if (set_data->reg_offset >= 0x200 &&
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
ASSERT(set_size <= sizeof(ComputeProgram)); ASSERT(set_size <= sizeof(ComputeProgram));
auto* addr = auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
reinterpret_cast<u32*>(&asc_sh_regs[vqid]) + (set_data->reg_offset - 0x200); (set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size); std::memcpy(addr, header + 2, set_size);
} else { } else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
@ -800,7 +812,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
cs_program.dim_z = dispatch_direct->dim_z; cs_program.dim_z = dispatch_direct->dim_z;
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true); DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
} }
if (rasterizer && (cs_program.dispatch_initiator & 1)) { if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);

View file

@ -1145,7 +1145,7 @@ struct Liverpool {
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
ShaderProgram ls_program; ShaderProgram ls_program;
INSERT_PADDING_WORDS(0xA4); INSERT_PADDING_WORDS(0xA4);
ComputeProgram cs_program; ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
DepthRenderControl depth_render_control; DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
@ -1279,7 +1279,6 @@ struct Liverpool {
}; };
Regs regs{}; Regs regs{};
std::array<ComputeProgram, NumComputeRings> asc_sh_regs{};
// See for a comment in context reg parsing code // See for a comment in context reg parsing code
union CbDbExtent { union CbDbExtent {
@ -1345,7 +1344,7 @@ public:
} }
inline ComputeProgram& GetCsRegs() { inline ComputeProgram& GetCsRegs() {
return *curr_cs_regs; return mapped_queues[curr_gnm_queue_id].cs_state;
} }
struct AscQueueInfo { struct AscQueueInfo {
@ -1399,11 +1398,11 @@ private:
void Process(std::stop_token stoken); void Process(std::stop_token stoken);
inline void SaveDispatchContext() { inline void SaveDispatchContext() {
curr_cs_regs = &regs.cs_program; curr_gnm_queue_id = GfxQueueId;
} }
inline void SaveDispatchContext(u32 vqid) { inline void SaveDispatchContext(u32 vqid) {
curr_cs_regs = &asc_sh_regs[vqid]; curr_gnm_queue_id = vqid + 1;
} }
struct GpuQueue { struct GpuQueue {
@ -1413,6 +1412,7 @@ private:
std::vector<u32> dcb_buffer; std::vector<u32> dcb_buffer;
std::vector<u32> ccb_buffer; std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{}; std::queue<Task::Handle> submits{};
ComputeProgram cs_state{};
VAddr indirect_args_addr{}; VAddr indirect_args_addr{};
}; };
std::array<GpuQueue, NumTotalQueues> mapped_queues{}; std::array<GpuQueue, NumTotalQueues> mapped_queues{};
@ -1445,7 +1445,7 @@ private:
std::mutex submit_mutex; std::mutex submit_mutex;
std::condition_variable_any submit_cv; std::condition_variable_any submit_cv;
std::queue<Common::UniqueFunction<void>> command_queue{}; std::queue<Common::UniqueFunction<void>> command_queue{};
ComputeProgram* curr_cs_regs{&regs.cs_program}; u32 curr_gnm_queue_id{GfxQueueId}; // Gnm queue processing dispatch
}; };
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);

View file

@ -172,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
} }
break; break;
} }
case Shader::Stage::Compute: { case Stage::Compute: {
const auto& cs_pgm = liverpool->GetCsRegs(); const auto& cs_pgm = liverpool->GetCsRegs();
info.num_user_data = cs_pgm.settings.num_user_regs; info.num_user_data = cs_pgm.settings.num_user_regs;
info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4; info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;

View file

@ -324,7 +324,7 @@ void Rasterizer::DispatchDirect() {
} }
const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
if (ExecuteShaderHLE(cs, *this)) { if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) {
return; return;
} }

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_hle.h" #include "video_core/renderer_vulkan/vk_shader_hle.h"
@ -11,10 +12,11 @@ namespace Vulkan {
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) { static bool ExecuteCopyShaderHLE(const Shader::Info& info,
const AmdGpu::Liverpool::ComputeProgram& cs_program,
Rasterizer& rasterizer) {
auto& scheduler = rasterizer.GetScheduler(); auto& scheduler = rasterizer.GetScheduler();
auto& buffer_cache = rasterizer.GetBufferCache(); auto& buffer_cache = rasterizer.GetBufferCache();
const auto& cs_program = liverpool->GetCsRegs();
// Copy shader defines three formatted buffers as inputs: control, source, and destination. // Copy shader defines three formatted buffers as inputs: control, source, and destination.
const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info); const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info);
@ -121,10 +123,11 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
return true; return true;
} }
bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) { bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) {
switch (info.pgm_hash) { switch (info.pgm_hash) {
case COPY_SHADER_HASH: case COPY_SHADER_HASH:
return ExecuteCopyShaderHLE(info, rasterizer); return ExecuteCopyShaderHLE(info, cs_program, rasterizer);
default: default:
return false; return false;
} }

View file

@ -3,7 +3,7 @@
#pragma once #pragma once
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/amdgpu/liverpool.h"
namespace Shader { namespace Shader {
struct Info; struct Info;
@ -14,6 +14,7 @@ namespace Vulkan {
class Rasterizer; class Rasterizer;
/// Attempts to execute a shader using HLE if possible. /// Attempts to execute a shader using HLE if possible.
bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer); bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer);
} // namespace Vulkan } // namespace Vulkan