Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-08-06 08:08:42 +00:00)

asc queues submission refactoring

commit 133a282be7 (parent 26f3925212)
3 changed files with 54 additions and 31 deletions
File 1 of 3 (GnmDriver: sceGnmDingDong / sceGnmMapComputeQueue)

@@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
 // In case if `submitDone` is issued we need to block submissions until GPU idle
 static u32 submission_lock{};
 std::condition_variable cv_lock{};
-static std::mutex m_submission{};
+std::mutex m_submission{};
 static u64 frames_submitted{};      // frame counter
 static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
 static int sdk_version{0};

-struct AscQueueInfo {
-    VAddr map_addr;
-    u32* read_addr;
-    u32 ring_size_dw;
-};
-static Common::SlotVector<AscQueueInfo> asc_queues{};
+static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
 static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
 static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;

@@ -506,11 +501,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
     }

     auto vqid = gnm_vqid - 1;
-    auto& asc_queue = asc_queues[{vqid}];
-    const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
-    const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
-                                       : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
-    const std::span acb_span{acb_ptr, acb_size >> 2u};
+    auto& asc_queue = liverpool->asc_queues[{vqid}];
+    const auto& offs_dw = asc_next_offs_dw[vqid];
+
+    if (next_offs_dw < offs_dw) {
+        ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
+    }
+
+    const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
+    const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
+    const std::span acb_span{acb_ptr, acb_size_dw};
+
+    asc_next_offs_dw[vqid] = next_offs_dw;

     if (DebugState.DumpingCurrentFrame()) {
         static auto last_frame_num = -1LL;
@@ -545,9 +548,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
         });
     }
     liverpool->SubmitAsc(gnm_vqid, acb_span);
-
-    *asc_queue.read_addr += acb_size;
-    *asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
 }

 void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
@@ -1266,12 +1266,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
         return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
     }

-    auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
+    const auto vqid =
+        liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
     // We need to offset index as `dingDong` assumes it to be from the range [1..64]
     const auto gnm_vqid = vqid.index + 1;
     LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
              gnm_vqid);

+    const auto& queue = liverpool->asc_queues[vqid];
+    *queue.read_addr = 0u;
+
     return gnm_vqid;
 }
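Note on the sceGnmDingDong change: the byte-based *read_addr arithmetic is replaced by a per-queue dword offset (asc_next_offs_dw). A doorbell now submits the window between the previously recorded offset and next_offs_dw, and next_offs_dw == 0 means the writer wrapped to the start of the ring. Below is a minimal, self-contained sketch of that bookkeeping with toy values; ToyRing and Doorbell are hypothetical stand-ins for illustration, not emulator code.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <span>
#include <vector>

using u32 = std::uint32_t;

// Hypothetical stand-in for one mapped compute ring plus its last submitted offset.
struct ToyRing {
    std::vector<u32> ring;  // plays the role of the guest-mapped ACB (map_addr)
    u32 prev_offs_dw = 0;   // plays the role of asc_next_offs_dw[vqid]
};

// One "ding-dong" doorbell: return the dword window [prev_offs_dw, next_offs_dw),
// where next_offs_dw == 0 means "up to the end of the ring" (the writer wrapped).
std::span<const u32> Doorbell(ToyRing& q, u32 next_offs_dw) {
    const u32 ring_size_dw = static_cast<u32>(q.ring.size());
    if (next_offs_dw < q.prev_offs_dw) {
        // Anything other than a clean wrap to offset 0 would split the submission.
        assert(next_offs_dw == 0);
    }
    const u32 begin = q.prev_offs_dw;
    const u32 end = next_offs_dw != 0 ? next_offs_dw : ring_size_dw;
    q.prev_offs_dw = next_offs_dw; // the next submission starts where this one ended
    return {q.ring.data() + begin, end - begin};
}

int main() {
    ToyRing q;
    q.ring.assign(64, 0u);
    const auto a = Doorbell(q, 16); // dwords [0, 16)
    const auto b = Doorbell(q, 40); // dwords [16, 40)
    const auto c = Doorbell(q, 0);  // wrapped: dwords [40, 64)
    std::printf("%zu %zu %zu\n", a.size(), b.size(), c.size()); // prints: 16 24 24
}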
File 2 of 3 (Liverpool command processor, implementation)

@@ -29,11 +29,11 @@ static_assert(Liverpool::NumComputeRings <= MAX_NAMES);

 static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);

-#define YIELD_CE(name) \
+#define YIELD_CE() \
     mapped_queues[GfxQueueId].cs_state = regs.cs_program; \
     FIBER_EXIT; \
     co_yield {}; \
-    FIBER_ENTER(name); \
+    FIBER_ENTER(ccb_task_name); \
     regs.cs_program = mapped_queues[GfxQueueId].cs_state

 #define YIELD_GFX \
@@ -44,11 +44,11 @@ static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);
     regs.cs_program = mapped_queues[GfxQueueId].cs_state;

 #define YIELD_ASC(id) \
-    mapped_queues[id].cs_state = regs.cs_program; \
+    mapped_queues[id + 1].cs_state = regs.cs_program; \
     FIBER_EXIT; \
     co_yield {}; \
     FIBER_ENTER(acb_task_name[id]); \
-    regs.cs_program = mapped_queues[id].cs_state;
+    regs.cs_program = mapped_queues[id + 1].cs_state;

 #define RESUME(task, name) \
     FIBER_EXIT; \
@@ -114,7 +114,7 @@ void Liverpool::Process(std::stop_token stoken) {
                 --num_commands;
             }

-            qid = (qid + 1) % NumTotalQueues;
+            qid = (qid + 1) % num_mapped_queues;

             auto& queue = mapped_queues[qid];

@@ -190,7 +190,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
         case PM4ItOpcode::WaitOnDeCounterDiff: {
            const auto diff = it_body[0];
            while ((cblock.de_count - cblock.ce_count) >= diff) {
-                YIELD_CE(ccb_task_name);
+                YIELD_CE();
            }
            break;
        }
@@ -198,10 +198,9 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
            const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
            auto task =
                ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
-            RESUME(task, ccb_task_name);

            while (!task.handle.done()) {
-                YIELD_CE(ccb_task_name);
+                YIELD_CE();
                RESUME(task, ccb_task_name);
            }
            break;
@@ -229,7 +228,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
     if (!ccb.empty()) {
         // In case of CCB provided kick off CE asap to have the constant heap ready to use
         ce_task = ProcessCeUpdate(ccb);
-        RESUME(ce_task, dcb_task_name);
     }

     const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
@@ -708,8 +706,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
     FIBER_EXIT;
 }

-Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
+template <bool is_indirect>
+Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
     FIBER_ENTER(acb_task_name[vqid]);
+    const auto& queue = asc_queues[{vqid}];

     auto base_addr = reinterpret_cast<uintptr_t>(acb.data());
     while (!acb.empty()) {
@@ -730,7 +730,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
        }
        case PM4ItOpcode::IndirectBuffer: {
            const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
-            auto task = ProcessCompute(
+            auto task = ProcessCompute<true>(
                {indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
            RESUME(task, acb_task_name[vqid]);

@@ -823,7 +823,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
        }
        case PM4ItOpcode::ReleaseMem: {
            const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
-            release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <---
+            release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
            break;
        }
        default:
@@ -831,7 +831,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
                      static_cast<u32>(opcode), count);
        }

-        acb = NextPacket(acb, header->type3.NumWords() + 1);
+        const auto packet_size_dw = header->type3.NumWords() + 1;
+        acb = NextPacket(acb, packet_size_dw);
+
+        if constexpr (!is_indirect) {
+            *queue.read_addr += packet_size_dw;
+            *queue.read_addr %= queue.ring_size_dw;
+        }
     }

     FIBER_EXIT;
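Note on the ProcessCompute change: the guest-visible read pointer is now advanced packet by packet, but only while walking the ring itself; packets pulled from an IndirectBuffer must leave the ring's read pointer alone, which is what the new is_indirect template parameter guards. A toy illustration of that split follows; ToyQueue, Consume and the fake packet layout are assumptions for the sketch, not Liverpool's real types.

#include <cstdint>
#include <cstdio>
#include <span>
#include <vector>

using u32 = std::uint32_t;

// Hypothetical stand-in for the per-queue state the compute walker consults.
struct ToyQueue {
    u32 read_dw = 0;      // plays the role of *queue.read_addr (kept in dwords here)
    u32 ring_size_dw = 0; // plays the role of queue.ring_size_dw
};

// Walk a packet stream; only the top-level (ring) walk advances the read pointer,
// mirroring the `if constexpr (!is_indirect)` guard added to ProcessCompute.
template <bool is_indirect = false>
void Consume(std::span<const u32> stream, ToyQueue& q) {
    constexpr u32 packet_size_dw = 4; // pretend every packet is exactly 4 dwords
    while (stream.size() >= packet_size_dw) {
        if (stream[0] == 0xDEADBEEFu) {
            // Pretend this packet chains to an indirect buffer: walk it without
            // touching the ring's read pointer.
            static const std::vector<u32> nested(8, 0u);
            Consume<true>(std::span<const u32>{nested}, q);
        }
        stream = stream.subspan(packet_size_dw);
        if constexpr (!is_indirect) {
            q.read_dw = (q.read_dw + packet_size_dw) % q.ring_size_dw;
        }
    }
}

int main() {
    ToyQueue q;
    q.ring_size_dw = 32;
    const std::vector<u32> ring(16, 0u);
    Consume(std::span<const u32>{ring}, q); // top-level walk over 16 dwords
    std::printf("read_dw = %u\n", q.read_dw); // 16: only ring packets moved it
}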
@@ -895,13 +901,15 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
     ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index");
     auto& queue = mapped_queues[gnm_vqid];

-    const auto& task = ProcessCompute(acb, gnm_vqid);
+    const auto vqid = gnm_vqid - 1;
+    const auto& task = ProcessCompute(acb, vqid);
     {
         std::scoped_lock lock{queue.m_access};
         queue.submits.emplace(task.handle);
     }

     std::scoped_lock lk{submit_mutex};
+    num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
     ++num_submits;
     submit_cv.notify_one();
 }
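Note on the index convention that several hunks share: games see a 1-based gnm_vqid (slot 0 of mapped_queues is the GFX queue), while the ASC-side arrays (asc_queues, acb_task_name, asc_next_offs_dw) are 0-based. SubmitAsc therefore passes the 0-based vqid to ProcessCompute, YIELD_ASC maps it back with id + 1, and num_mapped_queues lets Process() skip slots no game has ever mapped. A small sanity sketch of that mapping; the constant values here are placeholders, not the emulator's.

#include <algorithm>
#include <cassert>
#include <cstdint>

using u32 = std::uint32_t;

// Assumed values for the sketch; the real constants live in Liverpool.
constexpr u32 GfxQueueId = 0;
constexpr u32 NumComputeRings = 7;
constexpr u32 NumTotalQueues = NumComputeRings + 1; // slot 0 is the GFX queue

int main() {
    u32 num_mapped_queues = 1; // GFX is always available
    for (u32 vqid = 0; vqid < NumComputeRings; ++vqid) {
        // What sceGnmMapComputeQueue hands back: a 1-based id derived from the slot index.
        const u32 gnm_vqid = vqid + 1;
        assert(gnm_vqid > GfxQueueId && gnm_vqid < NumTotalQueues); // SubmitAsc's bound
        // YIELD_ASC(id) saves/restores mapped_queues[id + 1] == mapped_queues[gnm_vqid],
        // while acb_task_name[id] and asc_queues[{id}] stay 0-based.
        num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
    }
    // Process() round-robins with qid = (qid + 1) % num_mapped_queues
    // instead of % NumTotalQueues, so untouched slots are never visited.
    assert(num_mapped_queues == NumTotalQueues);
}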
File 3 of 3 (Liverpool command processor, header)

@@ -16,6 +16,7 @@
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/polyfill_thread.h"
+#include "common/slot_vector.h"
 #include "common/types.h"
 #include "common/unique_function.h"
 #include "shader_recompiler/params.h"
@@ -1342,6 +1343,14 @@ public:
         gfx_queue.dcb_buffer.reserve(GfxReservedSize);
     }

+    struct AscQueueInfo {
+        VAddr map_addr;
+        u32* read_addr;
+        u32 ring_size_dw;
+        u32 pipe_id;
+    };
+    Common::SlotVector<AscQueueInfo> asc_queues{};
+
 private:
     struct Task {
         struct promise_type {
@@ -1379,7 +1388,8 @@ private:
                      std::span<const u32> ccb);
     Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
     Task ProcessCeUpdate(std::span<const u32> ccb);
-    Task ProcessCompute(std::span<const u32> acb, int vqid);
+    template <bool is_indirect = false>
+    Task ProcessCompute(std::span<const u32> acb, u32 vqid);

     void Process(std::stop_token stoken);

@@ -1394,6 +1404,7 @@ private:
         VAddr indirect_args_addr{};
     };
     std::array<GpuQueue, NumTotalQueues> mapped_queues{};
+    u32 num_mapped_queues{1u}; // GFX is always available

     struct ConstantEngine {
         void Reset() {
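Note on the AscQueueInfo move: the ring description now lives in Liverpool and is keyed by Common::SlotVector. The GnmDriver side inserts the ring and gets back a slot id whose index becomes the 1-based gnm_vqid; later lookups rebuild the id from that index (asc_queues[{vqid}]). The stand-in below imitates that handle pattern with a plain std::vector; ToySlotVector is hypothetical, and Common::SlotVector's real interface is only known here from the two calls visible in this diff (a forwarding insert and operator[] taking an id with an index field).

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

using u32 = std::uint32_t;
using VAddr = std::uintptr_t;

struct SlotId {
    u32 index;
};

// Toy stand-in for a slot container: insert forwards constructor arguments and
// returns a handle; operator[] resolves a handle back to the element.
template <typename T>
struct ToySlotVector {
    template <typename... Args>
    SlotId insert(Args&&... args) {
        storage.emplace_back(T{std::forward<Args>(args)...});
        return SlotId{static_cast<u32>(storage.size() - 1)};
    }
    T& operator[](SlotId id) {
        return storage[id.index];
    }
    std::vector<T> storage;
};

struct AscQueueInfo {
    VAddr map_addr;
    u32* read_addr;
    u32 ring_size_dw;
    u32 pipe_id;
};

int main() {
    ToySlotVector<AscQueueInfo> asc_queues;
    u32 read_ptr = 0xffffu; // garbage before mapping
    // Mapping side (cf. sceGnmMapComputeQueue): insert the ring, reset the read pointer.
    const auto vqid = asc_queues.insert(VAddr{0x1000}, &read_ptr, 256u, 1u);
    const auto gnm_vqid = vqid.index + 1; // 1-based id handed back to the game
    asc_queues[vqid].read_addr[0] = 0u;
    // Doorbell side (cf. sceGnmDingDong): rebuild the slot id from the 0-based index.
    auto& q = asc_queues[{gnm_vqid - 1}];
    std::printf("pipe %u, ring %u dw, read %u\n", q.pipe_id, q.ring_size_dw, *q.read_addr);
}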