From 70b9271cd8e4ac21e0d6adcbab0f3e099bef4e4f Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Fri, 6 Nov 2015 00:56:27 +0100
Subject: [PATCH] rsx/d3d12: Record frame buffer content per draw call.

---
 rpcs3/Emu/RSX/D3D12/D3D12GSRender.h           |   4 +
 rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 147 ++++++++++++++++++
 rpcs3/Emu/RSX/RSXThread.cpp                   |  79 +++++++++-
 rpcs3/Emu/RSX/RSXThread.h                     |  53 +++++++
 4 files changed, 282 insertions(+), 1 deletion(-)

diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
index d300a0d931..d8179d5941 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
@@ -214,4 +214,8 @@ protected:
 
 	virtual void load_vertex_data(u32 first, u32 count) override;
 	virtual void load_vertex_index_data(u32 first, u32 count) override;
+
+	virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) override;
+	virtual void copy_depth_buffer_to_memory(void *buffer) override;
+	virtual void copy_stencil_buffer_to_memory(void *buffer) override;
 };
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp
index 7088951360..83b5e5c84b 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp
@@ -587,4 +587,151 @@ void D3D12GSRender::copy_render_target_to_dma_location()
 	}
 }
 
+
+/**
+ * Read back bound colour render target `rtt` into `buffer`.
+ * Submits the current command list and waits for the graphics queue, then copies
+ * clip_h rows using a destination pitch of clip_w * bytes-per-pixel.
+ * Only A8R8G8B8 and F_W16Z16Y16X16 are handled; other formats leave `buffer` untouched.
+ */
+void D3D12GSRender::copy_render_targets_to_memory(void *buffer, u8 rtt)
+{
+	ComPtr<ID3D12Resource> readback_buffer = create_readback_buffer_and_download(m_device.Get(), getCurrentResourceStorage().command_list.Get(), m_readbackResources, m_rtts.bound_render_targets[rtt], m_surface.color_format);
+
+	ThrowIfFailed(getCurrentResourceStorage().command_list->Close());
+	m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf());
+	getCurrentResourceStorage().set_new_command_list();
+
+	wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get());
+	m_readbackResources.m_get_pos = m_readbackResources.get_current_put_pos_minus_one();
+
+	int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+	int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+	size_t srcPitch = 0, dstPitch = 0;
+	switch (m_surface.color_format)
+	{
+	case CELL_GCM_SURFACE_A8R8G8B8:
+		srcPitch = align(clip_w * 4, 256);
+		dstPitch = clip_w * 4;
+		break;
+	case CELL_GCM_SURFACE_F_W16Z16Y16X16:
+		srcPitch = align(clip_w * 8, 256);
+		dstPitch = clip_w * 8;
+		break;
+	default:
+		// Pixel size unknown: never hand uninitialised pitches to the copy helper.
+		return;
+	}
+	copy_readback_buffer_to_dest(buffer, readback_buffer.Get(), srcPitch, dstPitch, clip_h);
+}
+
+/**
+ * Read back subresource 0 of the bound depth/stencil surface into `buffer` as
+ * clip_h rows of clip_w 32-bit texels (the copy uses DXGI_FORMAT_R32_TYPELESS).
+ * Submits the current command list and waits for the graphics queue.
+ */
+void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
+{
+	int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+	int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+
+	size_t row_pitch = align(clip_w * 4, 256);
+
+	ComPtr<ID3D12Resource> readback_buffer;
+	size_t buffer_size = row_pitch * clip_h;
+	assert(m_readbackResources.can_alloc(buffer_size));
+	size_t heapOffset = m_readbackResources.alloc(buffer_size);
+	ThrowIfFailed(
+		m_device->CreatePlacedResource(
+			m_readbackResources.m_heap,
+			heapOffset,
+			&CD3DX12_RESOURCE_DESC::Buffer(buffer_size),
+			D3D12_RESOURCE_STATE_COPY_DEST,
+			nullptr,
+			IID_PPV_ARGS(readback_buffer.GetAddressOf())
+			)
+		);
+
+	getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
+
+	getCurrentResourceStorage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_buffer.Get(), { 0,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
+		&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 0), nullptr);
+	getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
+
+	ThrowIfFailed(getCurrentResourceStorage().command_list->Close());
+	m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf());
+	getCurrentResourceStorage().set_new_command_list();
+
+	wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get());
+	m_readbackResources.m_get_pos = m_readbackResources.get_current_put_pos_minus_one();
+
+	void *mapped_buffer;
+	ThrowIfFailed(readback_buffer->Map(0, nullptr, &mapped_buffer));
+	for (int row = 0; row < clip_h; row++)
+	{
+		u32 *casted_dest = (u32*)((char*)buffer + row * clip_w * 4);
+		u32 *casted_src = (u32*)((char*)mapped_buffer + row * row_pitch);
+		// Copy only the clip_w texels of the row: row_pitch is padded to 256 bytes,
+		// so copying the whole pitch would write past the end of the destination.
+		for (int col = 0; col < clip_w; col++)
+			*casted_dest++ = *casted_src++;
+	}
+	readback_buffer->Unmap(0, nullptr);
+}
+
+
+/**
+ * Read back subresource 1 of the bound depth/stencil surface into `buffer` as
+ * clip_h rows of clip_w bytes (the copy uses DXGI_FORMAT_R8_TYPELESS).
+ * Submits the current command list and waits for the graphics queue.
+ */
+void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
+{
+	int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+	int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+
+	size_t row_pitch = align(clip_w * 4, 256);
+
+	ComPtr<ID3D12Resource> readback_buffer;
+	size_t buffer_size = row_pitch * clip_h;
+	assert(m_readbackResources.can_alloc(buffer_size));
+	size_t heapOffset = m_readbackResources.alloc(buffer_size);
+	ThrowIfFailed(
+		m_device->CreatePlacedResource(
+			m_readbackResources.m_heap,
+			heapOffset,
+			&CD3DX12_RESOURCE_DESC::Buffer(buffer_size),
+			D3D12_RESOURCE_STATE_COPY_DEST,
+			nullptr,
+			IID_PPV_ARGS(readback_buffer.GetAddressOf())
+			)
+		);
+
+	getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
+
+	getCurrentResourceStorage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_buffer.Get(), { 0,{ DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
+		&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 1), nullptr);
+	getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
+
+	ThrowIfFailed(getCurrentResourceStorage().command_list->Close());
+	m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf());
+	getCurrentResourceStorage().set_new_command_list();
+
+	wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get());
+	m_readbackResources.m_get_pos = m_readbackResources.get_current_put_pos_minus_one();
+
+	void *mapped_buffer;
+	ThrowIfFailed(readback_buffer->Map(0, nullptr, &mapped_buffer));
+	for (int row = 0; row < clip_h; row++)
+	{
+		char *casted_dest = (char*)buffer + row * clip_w;
+		char *casted_src = (char*)mapped_buffer + row * row_pitch;
+		// One byte per pixel: copy clip_w bytes, not the 256-byte padded row_pitch,
+		// so a row never spills into the rows that follow it in the destination.
+		for (int col = 0; col < clip_w; col++)
+			*casted_dest++ = *casted_src++;
+	}
+	readback_buffer->Unmap(0, nullptr);
+}
+
 #endif
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 906e5e09eb..4335bf19ff 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -21,7 +21,8 @@ extern "C"
 
 #define CMD_DEBUG 0
 
-extern u64 get_system_time();
+bool user_asked_for_frame_capture = false;
+frame_capture_data frame_debug;
 
 namespace rsx
 {
@@ -552,6 +553,18 @@ namespace rsx
 
 	void flip_command(thread* rsx, u32 arg)
 	{
+		if (user_asked_for_frame_capture)
+		{
+			rsx->capture_current_frame = true;
+			user_asked_for_frame_capture = false;
+			frame_debug.reset();
+		}
+		else if (rsx->capture_current_frame)
+		{
+			rsx->capture_current_frame = false;
+			Emu.Pause();
+		}
+
 		rsx->gcm_current_buffer = arg;
 		rsx->flip(arg);
 
@@ -625,7 +638,11 @@ namespace rsx
 		{
 			// try process using gpu
 			if (rsx->domethod(id, arg))
+			{
+				if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE)
+					rsx->capture_frame();
 				return;
+			}
 
 			// not handled by renderer
 			// try process using cpu
@@ -872,6 +889,61 @@ namespace rsx
 		}
 	}
 
+	/**
+	 * Snapshot the currently bound colour, depth and stencil surfaces and append
+	 * them to frame_debug.draw_calls. Colour targets are selected from
+	 * NV4097_SET_SURFACE_COLOR_TARGET; depth and stencil are read only when
+	 * get_address() of the zeta surface returns non-zero.
+	 */
+	void thread::capture_frame()
+	{
+		frame_capture_data::draw_state draw_state = {};
+
+		int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+		int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+		size_t pitch = clip_w * 4;
+		std::vector<u8> color_index_to_record;
+		switch (method_registers[NV4097_SET_SURFACE_COLOR_TARGET])
+		{
+		case CELL_GCM_SURFACE_TARGET_0:
+			color_index_to_record = { 0 };
+			break;
+		case CELL_GCM_SURFACE_TARGET_1:
+			color_index_to_record = { 1 };
+			break;
+		case CELL_GCM_SURFACE_TARGET_MRT1:
+			color_index_to_record = { 0, 1 };
+			break;
+		case CELL_GCM_SURFACE_TARGET_MRT2:
+			color_index_to_record = { 0, 1, 2 };
+			break;
+		case CELL_GCM_SURFACE_TARGET_MRT3:
+			color_index_to_record = { 0, 1, 2, 3 };
+			break;
+		}
+		for (size_t i : color_index_to_record)
+		{
+			draw_state.color_buffer[i].width = clip_w;
+			draw_state.color_buffer[i].height = clip_h;
+			// NOTE(review): sized for 4 bytes per pixel, but the D3D12 backend uses an
+			// 8 byte pitch for CELL_GCM_SURFACE_F_W16Z16Y16X16 - confirm and size per format.
+			draw_state.color_buffer[i].data.resize(pitch * clip_h);
+			copy_render_targets_to_memory(draw_state.color_buffer[i].data.data(), i);
+		}
+		if (get_address(method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], method_registers[NV4097_SET_CONTEXT_DMA_ZETA]))
+		{
+			draw_state.depth.width = clip_w;
+			draw_state.depth.height = clip_h;
+			draw_state.depth.data.resize(clip_w * clip_h * 4);
+			copy_depth_buffer_to_memory(draw_state.depth.data.data());
+			draw_state.stencil.width = clip_w;
+			draw_state.stencil.height = clip_h;
+			draw_state.stencil.data.resize(clip_w * clip_h * 4);
+			copy_stencil_buffer_to_memory(draw_state.stencil.data.data());
+		}
+		frame_debug.draw_calls.push_back(draw_state);
+	}
+
 	void thread::begin()
 	{
 		draw_mode = method_registers[NV4097_SET_BEGIN_END];
@@ -884,6 +956,9 @@ namespace rsx
 			vertex_array.clear();
 
 		transform_constants.clear();
+
+		if (capture_current_frame)
+			capture_frame();
 	}
 
 	void thread::task()
@@ -1006,6 +1081,8 @@ namespace rsx
 				}
 
 				method_registers[reg] = value;
+				if (capture_current_frame)
+					frame_debug.command_queue.push_back(std::make_pair(reg, value));
 
 				if (auto method = methods[reg])
 					method(this, value);
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index 4e1d8aa824..e9f1770701 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -10,6 +10,40 @@
 #include "Utilities/Timer.h"
 #include "Utilities/types.h"
 
+
+extern u64 get_system_time();
+
+/**
+ * Data recorded while a frame capture is active: the register writes seen by
+ * the RSX thread and one surface snapshot per capture_frame() call.
+ */
+struct frame_capture_data
+{
+	struct buffer
+	{
+		std::vector<u8> data; // raw bytes filled by the copy_*_to_memory callbacks
+		size_t width = 0, height = 0;
+	};
+
+	struct draw_state
+	{
+		buffer color_buffer[4];
+		buffer depth;
+		buffer stencil;
+	};
+	std::vector<std::pair<u32, u32> > command_queue; // (register, value) pairs
+	std::vector<draw_state> draw_calls;
+
+	void reset() noexcept
+	{
+		command_queue.clear();
+		draw_calls.clear();
+	}
+};
+
+extern bool user_asked_for_frame_capture;
+extern frame_capture_data frame_debug;
+
 namespace rsx
 {
 	namespace limits
@@ -169,6 +203,8 @@ namespace rsx
 
 		virtual void load_vertex_data(u32 first, u32 count);
 		virtual void load_vertex_index_data(u32 first, u32 count);
+		bool capture_current_frame = false;
+		void capture_frame();
 	public:
 		u32 ioAddress, ioSize;
 		int flip_status;
@@ -233,6 +269,23 @@ namespace rsx
 		*/
 		void fill_vertex_program_constants_data(void *buffer) noexcept;
 
+		/**
+		* Copy the content of colour render target rtt to buffer. Does nothing by default.
+		* TODO: It would be more efficient to combine multiple calls of this function into one.
+		*/
+		virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) {};
+
+		/**
+		* Copy depth content to buffer. Does nothing by default.
+		* TODO: It would be more efficient to combine multiple calls of this function into one.
+		*/
+		virtual void copy_depth_buffer_to_memory(void *buffer) {};
+
+		/**
+		* Copy stencil content to buffer. Does nothing by default.
+		* TODO: It would be more efficient to combine multiple calls of this function into one.
+		*/
+		virtual void copy_stencil_buffer_to_memory(void *buffer) {};
 	public:
 		void reset();
 		void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);