diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index aba88af224..a2cddff089 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -152,7 +152,7 @@ public: const vertex_program_type& get_transform_program(const RSXVertexProgram& rsx_vp) const { auto I = m_vertex_shader_cache.find(rsx_vp.data); - if (I == m_vertex_shader_cache.end()) + if (I != m_vertex_shader_cache.end()) return I->second; throw new EXCEPTION("Trying to get unknow transform program"); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index deba2e0d46..f9618600c4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -200,8 +200,7 @@ protected: virtual void end() override; virtual void flip(int buffer) override; - virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) override; - virtual void copy_depth_buffer_to_memory(void *buffer) override; - virtual void copy_stencil_buffer_to_memory(void *buffer) override; + virtual std::array, 4> copy_render_targets_to_memory() override; + virtual std::array, 2> copy_depth_stencil_buffer_to_memory() override; virtual std::pair get_programs() const override; }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 3549925d0c..62e8ee8987 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -488,91 +488,21 @@ void D3D12GSRender::copy_render_target_to_dma_location() } -void D3D12GSRender::copy_render_targets_to_memory(void *buffer, u8 rtt) +std::array, 4> D3D12GSRender::copy_render_targets_to_memory() { - size_t heap_offset = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, std::get<1>(m_rtts.m_bound_render_targets[rtt]), m_surface.color_format); - - 
CHECK_HRESULT(get_current_resource_storage().command_list->Close()); - m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf()); - get_current_resource_storage().set_new_command_list(); - - wait_for_command_queue(m_device.Get(), m_command_queue.Get()); - m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one(); - int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - size_t srcPitch = get_aligned_pitch(m_surface.color_format, clip_w); - size_t dstPitch = get_packed_pitch(m_surface.color_format, clip_w); - copy_readback_buffer_to_dest(buffer, m_readback_resources, heap_offset, srcPitch, dstPitch, clip_h); + rsx::surface_info surface = {}; + surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]); + return m_rtts.get_render_targets_data(surface.color_format, clip_w, clip_h, m_device.Get(), m_command_queue.Get(), m_readback_resources, get_current_resource_storage()); } -void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer) +std::array, 2> D3D12GSRender::copy_depth_stencil_buffer_to_memory() { - unsigned clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; - unsigned clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - - size_t row_pitch = align(clip_w * 4, 256); - - size_t buffer_size = row_pitch * clip_h; - size_t heap_offset = m_readback_resources.alloc(buffer_size); - - get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE)); - - get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { heap_offset,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0, - 
&CD3DX12_TEXTURE_COPY_LOCATION(std::get<1>(m_rtts.m_bound_depth_stencil), 0), nullptr); - get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); - - CHECK_HRESULT(get_current_resource_storage().command_list->Close()); - m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf()); - get_current_resource_storage().set_new_command_list(); - - wait_for_command_queue(m_device.Get(), m_command_queue.Get()); - m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one(); - - void *mapped_buffer = m_readback_resources.map(heap_offset); - for (unsigned row = 0; row < clip_h; row++) - { - u32 *casted_dest = (u32*)((char*)buffer + row * clip_w * 4); - u32 *casted_src = (u32*)((char*)mapped_buffer + row * row_pitch); - for (unsigned col = 0; col < row_pitch / 4; col++) - *casted_dest++ = *casted_src++; - } - m_readback_resources.unmap(); + int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + rsx::surface_info surface = {}; + surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]); + return m_rtts.get_depth_stencil_data(surface.depth_format, clip_w, clip_h, m_device.Get(), m_command_queue.Get(), m_readback_resources, get_current_resource_storage()); } - - -void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer) -{ - unsigned clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; - unsigned clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - - size_t row_pitch = align(clip_w * 4, 256); - - size_t buffer_size = row_pitch * clip_h; - size_t heap_offset = m_readback_resources.alloc(buffer_size); - - get_current_resource_storage().command_list->ResourceBarrier(1, 
&CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE)); - - get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { heap_offset, { DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0, - &CD3DX12_TEXTURE_COPY_LOCATION(std::get<1>(m_rtts.m_bound_depth_stencil), 1), nullptr); - get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); - - CHECK_HRESULT(get_current_resource_storage().command_list->Close()); - m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf()); - get_current_resource_storage().set_new_command_list(); - - wait_for_command_queue(m_device.Get(), m_command_queue.Get()); - m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one(); - - void *mapped_buffer = m_readback_resources.map(heap_offset); - for (unsigned row = 0; row < clip_h; row++) - { - char *casted_dest = (char*)buffer + row * clip_w; - char *casted_src = (char*)mapped_buffer + row * row_pitch; - for (unsigned col = 0; col < row_pitch; col++) - *casted_dest++ = *casted_src++; - } - m_readback_resources.unmap(); -} - #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 4e43d440b2..9eeb332c96 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -5,6 +5,7 @@ #include "d3dx12.h" #include "D3D12Formats.h" +#include "D3D12MemoryHelpers.h" #include namespace rsx @@ -24,6 +25,62 @@ namespace rsx } throw EXCEPTION("Wrong color_target"); } + + template + void copy_pitched_src_to_dst(gsl::span dest, gsl::span src, size_t src_pitch_in_bytes, 
size_t width, size_t height) + { + for (size_t row = 0; row < height; row++) /* size_t index: height is size_t, avoids a signed/unsigned comparison */ + { + for (size_t col = 0; col < width; col++) + dest[col] = src[col]; + src = src.subspan(src_pitch_in_bytes / sizeof(U)); /* step one source row (source pitch, in elements of U) */ + dest = dest.subspan(width); /* step one packed destination row */ + } + } + /* Size in bytes of one row of `width` texels of `format`, rounded up with align(..., 256). inline: this definition sits in a header. Throws for an unknown format. */ + inline size_t get_aligned_pitch(Surface_color_format format, u32 width) + { + switch (format) + { + case Surface_color_format::b8: return align(width, 256); + case Surface_color_format::g8b8: + case Surface_color_format::x1r5g5b5_o1r5g5b5: + case Surface_color_format::x1r5g5b5_z1r5g5b5: + case Surface_color_format::r5g6b5: return align(width * 2, 256); + case Surface_color_format::a8b8g8r8: + case Surface_color_format::x8b8g8r8_o8b8g8r8: + case Surface_color_format::x8b8g8r8_z8b8g8r8: + case Surface_color_format::x8r8g8b8_o8r8g8b8: + case Surface_color_format::x8r8g8b8_z8r8g8b8: + case Surface_color_format::x32: + case Surface_color_format::a8r8g8b8: return align(width * 4, 256); + case Surface_color_format::w16z16y16x16: return align(width * 8, 256); + case Surface_color_format::w32z32y32x32: return align(width * 16, 256); + } + throw EXCEPTION("Unknow color surface format"); + } + /* Size in bytes of one tightly packed row of `width` texels of `format` (no alignment padding). inline: this definition sits in a header. Throws for an unknown format. */ + inline size_t get_packed_pitch(Surface_color_format format, u32 width) + { + switch (format) + { + case Surface_color_format::b8: return width; + case Surface_color_format::g8b8: + case Surface_color_format::x1r5g5b5_o1r5g5b5: + case Surface_color_format::x1r5g5b5_z1r5g5b5: + case Surface_color_format::r5g6b5: return width * 2; + case Surface_color_format::a8b8g8r8: + case Surface_color_format::x8b8g8r8_o8b8g8r8: + case Surface_color_format::x8b8g8r8_z8b8g8r8: + case Surface_color_format::x8r8g8b8_o8r8g8b8: + case Surface_color_format::x8r8g8b8_z8r8g8b8: + case Surface_color_format::x32: + case Surface_color_format::a8r8g8b8: return width * 4; + case Surface_color_format::w16z16y16x16: return width * 8; + case Surface_color_format::w32z32y32x32: return width * 16; + } + throw EXCEPTION("Unknow color surface format"); + } } template @@ -33,6 +90,7 @@ namespace 
rsx using surface_storage_type = typename Traits::surface_storage_type; using surface_type = typename Traits::surface_type; using command_list_type = typename Traits::command_list_type; + using download_buffer_object = typename Traits::download_buffer_object; std::unordered_map m_render_targets_storage = {}; std::unordered_map m_depth_stencil_storage = {}; @@ -168,6 +226,134 @@ namespace rsx return It->second.Get(); return surface_type(); } + /* Downloads each bound color target (first tuple field non-zero) through Traits and returns one tightly packed byte vector per slot; skipped slots stay empty. `args` are forwarded to every Traits call. */ + template + std::array, 4> get_render_targets_data( + Surface_color_format surface_color_format, size_t width, size_t height, + Args&& ...args + ) + { + std::array download_data = {}; + + // Issue download commands + for (int i = 0; i < 4; i++) + { + if (std::get<0>(m_bound_render_targets[i]) == 0) + continue; + + surface_type surface_resource = std::get<1>(m_bound_render_targets[i]); + download_data[i] = std::move( + Traits::issue_download_command(surface_resource, surface_color_format, width, height, std::forward(args)...) + ); + } + + std::array, 4> result = {}; + + // Sync and copy data + for (int i = 0; i < 4; i++) + { + if (std::get<0>(m_bound_render_targets[i]) == 0) + continue; + + gsl::span raw_src = Traits::map_downloaded_buffer(download_data[i], std::forward(args)...); + + size_t src_pitch = get_aligned_pitch(surface_color_format, gsl::narrow(width)); + size_t dst_pitch = get_packed_pitch(surface_color_format, gsl::narrow(width)); + + result[i].resize(dst_pitch * height); /* every dst_span in the switch must cover exactly these dst_pitch * height bytes */ + + // Note: MSVC + GSL doesn't support span -> span for non const span atm + // thus manual conversion + switch (surface_color_format) + { + case Surface_color_format::a8b8g8r8: + case Surface_color_format::x8b8g8r8_o8b8g8r8: + case Surface_color_format::x8b8g8r8_z8b8g8r8: + case Surface_color_format::a8r8g8b8: + case Surface_color_format::x8r8g8b8_o8r8g8b8: + case Surface_color_format::x8r8g8b8_z8r8g8b8: + case Surface_color_format::x32: + { + gsl::span> dst_span{ (be_t*)result[i].data(), gsl::narrow(dst_pitch * height / sizeof(be_t)) }; + 
copy_pitched_src_to_dst(dst_span, gsl::as_span(raw_src), src_pitch, width, height); + break; + } + case Surface_color_format::b8: + { + gsl::span dst_span{ (u8*)result[i].data(), gsl::narrow(dst_pitch * height / sizeof(u8)) }; + copy_pitched_src_to_dst(dst_span, gsl::as_span(raw_src), src_pitch, width, height); + break; + } + case Surface_color_format::g8b8: + case Surface_color_format::r5g6b5: + case Surface_color_format::x1r5g5b5_o1r5g5b5: + case Surface_color_format::x1r5g5b5_z1r5g5b5: + { + gsl::span> dst_span{ (be_t*)result[i].data(), gsl::narrow(dst_pitch * height / sizeof(be_t)) }; + copy_pitched_src_to_dst(dst_span, gsl::as_span(raw_src), src_pitch, width, height); + break; + } + // Note : may require some big endian swap + case Surface_color_format::w32z32y32x32: + { + gsl::span dst_span{ (u128*)result[i].data(), gsl::narrow(dst_pitch * height / sizeof(u128)) }; + copy_pitched_src_to_dst(dst_span, gsl::as_span(raw_src), src_pitch, width, height); + break; + } + case Surface_color_format::w16z16y16x16: + { + gsl::span dst_span{ (u64*)result[i].data(), gsl::narrow(dst_pitch * height / sizeof(u64)) }; + copy_pitched_src_to_dst(dst_span, gsl::as_span(raw_src), src_pitch, width, height); + break; + } + + } + Traits::unmap_downloaded_buffer(download_data[i], std::forward(args)...); + } + return result; + } + /* Downloads the bound depth/stencil surface through Traits; index 0 of the result is depth, index 1 is stencil. Both stay empty when the first field of m_bound_depth_stencil is 0. */ + template + std::array, 2> get_depth_stencil_data( + Surface_depth_format surface_depth_format, size_t width, size_t height, + Args&& ...args + ) + { + std::array, 2> result = {}; + if (std::get<0>(m_bound_depth_stencil) == 0) + return result; + size_t row_pitch = align(width * 4, 256); + + download_buffer_object stencil_data = {}; + download_buffer_object depth_data = Traits::issue_depth_download_command(std::get<1>(m_bound_depth_stencil), surface_depth_format, width, height, std::forward(args)...); + if (surface_depth_format == Surface_depth_format::z24s8) + stencil_data = std::move(Traits::issue_stencil_download_command(std::get<1>(m_bound_depth_stencil), 
width, height, std::forward(args)...)); + + gsl::span depth_buffer_raw_src = Traits::map_downloaded_buffer(depth_data, std::forward(args)...); + if (surface_depth_format == Surface_depth_format::z16) + { + result[0].resize(width * height * 2); + gsl::span dest{ (u16*)result[0].data(), gsl::narrow(width * height) }; + copy_pitched_src_to_dst(dest, gsl::as_span(depth_buffer_raw_src), row_pitch, width, height); + } + if (surface_depth_format == Surface_depth_format::z24s8) + { + result[0].resize(width * height * 4); + gsl::span dest{ (u32*)result[0].data(), gsl::narrow(width * height) }; + copy_pitched_src_to_dst(dest, gsl::as_span(depth_buffer_raw_src), row_pitch, width, height); + } + Traits::unmap_downloaded_buffer(depth_data, std::forward(args)...); + + if (surface_depth_format == Surface_depth_format::z16) + return result; + + gsl::span stencil_buffer_raw_src = Traits::map_downloaded_buffer(stencil_data, std::forward(args)...); + result[1].resize(width * height); + gsl::span dest{ (u8*)result[1].data(), gsl::narrow(width * height) }; + copy_pitched_src_to_dst(dest, gsl::as_span(stencil_buffer_raw_src), align(width, 256), width, height); + Traits::unmap_downloaded_buffer(stencil_data, std::forward(args)...); + return result; + } }; } @@ -176,6 +362,7 @@ struct render_target_traits using surface_storage_type = ComPtr; using surface_type = ID3D12Resource*; using command_list_type = gsl::not_null; + using download_buffer_object = std::tuple, HANDLE>; // heap offset, size, last_put_pos, fence, handle static ComPtr create_new_surface( @@ -284,6 +471,128 @@ struct render_target_traits //TODO: Check format return rtt->GetDesc().Width == width && rtt->GetDesc().Height == height; } + + static + std::tuple, HANDLE> issue_download_command( + gsl::not_null rtt, + Surface_color_format color_format, size_t width, size_t height, + gsl::not_null device, gsl::not_null command_queue, data_heap &readback_heap, resource_storage &res_store + ) + { + ID3D12GraphicsCommandList* 
command_list = res_store.command_list.Get(); + DXGI_FORMAT dxgi_format = get_color_surface_format(color_format); + size_t row_pitch = rsx::get_aligned_pitch(color_format, gsl::narrow(width)); + + size_t buffer_size = row_pitch * height; + size_t heap_offset = readback_heap.alloc(buffer_size); + + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); + + command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.get_heap(), { heap_offset,{ dxgi_format, (UINT)width, (UINT)height, 1, (UINT)row_pitch } }), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(rtt, 0), nullptr); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); + + CHECK_HRESULT(command_list->Close()); + command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)res_store.command_list.GetAddressOf()); + res_store.set_new_command_list(); + + ComPtr fence; + CHECK_HRESULT(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf()))); + HANDLE handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); + CHECK_HRESULT(fence->SetEventOnCompletion(1, handle)); /* checked: map_downloaded_buffer waits on this event with INFINITE, a silent failure here would hang */ + CHECK_HRESULT(command_queue->Signal(fence.Get(), 1)); + + return std::make_tuple(heap_offset, buffer_size, readback_heap.get_current_put_pos_minus_one(), fence, handle); + } + /* Submits a copy of subresource 0 of `ds` into the readback heap and signals a fence after it. Returns (heap offset, size, put position, fence, event handle). */ + static + std::tuple, HANDLE> issue_depth_download_command( + gsl::not_null ds, + Surface_depth_format depth_format, size_t width, size_t height, + gsl::not_null device, gsl::not_null command_queue, data_heap &readback_heap, resource_storage &res_store + ) + { + ID3D12GraphicsCommandList* command_list = res_store.command_list.Get(); + DXGI_FORMAT dxgi_format = (depth_format == Surface_depth_format::z24s8) ? 
DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R16_TYPELESS; + + size_t row_pitch = align(width * 4, 256); + size_t buffer_size = row_pitch * height; + size_t heap_offset = readback_heap.alloc(buffer_size); + + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE)); + + command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.get_heap(), { heap_offset,{ dxgi_format, (UINT)width, (UINT)height, 1, (UINT)row_pitch } }), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(ds, 0), nullptr); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); + + CHECK_HRESULT(command_list->Close()); + command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)res_store.command_list.GetAddressOf()); + res_store.set_new_command_list(); + + ComPtr fence; + CHECK_HRESULT(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf()))); + HANDLE handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); + CHECK_HRESULT(fence->SetEventOnCompletion(1, handle)); /* checked: the event is later waited on with INFINITE */ + CHECK_HRESULT(command_queue->Signal(fence.Get(), 1)); + + return std::make_tuple(heap_offset, buffer_size, readback_heap.get_current_put_pos_minus_one(), fence, handle); + } + /* Submits a copy of subresource 1 of `stencil` (as R8) into the readback heap and signals a fence after it. Returns (heap offset, size, put position, fence, event handle). */ + static + std::tuple, HANDLE> issue_stencil_download_command( + gsl::not_null stencil, + size_t width, size_t height, + gsl::not_null device, gsl::not_null command_queue, data_heap &readback_heap, resource_storage &res_store + ) + { + ID3D12GraphicsCommandList* command_list = res_store.command_list.Get(); + + size_t row_pitch = align(width, 256); + size_t buffer_size = row_pitch * height; + size_t heap_offset = readback_heap.alloc(buffer_size); + + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE)); + + 
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.get_heap(), { heap_offset,{ DXGI_FORMAT_R8_TYPELESS, (UINT)width, (UINT)height, 1, (UINT)row_pitch } }), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(stencil, 1), nullptr); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); + + CHECK_HRESULT(command_list->Close()); + command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)res_store.command_list.GetAddressOf()); + res_store.set_new_command_list(); + + ComPtr fence; + CHECK_HRESULT(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf()))); + HANDLE handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); + CHECK_HRESULT(fence->SetEventOnCompletion(1, handle)); /* checked: the event is later waited on with INFINITE */ + CHECK_HRESULT(command_queue->Signal(fence.Get(), 1)); + + return std::make_tuple(heap_offset, buffer_size, readback_heap.get_current_put_pos_minus_one(), fence, handle); + } + /* Waits for the download's event, closes the event handle, moves the heap get position to the recorded put position and maps the downloaded range. */ + static + gsl::span map_downloaded_buffer(const std::tuple, HANDLE> &sync_data, + gsl::not_null device, gsl::not_null command_queue, data_heap &readback_heap, resource_storage &res_store) + { + size_t offset; + size_t buffer_size; + size_t current_put_pos_minus_one; + HANDLE handle; + std::tie(offset, buffer_size, current_put_pos_minus_one, std::ignore, handle) = sync_data; + WaitForSingleObjectEx(handle, INFINITE, FALSE); + CloseHandle(handle); + + readback_heap.m_get_pos = current_put_pos_minus_one; + const gsl::byte *mapped_buffer = readback_heap.map(CD3DX12_RANGE(offset, offset + buffer_size)); + return { mapped_buffer , gsl::narrow(buffer_size) }; + } + /* Unmaps the readback heap; the other parameters are unused here. */ + static + void unmap_downloaded_buffer(const std::tuple, HANDLE> &sync_data, + gsl::not_null device, gsl::not_null command_queue, data_heap &readback_heap, resource_storage &res_store) + { + readback_heap.unmap(); + } }; struct render_targets : public rsx::surface_store diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 
0bc8ddabe1..87455f4718 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -276,44 +276,14 @@ namespace rsx int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - size_t pitch = clip_w * 4; - std::vector color_index_to_record; - switch (to_surface_target(method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) - { - case Surface_target::surface_a: - color_index_to_record = { 0 }; - break; - case Surface_target::surface_b: - color_index_to_record = { 1 }; - break; - case Surface_target::surfaces_a_b: - color_index_to_record = { 0, 1 }; - break; - case Surface_target::surfaces_a_b_c: - color_index_to_record = { 0, 1, 2 }; - break; - case Surface_target::surfaces_a_b_c_d: - color_index_to_record = { 0, 1, 2, 3 }; - break; - } - for (size_t i : color_index_to_record) - { - draw_state.color_buffer[i].width = clip_w; - draw_state.color_buffer[i].height = clip_h; - draw_state.color_buffer[i].data.resize(pitch * clip_h); - copy_render_targets_to_memory(draw_state.color_buffer[i].data.data(), i); - } - if (get_address(method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], method_registers[NV4097_SET_CONTEXT_DMA_ZETA])) - { - draw_state.depth.width = clip_w; - draw_state.depth.height = clip_h; - draw_state.depth.data.resize(clip_w * clip_h * 4); - copy_depth_buffer_to_memory(draw_state.depth.data.data()); - draw_state.stencil.width = clip_w; - draw_state.stencil.height = clip_h; - draw_state.stencil.data.resize(clip_w * clip_h * 4); - copy_stencil_buffer_to_memory(draw_state.stencil.data.data()); - } + rsx::surface_info surface = {}; + surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]); + draw_state.width = clip_w; + draw_state.height = clip_h; + draw_state.surface_color_format = surface.color_format; + draw_state.color_buffer = std::move(copy_render_targets_to_memory()); + draw_state.surface_depth_format = surface.depth_format; + 
draw_state.depth_stencil = std::move(copy_depth_stencil_buffer_to_memory()); draw_state.programs = get_programs(); draw_state.name = name; frame_debug.draw_calls.push_back(draw_state); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 0e49853c63..434afb7005 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -15,19 +15,16 @@ extern u64 get_system_time(); struct frame_capture_data { - struct buffer - { - std::vector data; - size_t width = 0, height = 0; - }; struct draw_state { std::string name; std::pair programs; - buffer color_buffer[4]; - buffer depth; - buffer stencil; + size_t width = 0, height = 0; + Surface_color_format surface_color_format; + std::array, 4> color_buffer; + Surface_depth_format surface_depth_format; + std::array, 2> depth_stencil; }; std::vector > command_queue; std::vector draw_calls; @@ -347,19 +344,17 @@ namespace rsx * Copy rtt values to buffer. * TODO: It's more efficient to combine multiple call of this function into one. */ - virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) {}; + virtual std::array, 4> copy_render_targets_to_memory() { + return std::array, 4>(); + }; /** - * Copy depth content to buffer. + * Copy depth and stencil content to buffers. * TODO: It's more efficient to combine multiple call of this function into one. */ - virtual void copy_depth_buffer_to_memory(void *buffer) {}; - - /** - * Copy stencil content to buffer. - * TODO: It's more efficient to combine multiple call of this function into one. 
- */ - virtual void copy_stencil_buffer_to_memory(void *buffer) {}; + virtual std::array, 2> copy_depth_stencil_buffer_to_memory() { + return std::array, 2>(); + }; virtual std::pair get_programs() const { return std::make_pair("", ""); }; public: diff --git a/rpcs3/Gui/RSXDebugger.cpp b/rpcs3/Gui/RSXDebugger.cpp index 6f8602af62..7f496239c9 100644 --- a/rpcs3/Gui/RSXDebugger.cpp +++ b/rpcs3/Gui/RSXDebugger.cpp @@ -251,8 +251,8 @@ RSXDebugger::RSXDebugger(wxWindow* parent) p_buffer_colorB->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickBuffer, this); p_buffer_colorC->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickBuffer, this); p_buffer_colorD->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickBuffer, this); - //p_buffer_depth->Bind(wxEVT_BUTTON, &RSXDebugger::OnClickBuffer, this); - //p_buffer_stencil->Bind(wxEVT_BUTTON, &RSXDebugger::OnClickBuffer, this); + p_buffer_depth->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickBuffer, this); + p_buffer_stencil->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickBuffer, this); p_buffer_tex->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickBuffer, this); m_list_captured_draw_calls->Bind(wxEVT_LEFT_DOWN, &RSXDebugger::OnClickDrawCalls, this); @@ -364,6 +364,8 @@ void RSXDebugger::OnClickBuffer(wxMouseEvent& event) if (event.GetId() == p_buffer_colorB->GetId()) display_buffer(this, buffer_img[1]); if (event.GetId() == p_buffer_colorC->GetId()) display_buffer(this, buffer_img[2]); if (event.GetId() == p_buffer_colorD->GetId()) display_buffer(this, buffer_img[3]); + if (event.GetId() == p_buffer_depth->GetId()) display_buffer(this, depth_img); + if (event.GetId() == p_buffer_stencil->GetId()) display_buffer(this, stencil_img); if (event.GetId() == p_buffer_tex->GetId()) { u8 location = render.textures[m_cur_texture].location(); @@ -380,18 +382,74 @@ void RSXDebugger::OnClickBuffer(wxMouseEvent& event) namespace { + std::array get_value(gsl::span orig_buffer, Surface_color_format format, size_t idx) + { + switch (format) + { + case Surface_color_format::b8: + 
{ + u8 value = gsl::as_span(orig_buffer)[idx]; + return{ value, value, value }; + } + case Surface_color_format::x32: + { + be_t stored_val = gsl::as_span>(orig_buffer)[idx]; + u32 swapped_val = stored_val; + f32 float_val = (f32&)swapped_val; + u8 val = float_val * 255.f; + return{ val, val, val }; + } + case Surface_color_format::a8b8g8r8: + case Surface_color_format::x8b8g8r8_o8b8g8r8: + case Surface_color_format::x8b8g8r8_z8b8g8r8: + { + auto ptr = gsl::as_span(orig_buffer); + return{ ptr[1 + idx * 4], ptr[2 + idx * 4], ptr[3 + idx * 4] }; + } + case Surface_color_format::a8r8g8b8: + case Surface_color_format::x8r8g8b8_o8r8g8b8: + case Surface_color_format::x8r8g8b8_z8r8g8b8: + { + auto ptr = gsl::as_span(orig_buffer); + return{ ptr[3 + idx * 4], ptr[2 + idx * 4], ptr[1 + idx * 4] }; + } + case Surface_color_format::w16z16y16x16: + { + auto ptr = gsl::as_span(orig_buffer); + f16 h0 = f16(ptr[4 * idx]); + f16 h1 = f16(ptr[4 * idx + 1]); + f16 h2 = f16(ptr[4 * idx + 2]); + f32 f0 = float(h0); + f32 f1 = float(h1); + f32 f2 = float(h2); + + u8 val0 = f0 * 255.; + u8 val1 = f1 * 255.; + u8 val2 = f2 * 255.; + return{ val0, val1, val2 }; + } + case Surface_color_format::g8b8: + case Surface_color_format::r5g6b5: + case Surface_color_format::x1r5g5b5_o1r5g5b5: + case Surface_color_format::x1r5g5b5_z1r5g5b5: + case Surface_color_format::w32z32y32x32: + return{ 0, 0, 0 }; /* formats this viewer cannot decode are shown as black: the caller convert_to_wximage_buffer is noexcept, so throwing here would end in std::terminate */ + } + } + /** * Return a new buffer that can be passed to wxImage ctor. * The pointer seems to be freed by wxImage. 
*/ - u8* convert_to_wximage_buffer(u8 *orig_buffer, size_t width, size_t height) noexcept + u8* convert_to_wximage_buffer(Surface_color_format format, gsl::span orig_buffer, size_t width, size_t height) noexcept { unsigned char* buffer = (unsigned char*)malloc(width * height * 3); for (u32 i = 0; i < width * height; i++) { - buffer[0 + i * 3] = orig_buffer[3 + i * 4]; - buffer[1 + i * 3] = orig_buffer[2 + i * 4]; - buffer[2 + i * 3] = orig_buffer[1 + i * 4]; + const auto &colors = get_value(orig_buffer, format, i); + buffer[0 + i * 3] = colors[0]; + buffer[1 + i * 3] = colors[1]; + buffer[2 + i * 3] = colors[2]; } return buffer; } @@ -401,6 +459,8 @@ void RSXDebugger::OnClickDrawCalls(wxMouseEvent& event) { size_t draw_id = m_list_captured_draw_calls->GetFirstSelected(); + const auto& draw_call = frame_debug.draw_calls[draw_id]; + wxPanel* p_buffers[] = { p_buffer_colorA, @@ -409,13 +469,14 @@ void RSXDebugger::OnClickDrawCalls(wxMouseEvent& event) p_buffer_colorD, }; + size_t width = draw_call.width; + size_t height = draw_call.height; + for (size_t i = 0; i < 4; i++) { - size_t width = frame_debug.draw_calls[draw_id].color_buffer[i].width, height = frame_debug.draw_calls[draw_id].color_buffer[i].height; - if (width && height) + if (width && height && !draw_call.color_buffer[i].empty()) { - unsigned char *orig_buffer = frame_debug.draw_calls[draw_id].color_buffer[i].data.data(); - buffer_img[i] = wxImage(width, height, convert_to_wximage_buffer(orig_buffer, width, height)); + buffer_img[i] = wxImage(width, height, convert_to_wximage_buffer(draw_call.surface_color_format, draw_call.color_buffer[i], width, height)); wxClientDC dc_canvas(p_buffers[i]); if (buffer_img[i].IsOk()) @@ -425,56 +486,71 @@ void RSXDebugger::OnClickDrawCalls(wxMouseEvent& event) // Buffer Z { - size_t width = frame_debug.draw_calls[draw_id].depth.width, height = frame_debug.draw_calls[draw_id].depth.height; - if (width && height) + if (width && height && !draw_call.depth_stencil[0].empty()) 
{ - u32 *orig_buffer = (u32*)frame_debug.draw_calls[draw_id].depth.data.data(); + gsl::span orig_buffer = draw_call.depth_stencil[0]; unsigned char *buffer = (unsigned char *)malloc(width * height * 3); - for (u32 row = 0; row < height; row++) + if (draw_call.surface_depth_format == Surface_depth_format::z24s8) { - for (u32 col = 0; col < width; col++) + for (u32 row = 0; row < height; row++) { - u32 depth_val = orig_buffer[row * width + col]; - u8 displayed_depth_val = 255 * depth_val / 0xFFFFFF; - buffer[3 * col + 0 + width * row * 3] = displayed_depth_val; - buffer[3 * col + 1 + width * row * 3] = displayed_depth_val; - buffer[3 * col + 2 + width * row * 3] = displayed_depth_val; + for (u32 col = 0; col < width; col++) + { + u32 depth_val = gsl::as_span(orig_buffer)[row * width + col]; + u8 displayed_depth_val = 255 * depth_val / 0xFFFFFF; + buffer[3 * col + 0 + width * row * 3] = displayed_depth_val; + buffer[3 * col + 1 + width * row * 3] = displayed_depth_val; + buffer[3 * col + 2 + width * row * 3] = displayed_depth_val; + } + } + } + else + { + for (u32 row = 0; row < height; row++) + { + for (u32 col = 0; col < width; col++) + { + u16 depth_val = gsl::as_span(orig_buffer)[row * width + col]; + u8 displayed_depth_val = 255 * depth_val / 0xFFFF; + buffer[3 * col + 0 + width * row * 3] = displayed_depth_val; + buffer[3 * col + 1 + width * row * 3] = displayed_depth_val; + buffer[3 * col + 2 + width * row * 3] = displayed_depth_val; + } } } - wxImage img(width, height, buffer); + depth_img = wxImage(width, height, buffer); wxClientDC dc_canvas(p_buffer_depth); - if (img.IsOk()) - dc_canvas.DrawBitmap(img.Scale(m_panel_width, m_panel_height), 0, 0, false); + if (depth_img.IsOk()) + dc_canvas.DrawBitmap(depth_img.Scale(m_panel_width, m_panel_height), 0, 0, false); } } // Buffer S { - size_t width = frame_debug.draw_calls[draw_id].stencil.width, height = frame_debug.draw_calls[draw_id].stencil.height; - if (width && height) + if (width && height && 
!draw_call.depth_stencil[1].empty()) { - u8 *orig_buffer = frame_debug.draw_calls[draw_id].stencil.data.data(); + gsl::span orig_buffer = draw_call.depth_stencil[1]; unsigned char *buffer = (unsigned char *)malloc(width * height * 3); for (u32 row = 0; row < height; row++) { for (u32 col = 0; col < width; col++) { - u32 stencil_val = orig_buffer[row * width + col]; + u8 stencil_val = gsl::as_span(orig_buffer)[row * width + col]; buffer[3 * col + 0 + width * row * 3] = stencil_val; buffer[3 * col + 1 + width * row * 3] = stencil_val; buffer[3 * col + 2 + width * row * 3] = stencil_val; } } - wxImage img(width, height, buffer); + stencil_img = wxImage(width, height, buffer); wxClientDC dc_canvas(p_buffer_stencil); - if (img.IsOk()) - dc_canvas.DrawBitmap(img.Scale(m_panel_width, m_panel_height), 0, 0, false); + if (stencil_img.IsOk()) + dc_canvas.DrawBitmap(stencil_img.Scale(m_panel_width, m_panel_height), 0, 0, false); } } diff --git a/rpcs3/Gui/RSXDebugger.h b/rpcs3/Gui/RSXDebugger.h index 26074a6561..3851652d8d 100644 --- a/rpcs3/Gui/RSXDebugger.h +++ b/rpcs3/Gui/RSXDebugger.h @@ -29,6 +29,8 @@ class RSXDebugger : public wxDialog wxPanel* p_buffer_tex; wxImage buffer_img[4]; + wxImage depth_img; + wxImage stencil_img; wxTextCtrl* m_text_transform_program; wxTextCtrl *m_text_shader_program;