rsx/d3d12: Record frame buffer content per draw call.

This commit is contained in:
Vincent Lejeune 2015-11-06 00:56:27 +01:00
parent 8da7361794
commit 70b9271cd8
4 changed files with 247 additions and 1 deletions

View file

@ -214,4 +214,8 @@ protected:
virtual void load_vertex_data(u32 first, u32 count) override;
virtual void load_vertex_index_data(u32 first, u32 count) override;
// Reads back the colour render target bound at index `rtt` into `buffer` (frame capture).
virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) override;
// Reads back the depth plane of the bound depth/stencil surface into `buffer` (frame capture).
virtual void copy_depth_buffer_to_memory(void *buffer) override;
// Reads back the stencil plane of the bound depth/stencil surface into `buffer` (frame capture).
virtual void copy_stencil_buffer_to_memory(void *buffer) override;
};

View file

@ -587,4 +587,128 @@ void D3D12GSRender::copy_render_target_to_dma_location()
}
}
/**
 * Download the colour render target bound at index `rtt` and copy it, tightly
 * packed, into `buffer`.
 *
 * The copy is synchronous: the current command list is closed, submitted and
 * waited on before the readback buffer is handed to copy_readback_buffer_to_dest.
 *
 * Only CELL_GCM_SURFACE_A8R8G8B8 (4 bytes per pixel) and
 * CELL_GCM_SURFACE_F_W16Z16Y16X16 (8 bytes per pixel) are handled. For any
 * other colour format the function returns without touching `buffer` and
 * without submitting GPU work; previously the pitches were left uninitialised
 * in that case.
 *
 * @param buffer Destination memory. Presumably needs room for
 *               clip_h rows of dstPitch bytes -- confirm against
 *               copy_readback_buffer_to_dest.
 * @param rtt    Index into m_rtts.bound_render_targets.
 */
void D3D12GSRender::copy_render_targets_to_memory(void *buffer, u8 rtt)
{
	int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
	int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;

	// Resolve the row pitches first so an unsupported format can bail out
	// before any readback resource is allocated or any command list is flushed.
	size_t srcPitch, dstPitch;
	switch (m_surface.color_format)
	{
	case CELL_GCM_SURFACE_A8R8G8B8:
		srcPitch = align(clip_w * 4, 256);
		dstPitch = clip_w * 4;
		break;
	case CELL_GCM_SURFACE_F_W16Z16Y16X16:
		srcPitch = align(clip_w * 8, 256);
		dstPitch = clip_w * 8;
		break;
	default:
		// Unknown pixel size: copying with a guessed pitch could read or write out of bounds.
		return;
	}

	ComPtr<ID3D12Resource> readback_buffer = create_readback_buffer_and_download(m_device.Get(), getCurrentResourceStorage().command_list.Get(), m_readbackResources, m_rtts.bound_render_targets[rtt], m_surface.color_format);

	// Submit the download and block until the GPU has finished it.
	ThrowIfFailed(getCurrentResourceStorage().command_list->Close());
	m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf());
	getCurrentResourceStorage().set_new_command_list();

	wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get());
	m_readbackResources.m_get_pos = m_readbackResources.get_current_put_pos_minus_one();

	copy_readback_buffer_to_dest(buffer, readback_buffer.Get(), srcPitch, dstPitch, clip_h);
}
void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
{
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
size_t row_pitch = align(clip_w * 4, 256);
ComPtr<ID3D12Resource> readback_buffer;
size_t buffer_size = row_pitch * clip_h;
assert(m_readbackResources.can_alloc(buffer_size));
size_t heapOffset = m_readbackResources.alloc(buffer_size);
ThrowIfFailed(
m_device->CreatePlacedResource(
m_readbackResources.m_heap,
heapOffset,
&CD3DX12_RESOURCE_DESC::Buffer(buffer_size),
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(readback_buffer.GetAddressOf())
)
);
getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
getCurrentResourceStorage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_buffer.Get(), { 0,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 0), nullptr);
getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
ThrowIfFailed(getCurrentResourceStorage().command_list->Close());
m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf());
getCurrentResourceStorage().set_new_command_list();
wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get());
m_readbackResources.m_get_pos = m_readbackResources.get_current_put_pos_minus_one();
void *mapped_buffer;
ThrowIfFailed(readback_buffer->Map(0, nullptr, &mapped_buffer));
for (unsigned row = 0; row < clip_h; row++)
{
u32 *casted_dest = (u32*)((char*)buffer + row * clip_w * 4);
u32 *casted_src = (u32*)((char*)mapped_buffer + row * row_pitch);
for (unsigned col = 0; col < row_pitch / 4; col++)
*casted_dest++ = *casted_src++;
}
readback_buffer->Unmap(0, nullptr);
}
void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
{
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
size_t row_pitch = align(clip_w * 4, 256);
ComPtr<ID3D12Resource> readback_buffer;
size_t buffer_size = row_pitch * clip_h;
assert(m_readbackResources.can_alloc(buffer_size));
size_t heapOffset = m_readbackResources.alloc(buffer_size);
ThrowIfFailed(
m_device->CreatePlacedResource(
m_readbackResources.m_heap,
heapOffset,
&CD3DX12_RESOURCE_DESC::Buffer(buffer_size),
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(readback_buffer.GetAddressOf())
)
);
getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
getCurrentResourceStorage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_buffer.Get(), { 0,{ DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 1), nullptr);
getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
ThrowIfFailed(getCurrentResourceStorage().command_list->Close());
m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf());
getCurrentResourceStorage().set_new_command_list();
wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get());
m_readbackResources.m_get_pos = m_readbackResources.get_current_put_pos_minus_one();
void *mapped_buffer;
ThrowIfFailed(readback_buffer->Map(0, nullptr, &mapped_buffer));
for (unsigned row = 0; row < clip_h; row++)
{
char *casted_dest = (char*)buffer + row * clip_w;
char *casted_src = (char*)mapped_buffer + row * row_pitch;
for (unsigned col = 0; col < row_pitch; col++)
*casted_dest++ = *casted_src++;
}
readback_buffer->Unmap(0, nullptr);
}
#endif

View file

@ -21,7 +21,8 @@ extern "C"
#define CMD_DEBUG 0
extern u64 get_system_time();
// Frame-capture request flag. flip_command consumes it: when set, it is cleared,
// frame_debug is reset and capture is enabled on the rsx thread for the next frame.
bool user_asked_for_frame_capture = false;
// Storage for the captured frame: the register writes recorded while capture is
// active and one buffer snapshot per capture_frame() call.
frame_capture_data frame_debug;
namespace rsx
{
@ -552,6 +553,18 @@ namespace rsx
void flip_command(thread* rsx, u32 arg)
{
if (user_asked_for_frame_capture)
{
rsx->capture_current_frame = true;
user_asked_for_frame_capture = false;
frame_debug.reset();
}
else if (rsx->capture_current_frame)
{
rsx->capture_current_frame = false;
Emu.Pause();
}
rsx->gcm_current_buffer = arg;
rsx->flip(arg);
@ -625,7 +638,11 @@ namespace rsx
{
// try process using gpu
if (rsx->domethod(id, arg))
{
if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE)
rsx->capture_frame();
return;
}
// not handled by renderer
// try process using cpu
@ -872,6 +889,53 @@ namespace rsx
}
}
void thread::capture_frame()
{
frame_capture_data::draw_state draw_state = {};
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
size_t pitch = clip_w * 4;
std::vector<size_t> color_index_to_record;
switch (method_registers[NV4097_SET_SURFACE_COLOR_TARGET])
{
case CELL_GCM_SURFACE_TARGET_0:
color_index_to_record = { 0 };
break;
case CELL_GCM_SURFACE_TARGET_1:
color_index_to_record = { 1 };
break;
case CELL_GCM_SURFACE_TARGET_MRT1:
color_index_to_record = { 0, 1 };
break;
case CELL_GCM_SURFACE_TARGET_MRT2:
color_index_to_record = { 0, 1, 2 };
break;
case CELL_GCM_SURFACE_TARGET_MRT3:
color_index_to_record = { 0, 1, 2, 3 };
break;
}
for (size_t i : color_index_to_record)
{
draw_state.color_buffer[i].width = clip_w;
draw_state.color_buffer[i].height = clip_h;
draw_state.color_buffer[i].data.resize(pitch * clip_h);
copy_render_targets_to_memory(draw_state.color_buffer[i].data.data(), i);
}
if (get_address(method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], method_registers[NV4097_SET_CONTEXT_DMA_ZETA]))
{
draw_state.depth.width = clip_w;
draw_state.depth.height = clip_h;
draw_state.depth.data.resize(clip_w * clip_h * 4);
copy_depth_buffer_to_memory(draw_state.depth.data.data());
draw_state.stencil.width = clip_w;
draw_state.stencil.height = clip_h;
draw_state.stencil.data.resize(clip_w * clip_h * 4);
copy_stencil_buffer_to_memory(draw_state.stencil.data.data());
}
frame_debug.draw_calls.push_back(draw_state);
}
void thread::begin()
{
draw_mode = method_registers[NV4097_SET_BEGIN_END];
@ -884,6 +948,9 @@ namespace rsx
vertex_array.clear();
transform_constants.clear();
if (capture_current_frame)
capture_frame();
}
void thread::task()
@ -1006,6 +1073,8 @@ namespace rsx
}
method_registers[reg] = value;
if (capture_current_frame)
frame_debug.command_queue.push_back(std::make_pair(reg, value));
if (auto method = methods[reg])
method(this, value);

View file

@ -10,6 +10,36 @@
#include "Utilities/Timer.h"
#include "Utilities/types.h"
extern u64 get_system_time();
/**
 * Everything recorded while a frame capture is active: the stream of register
 * writes plus one surface snapshot per captured draw.
 */
struct frame_capture_data
{
	/** Raw pixel bytes of one surface together with its dimensions. */
	struct buffer
	{
		std::vector<u8> data;
		size_t width = 0;
		size_t height = 0;
	};

	/** Surfaces snapshotted for a single capture point. */
	struct draw_state
	{
		buffer color_buffer[4];
		buffer depth;
		buffer stencil;
	};

	/** (register, value) pairs in the order they were written. */
	std::vector<std::pair<u32, u32>> command_queue;
	/** One entry per capture point, in capture order. */
	std::vector<draw_state> draw_calls;

	/** Discard everything recorded so far. */
	void reset() noexcept
	{
		draw_calls.clear();
		command_queue.clear();
	}
};
// Request flag for a frame capture; defined alongside the RSX thread implementation.
extern bool user_asked_for_frame_capture;
// Recorded commands and per-draw surface snapshots of the captured frame.
extern frame_capture_data frame_debug;
namespace rsx
{
namespace limits
@ -169,6 +199,8 @@ namespace rsx
virtual void load_vertex_data(u32 first, u32 count);
virtual void load_vertex_index_data(u32 first, u32 count);
// True while the frame currently being processed is recorded into frame_debug.
bool capture_current_frame = false;
// Snapshots the bound colour/depth/stencil surfaces into frame_debug.draw_calls.
void capture_frame();
public:
u32 ioAddress, ioSize;
int flip_status;
@ -233,6 +265,23 @@ namespace rsx
*/
void fill_vertex_program_constants_data(void *buffer) noexcept;
/**
* Copy the content of the color render target at index rtt into buffer.
* The default implementation does nothing; renderer backends override it.
* TODO: It would be more efficient to combine multiple calls to this function into one.
*/
virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) {};
/**
* Copy the content of the depth buffer into buffer.
* The default implementation does nothing; renderer backends override it.
* TODO: It would be more efficient to combine multiple calls to this function into one.
*/
virtual void copy_depth_buffer_to_memory(void *buffer) {};
/**
* Copy the content of the stencil buffer into buffer.
* The default implementation does nothing; renderer backends override it.
* TODO: It would be more efficient to combine multiple calls to this function into one.
*/
virtual void copy_stencil_buffer_to_memory(void *buffer) {};
public:
void reset();
void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);