Merge pull request #1415 from vlj/d3d12

D3d12: Factoring
This commit is contained in:
B1ackDaemon 2016-01-12 00:52:45 +02:00
commit 68f9898171
10 changed files with 519 additions and 501 deletions

View file

@ -13,18 +13,14 @@ namespace
/**
 * Copies a block of vertex data into vertex_index_heap and returns the GPU
 * virtual address at which the copy starts.
 *
 * @param vertex_array_desc  Format description; not referenced in the lines shown here.
 * @param vertex_data        Bytes to upload; its size() is the allocation size.
 * @param device             Not referenced in the lines shown here.
 * @param vertex_index_heap  Heap the space is allocated from and the data is copied into.
 * @return The heap's GPU virtual address plus the allocated offset.
 *
 * NOTE(review): several statements appear twice in this listing (two signatures,
 * two alloc calls, a manual Map/memcpy/Unmap sequence next to a map()/unmap()
 * one) - presumably the before and after forms of the same change; confirm
 * which set is current.
 */
D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector<u8> &vertex_data, ID3D12Device *device, data_heap<ID3D12Resource, 65536> &vertex_index_heap)
D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector<u8> &vertex_data, ID3D12Device *device, data_heap &vertex_index_heap)
{
size_t buffer_size = vertex_data.size();
assert(vertex_index_heap.can_alloc(buffer_size));
size_t heap_offset = vertex_index_heap.alloc(buffer_size);
size_t heap_offset = vertex_index_heap.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(vertex_index_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *bufferMap = (char*)buffer + heap_offset;
memcpy(bufferMap, vertex_data.data(), vertex_data.size());
vertex_index_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
return vertex_index_heap.m_heap->GetGPUVirtualAddress() + heap_offset;
memcpy(vertex_index_heap.map<float>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)), vertex_data.data(), vertex_data.size());
vertex_index_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
return vertex_index_heap.get_heap()->GetGPUVirtualAddress() + heap_offset;
}
}
@ -63,22 +59,19 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(co
u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
size_t buffer_size = element_size * vertex_count;
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
for (const auto &range : vertex_ranges)
{
write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info);
mapped_buffer = (char*)mapped_buffer + range.second * element_size;
}
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
@ -106,17 +99,14 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(co
u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
size_t buffer_size = data.size();
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
memcpy(mapped_buffer, data.data(), data.size());
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
@ -143,18 +133,16 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count)
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
{
assert(m_constants_data.can_alloc(256));
size_t heap_offset = m_constants_data.alloc(256);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(256);
// Scale offset buffer
// Separate constant buffer
void *mapped_buffer;
CHECK_HRESULT(m_constants_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer));
fill_scale_offset_data((char*)mapped_buffer + heap_offset);
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + 256));
fill_scale_offset_data(mapped_buffer);
int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF];
memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int));
memcpy((char*)mapped_buffer + heap_offset + 17 * sizeof(float), &alpha_ref, sizeof(float));
memcpy((char*)mapped_buffer + 16 * sizeof(float), &is_alpha_tested, sizeof(int));
memcpy((char*)mapped_buffer + 17 * sizeof(float), &alpha_ref, sizeof(float));
size_t tex_idx = 0;
for (u32 i = 0; i < rsx::limits::textures_count; ++i)
@ -162,19 +150,19 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
if (!textures[i].enabled())
{
int is_unorm = false;
memcpy((char*)mapped_buffer + heap_offset + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int));
memcpy((char*)mapped_buffer + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int));
continue;
}
size_t w = textures[i].width(), h = textures[i].height();
// if (!w || !h) continue;
int is_unorm = (textures[i].format() & CELL_GCM_TEXTURE_UN);
memcpy((char*)mapped_buffer + heap_offset + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int));
memcpy((char*)mapped_buffer + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int));
}
m_constants_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 256));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constants_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
256
};
m_device->CreateConstantBufferView(&constant_buffer_view_desc,
@ -186,16 +174,14 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
{
size_t buffer_size = 512 * 4 * sizeof(float);
assert(m_constants_data.can_alloc(buffer_size));
size_t heap_offset = m_constants_data.alloc(buffer_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *mapped_buffer;
CHECK_HRESULT(m_constants_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
fill_vertex_program_constants_data((char*)mapped_buffer + heap_offset);
m_constants_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
fill_vertex_program_constants_data(mapped_buffer);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constants_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size
};
m_device->CreateConstantBufferView(&constant_buffer_view_desc,
@ -210,18 +196,15 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_
// Multiple of 256 never 0
buffer_size = (buffer_size + 255) & ~255;
assert(m_constants_data.can_alloc(buffer_size));
size_t heap_offset = m_constants_data.alloc(buffer_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t offset = 0;
void *mapped_buffer;
CHECK_HRESULT(m_constants_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
float *buffer = (float*)((char*)mapped_buffer + heap_offset);
m_pso_cache.fill_fragment_constans_buffer({ buffer, gsl::narrow<int>(buffer_size) }, fragment_program);
m_constants_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
float *mapped_buffer = m_buffer_data.map<float>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_pso_cache.fill_fragment_constans_buffer({ mapped_buffer, gsl::narrow<int>(buffer_size) }, fragment_program);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constants_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size
};
m_device->CreateConstantBufferView(&constant_buffer_view_desc,
@ -257,17 +240,14 @@ std::tuple<D3D12_VERTEX_BUFFER_VIEW, size_t> D3D12GSRender::upload_inlined_verte
// Copy inline buffer
size_t buffer_size = inline_vertex_array.size() * sizeof(int);
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
write_inline_array_to_buffer(mapped_buffer);
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)offset
};
@ -283,12 +263,9 @@ std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer
// Alloc
size_t buffer_size = align(index_count * sizeof(u16), 64);
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
size_t first = 0;
for (const auto &pair : vertex_ranges)
{
@ -297,9 +274,9 @@ std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer
mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16);
first += pair.second;
}
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
DXGI_FORMAT_R16_UINT
};
@ -359,12 +336,9 @@ std::tuple<bool, size_t> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G
// Alloc
size_t buffer_size = align(index_count * index_size, 64);
assert(m_vertex_index_data.can_alloc(buffer_size));
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
u32 min_index = (u32)-1, max_index = 0;
for (const auto &pair : m_first_count_pairs)
{
@ -372,9 +346,9 @@ std::tuple<bool, size_t> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G
write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index);
mapped_buffer = (char*)mapped_buffer + element_count * index_size;
}
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset,
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
get_index_type(indexed_type)
};

View file

@ -248,6 +248,7 @@ namespace
{
case CELL_GCM_TEXTURE_NEAREST: return D3D12_FILTER_TYPE_POINT;
case CELL_GCM_TEXTURE_LINEAR: return D3D12_FILTER_TYPE_LINEAR;
case CELL_GCM_TEXTURE_UNKNOWN_MAG_FILTER: return D3D12_FILTER_TYPE_LINEAR;
}
throw EXCEPTION("Invalid mag filter (0x%x)", mag_filter);
}

View file

@ -89,11 +89,6 @@ D3D12DLLManagement::~D3D12DLLManagement()
D3D12GSRender::D3D12GSRender()
: GSRender(frame_type::DX12), m_d3d12_lib(), m_current_pso({})
{
m_previous_address_a = 0;
m_previous_address_b = 0;
m_previous_address_c = 0;
m_previous_address_d = 0;
m_previous_address_z = 0;
gfxHandler = [this](u32 addr) {
bool result = invalidate_address(addr);
if (result)
@ -195,14 +190,9 @@ D3D12GSRender::D3D12GSRender()
IID_PPV_ARGS(&m_dummy_texture))
);
m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST);
m_uav_heap.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES);
m_rtts.init(m_device.Get());
m_constants_data.init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
m_vertex_index_data.init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
m_texture_upload_data.init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST);
m_buffer_data.init(m_device.Get(), 1024 * 1024 * 896, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
if (rpcs3::config.rsx.d3d12.overlay.value())
init_d2d_structures();
@ -215,11 +205,6 @@ D3D12GSRender::~D3D12GSRender()
m_texture_cache.unprotect_all();
gfxHandler = [this](u32) { return false; };
m_constants_data.release();
m_vertex_index_data.release();
m_texture_upload_data.release();
m_uav_heap.m_heap->Release();
m_readback_resources.m_heap->Release();
m_dummy_texture->Release();
m_convertPSO->Release();
m_convertRootSignature->Release();
@ -405,15 +390,12 @@ void D3D12GSRender::flip(int buffer)
row_pitch = align(w * 4, 256);
size_t texture_size = row_pitch * h; // * 4 for mipmap levels
assert(m_texture_upload_data.can_alloc(texture_size));
size_t heap_offset = m_texture_upload_data.alloc(texture_size);
size_t heap_offset = m_buffer_data.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(texture_size);
void *buffer;
CHECK_HRESULT(m_texture_upload_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + texture_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = m_buffer_data.map<void>(heap_offset);
for (unsigned row = 0; row < h; row++)
memcpy((char*)mapped_buffer + row * row_pitch, (char*)src_buffer + row * w * 4, w * 4);
m_texture_upload_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + texture_size));
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + texture_size));
offset = heap_offset;
}
@ -428,7 +410,7 @@ void D3D12GSRender::flip(int buffer)
)
);
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.ram_framebuffer.Get(), 0), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_texture_upload_data.m_heap, { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)row_pitch } }), nullptr);
&CD3DX12_TEXTURE_COPY_LOCATION(m_buffer_data.get_heap(), { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)row_pitch } }), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.ram_framebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = storage.ram_framebuffer.Get();
@ -436,15 +418,15 @@ void D3D12GSRender::flip(int buffer)
}
else
{
if (m_rtts.bound_render_targets[0] != nullptr)
if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr)
{
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_render_targets[0], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = m_rtts.bound_render_targets[0];
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_render_targets[0]), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = std::get<1>(m_rtts.m_bound_render_targets[0]);
}
else if (m_rtts.bound_render_targets[1] != nullptr)
else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr)
{
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_render_targets[1], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = m_rtts.bound_render_targets[1];
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_render_targets[1]), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1]);
}
else
resource_to_flip = nullptr;
@ -546,15 +528,14 @@ void D3D12GSRender::flip(int buffer)
storage.fence_value++;
storage.in_use = true;
storage.dirty_textures.merge(m_rtts.invalidated_resources);
m_rtts.invalidated_resources.clear();
// Get the put pos - 1. This way, after cleaning, we can set the get ptr to
// this value, allowing the heap to proceed even if we cleaned before allocating
// a new value (that's the reason for the -1).
storage.constants_heap_get_pos = m_constants_data.get_current_put_pos_minus_one();
storage.vertex_index_heap_get_pos = m_vertex_index_data.get_current_put_pos_minus_one();
storage.texture_upload_heap_get_pos = m_texture_upload_data.get_current_put_pos_minus_one();
storage.buffer_heap_get_pos = m_buffer_data.get_current_put_pos_minus_one();
storage.readback_heap_get_pos = m_readback_resources.get_current_put_pos_minus_one();
storage.uav_heap_get_pos = m_uav_heap.get_current_put_pos_minus_one();
// Now get ready for next frame
resource_storage &new_storage = get_current_resource_storage();
@ -562,11 +543,8 @@ void D3D12GSRender::flip(int buffer)
new_storage.wait_and_clean();
if (new_storage.in_use)
{
m_constants_data.m_get_pos = new_storage.constants_heap_get_pos;
m_vertex_index_data.m_get_pos = new_storage.vertex_index_heap_get_pos;
m_texture_upload_data.m_get_pos = new_storage.texture_upload_heap_get_pos;
m_buffer_data.m_get_pos = new_storage.buffer_heap_get_pos;
m_readback_resources.m_get_pos = new_storage.readback_heap_get_pos;
m_uav_heap.m_get_pos = new_storage.uav_heap_get_pos;
}
m_frame->flip(nullptr);

View file

@ -112,14 +112,9 @@ private:
resource_storage &get_current_resource_storage();
resource_storage &get_non_current_resource_storage();
// Constants storage
data_heap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_constants_data;
// Vertex storage
data_heap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_vertex_index_data;
// Texture storage
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_texture_upload_data;
data_heap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_uav_heap;
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_readback_resources;
// Textures, constants, index and vertex buffers storage
data_heap m_buffer_data;
data_heap m_readback_resources;
render_targets m_rtts;
@ -134,11 +129,11 @@ private:
ID3D12Resource *m_dummy_texture;
// Store previous fbo addresses to detect RTT config changes.
u32 m_previous_address_a;
u32 m_previous_address_b;
u32 m_previous_address_c;
u32 m_previous_address_d;
u32 m_previous_address_z;
std::array<u32, 4> m_previous_color_address = {};
u32 m_previous_address_z = 0;
u32 m_previous_target = 0;
u32 m_previous_clip_horizontal = 0;
u32 m_previous_clip_vertical = 0;
public:
D3D12GSRender();
virtual ~D3D12GSRender();

View file

@ -3,48 +3,6 @@
#include "d3dx12.h"
/**
 * Factory for the D3D12 object backing a heap, selected by the type T.
 * Only the static init() is declared here, with no definition; this file
 * provides explicit specializations for ID3D12Heap and ID3D12Resource.
 */
template<typename T>
struct init_heap
{
static T* init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags);
};
/**
 * init_heap specialization for ID3D12Heap: creates the heap through
 * ID3D12Device::CreateHeap.
 */
template<>
struct init_heap<ID3D12Heap>
{
// Fills a zero-initialized D3D12_HEAP_DESC with the requested size, heap type
// and flags, then creates the heap. The HRESULT is passed to CHECK_HRESULT.
// Returns the raw interface pointer written by CreateHeap.
// NOTE(review): nothing here wraps the result in a smart pointer - presumably
// the caller is expected to Release() it; confirm.
static ID3D12Heap* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{
ID3D12Heap *result;
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = heap_size;
heap_desc.Properties.Type = type;
heap_desc.Flags = flags;
CHECK_HRESULT(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&result)));
return result;
}
};
/**
 * init_heap specialization for ID3D12Resource: creates a committed buffer
 * resource through ID3D12Device::CreateCommittedResource.
 */
template<>
struct init_heap<ID3D12Resource>
{
// Creates a buffer of heap_size bytes on a heap of the given type, with
// D3D12_HEAP_FLAG_NONE, `state` as the initial resource state and no
// optimized clear value (nullptr). The HRESULT is passed to CHECK_HRESULT.
// Returns the raw interface pointer written by CreateCommittedResource.
// NOTE(review): nothing here wraps the result in a smart pointer - presumably
// the caller is expected to Release() it; confirm.
static ID3D12Resource* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_RESOURCE_STATES state)
{
ID3D12Resource *result;
D3D12_HEAP_PROPERTIES heap_properties = {};
heap_properties.Type = type;
CHECK_HRESULT(device->CreateCommittedResource(&heap_properties,
D3D12_HEAP_FLAG_NONE,
// NOTE(review): this takes the address of the temporary returned by Buffer();
// that is not standard C++ and likely relies on a compiler extension - confirm.
&CD3DX12_RESOURCE_DESC::Buffer(heap_size),
state,
nullptr,
IID_PPV_ARGS(&result))
);
return result;
}
};
/**
* Wrapper around an ID3D12Resource or an ID3D12Heap.
* Acts as a ring buffer : hold a get and put pointers,
@ -52,43 +10,30 @@ struct init_heap<ID3D12Resource>
* and get is used as beginning of in use data space.
* This wrapper checks that put pointer doesn't cross get one.
*/
template<typename T, size_t alignment>
struct data_heap
class data_heap
{
T *m_heap;
size_t m_size;
size_t m_put_pos; // Start of free space
size_t m_get_pos; // End of free space
template <typename... arg_type>
void init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, arg_type... args)
{
m_size = heap_size;
m_heap = init_heap<T>::init(device, heap_size, type, args...);
m_put_pos = 0;
m_get_pos = heap_size - 1;
}
/**
* Does alloc cross get position ?
*/
template<int Alignement>
bool can_alloc(size_t size) const
{
size_t alloc_size = align(size, alignment);
if (m_put_pos + alloc_size < m_size)
size_t alloc_size = align(size, Alignement);
size_t aligned_put_pos = align(m_put_pos, Alignement);
if (aligned_put_pos + alloc_size < m_size)
{
// range before get
if (m_put_pos + alloc_size < m_get_pos)
if (aligned_put_pos + alloc_size < m_get_pos)
return true;
// range after get
if (m_put_pos > m_get_pos)
if (aligned_put_pos > m_get_pos)
return true;
return false;
}
else
{
// ..]....[..get..
if (m_put_pos < m_get_pos)
if (aligned_put_pos < m_get_pos)
return false;
// ..get..]...[...
// Actually all resources extending beyond heap space start at 0
@ -98,15 +43,40 @@ struct data_heap
}
}
size_t m_size;
size_t m_put_pos; // Start of free space
ComPtr<ID3D12Resource> m_heap;
public:
size_t m_get_pos; // End of free space
/**
 * Creates the committed buffer resource backing this heap and resets the
 * ring buffer pointers.
 *
 * @param device     Device used to create the resource.
 * @param heap_size  Size in bytes of the buffer; also stored as the ring size.
 * @param type       Heap type the buffer is committed to.
 * @param state      Initial resource state passed to CreateCommittedResource.
 *
 * The put pointer starts at 0 and the get pointer at heap_size - 1.
 * The HRESULT of the creation call is passed to CHECK_HRESULT.
 */
void init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_RESOURCE_STATES state)
{
	m_size = heap_size;
	m_put_pos = 0;
	m_get_pos = heap_size - 1;

	D3D12_HEAP_PROPERTIES heap_properties = {};
	heap_properties.Type = type;

	// Keep the descriptor in a named local: taking the address of the
	// temporary returned by Buffer() is not standard C++.
	const CD3DX12_RESOURCE_DESC buffer_desc = CD3DX12_RESOURCE_DESC::Buffer(heap_size);

	CHECK_HRESULT(device->CreateCommittedResource(&heap_properties,
		D3D12_HEAP_FLAG_NONE,
		&buffer_desc,
		state,
		nullptr, // no optimized clear value
		IID_PPV_ARGS(m_heap.GetAddressOf()))
	);
}
template<int Alignement>
size_t alloc(size_t size)
{
assert(can_alloc(size));
size_t alloc_size = align(size, alignment);
if (m_put_pos + alloc_size < m_size)
if (!can_alloc<Alignement>(size)) throw EXCEPTION("Working buffer not big enough");
size_t alloc_size = align(size, Alignement);
size_t aligned_put_pos = align(m_put_pos, Alignement);
if (aligned_put_pos + alloc_size < m_size)
{
size_t old_put_pos = m_put_pos;
m_put_pos += alloc_size;
return old_put_pos;
m_put_pos = aligned_put_pos + alloc_size;
return aligned_put_pos;
}
else
{
@ -115,9 +85,37 @@ struct data_heap
}
}
void release()
template<typename T>
T* map(const D3D12_RANGE &range)
{
m_heap->Release();
void *buffer;
CHECK_HRESULT(m_heap->Map(0, &range, &buffer));
void *mapped_buffer = (char*)buffer + range.Begin;
return static_cast<T*>(mapped_buffer);
}
/**
 * Maps subresource 0 of the heap with a null read range and returns a
 * pointer to the byte at heap_offset, typed as T*.
 *
 * @param heap_offset  Byte offset from the start of the mapped buffer.
 * @return Pointer into the mapped memory at heap_offset.
 *
 * The HRESULT of Map is passed to CHECK_HRESULT.
 */
template<typename T>
T* map(size_t heap_offset)
{
	void *base_address;
	CHECK_HRESULT(m_heap->Map(0, nullptr, &base_address));
	char *target_byte = static_cast<char*>(base_address) + heap_offset;
	return static_cast<T*>(static_cast<void*>(target_byte));
}
/**
 * Unmaps subresource 0 of the heap, passing `range` as the second argument
 * of ID3D12Resource::Unmap (the written range, per the D3D12 documentation).
 */
void unmap(const D3D12_RANGE &range)
{
m_heap->Unmap(0, &range);
}
/**
 * Unmaps subresource 0 of the heap with a null range argument.
 * NOTE(review): per the D3D12 documentation a null written range means the
 * whole subresource may have been written - confirm this is intended.
 */
void unmap()
{
m_heap->Unmap(0, nullptr);
}
/**
 * Returns the raw ID3D12Resource pointer held by m_heap.
 * The pointer comes from ComPtr::Get(), so m_heap keeps its own reference.
 */
ID3D12Resource* get_heap()
{
return m_heap.Get();
}
/**
@ -230,11 +228,8 @@ struct resource_storage
* This means newer resources shouldn't allocate memory crossing this position
* until the frame rendering is over.
*/
size_t constants_heap_get_pos;
size_t vertex_index_heap_get_pos;
size_t texture_upload_heap_get_pos;
size_t buffer_heap_get_pos;
size_t readback_heap_get_pos;
size_t uav_heap_get_pos;
void reset();
void init(ID3D12Device *device);

View file

@ -77,29 +77,25 @@ void D3D12GSRender::clear_surface(u32 arg)
if (arg & 0x1 || arg & 0x2)
{
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().depth_stencil_descriptor_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().depth_stencil_descriptor_heap_index * g_descriptor_stride_rtv);
m_rtts.bind_depth_stencil(m_device.Get(), m_surface.depth_format, handle);
get_current_resource_storage().depth_stencil_descriptor_heap_index++;
if (arg & 0x1)
{
u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8;
u32 max_depth_value = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 0x0000ffff : 0x00ffffff;
get_current_resource_storage().command_list->ClearDepthStencilView(handle, D3D12_CLEAR_FLAG_DEPTH, clear_depth / (float)max_depth_value, 0,
get_current_resource_storage().command_list->ClearDepthStencilView(m_rtts.current_ds_handle, D3D12_CLEAR_FLAG_DEPTH, clear_depth / (float)max_depth_value, 0,
1, &get_scissor(rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL], rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]));
}
if (arg & 0x2)
get_current_resource_storage().command_list->ClearDepthStencilView(handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, get_clear_stencil(rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE]),
get_current_resource_storage().command_list->ClearDepthStencilView(m_rtts.current_ds_handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, get_clear_stencil(rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE]),
1, &get_scissor(rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL], rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]));
}
if (arg & 0xF0)
{
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().render_targets_descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().render_targets_descriptors_heap_index * g_descriptor_stride_rtv);
size_t rtt_index = m_rtts.bind_render_targets(m_device.Get(), m_surface.color_format, handle);
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.current_rtts_handle);
size_t rtt_index = get_num_rtt(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
get_current_resource_storage().render_targets_descriptors_heap_index += rtt_index;
for (unsigned i = 0; i < rtt_index; i++)
get_current_resource_storage().command_list->ClearRenderTargetView(handle.Offset(i, g_descriptor_stride_rtv), get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]).data(),
@ -120,16 +116,8 @@ void D3D12GSRender::clear_surface(u32 arg)
void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlist)
{
// check if something has changed
u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL];
u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL];
u32 clip_width = clip_horizontal >> 16;
u32 clip_height = clip_vertical >> 16;
u32 clip_x = clip_horizontal;
u32 clip_y = clip_vertical;
u32 context_dma_color[] =
{
rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_A],
@ -149,7 +137,7 @@ void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlis
u32 offset_zeta = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
// FBO location has changed, previous data might be copied
u32 address_color[] =
std::array<u32, 4> address_color =
{
rsx::get_address(offset_color[0], context_dma_color[0]),
rsx::get_address(offset_color[1], context_dma_color[1]),
@ -158,202 +146,71 @@ void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlis
};
u32 address_z = rsx::get_address(offset_zeta, m_context_dma_z);
u32 clip_h_reg = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL];
u32 clip_v_reg = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL];
u32 target_reg = rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET];
// Exit early if there are no RTT changes
if (m_previous_address_a == address_color[0] &&
m_previous_address_b == address_color[1] &&
m_previous_address_c == address_color[2] &&
m_previous_address_d == address_color[3] &&
if (m_previous_color_address == address_color &&
m_previous_address_z == address_z &&
m_surface.format == surface_format)
m_surface.format == surface_format &&
m_previous_clip_horizontal == clip_h_reg &&
m_previous_clip_vertical == clip_v_reg &&
m_previous_target == target_reg)
return;
m_previous_address_a = address_color[0];
m_previous_address_b = address_color[1];
m_previous_address_c = address_color[2];
m_previous_address_d = address_color[3];
m_previous_color_address = address_color;
m_previous_address_z = address_z;
m_previous_target = target_reg;
m_previous_clip_horizontal = clip_h_reg;
m_previous_clip_vertical = clip_v_reg;
if (m_surface.format != surface_format)
{
m_surface.unpack(surface_format);
m_surface.width = clip_width;
m_surface.height = clip_height;
}
// Make previous RTTs sampleable
for (unsigned i = 0; i < 4; i++)
{
if (m_rtts.bound_render_targets[i] == nullptr)
continue;
copycmdlist->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_render_targets[i], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
}
// Reset bound data
memset(m_rtts.bound_render_targets_address, 0, 4 * sizeof(u32));
memset(m_rtts.bound_render_targets, 0, 4 * sizeof(ID3D12Resource *));
// Create/Reuse requested rtts
std::array<float, 4> clear_color = get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]);
for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
ComPtr<ID3D12Resource> old_render_target_resource;
m_rtts.bound_render_targets[i] = m_rtts.bind_address_as_render_targets(m_device.Get(), copycmdlist, address_color[i], clip_width, clip_height, m_surface.color_format,
clear_color, old_render_target_resource);
if (old_render_target_resource)
get_current_resource_storage().dirty_textures.push_back(old_render_target_resource);
m_rtts.bound_render_targets_address[i] = address_color[i];
}
m_rtts.prepare_render_target(copycmdlist, surface_format, clip_h_reg, clip_v_reg, target_reg, address_color, address_z, m_device.Get(), clear_color, 1.f, 0);
// Same for depth buffer
if (m_rtts.bound_depth_stencil != nullptr)
copycmdlist->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
m_rtts.bound_depth_stencil = nullptr;
m_rtts.bound_depth_stencil_address = 0;
if (!address_z)
return;
ComPtr<ID3D12Resource> old_depth_stencil_resource;
ID3D12Resource *ds = m_rtts.bind_address_as_depth_stencil(m_device.Get(), copycmdlist, address_z, clip_width, clip_height, m_surface.depth_format, 1., 0, old_depth_stencil_resource);
if (old_depth_stencil_resource)
get_current_resource_storage().dirty_textures.push_back(old_depth_stencil_resource);
m_rtts.bound_depth_stencil_address = address_z;
m_rtts.bound_depth_stencil = ds;
}
size_t render_targets::bind_render_targets(ID3D12Device *device, u32 color_format, D3D12_CPU_DESCRIPTOR_HANDLE handle)
{
DXGI_FORMAT dxgi_format = get_color_surface_format(color_format);
// write descriptors
DXGI_FORMAT dxgi_format = get_color_surface_format(m_surface.color_format);
D3D12_RENDER_TARGET_VIEW_DESC rtt_view_desc = {};
rtt_view_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
rtt_view_desc.Format = dxgi_format;
m_rtts.current_rtts_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().render_targets_descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().render_targets_descriptors_heap_index * g_descriptor_stride_rtv);
size_t rtt_index = 0;
for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
if (bound_render_targets[i] == nullptr)
if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr)
continue;
device->CreateRenderTargetView(bound_render_targets[i], &rtt_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(handle).Offset((INT)rtt_index * g_descriptor_stride_rtv));
m_device->CreateRenderTargetView(std::get<1>(m_rtts.m_bound_render_targets[i]), &rtt_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.current_rtts_handle).Offset((INT)rtt_index * g_descriptor_stride_rtv));
rtt_index++;
}
return rtt_index;
}
get_current_resource_storage().render_targets_descriptors_heap_index += rtt_index;
size_t render_targets::bind_depth_stencil(ID3D12Device *device, u32 depth_format, D3D12_CPU_DESCRIPTOR_HANDLE handle)
{
if (!bound_depth_stencil)
return 0;
if (std::get<1>(m_rtts.m_bound_depth_stencil) == nullptr)
return;
m_rtts.current_ds_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().depth_stencil_descriptor_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().depth_stencil_descriptor_heap_index * g_descriptor_stride_rtv);
get_current_resource_storage().depth_stencil_descriptor_heap_index += 1;
D3D12_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc = {};
depth_stencil_view_desc.Format = get_depth_stencil_surface_format(depth_format);
depth_stencil_view_desc.Format = get_depth_stencil_surface_format(m_surface.depth_format);
depth_stencil_view_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
device->CreateDepthStencilView(bound_depth_stencil, &depth_stencil_view_desc, handle);
return 1;
m_device->CreateDepthStencilView(std::get<1>(m_rtts.m_bound_depth_stencil), &depth_stencil_view_desc, m_rtts.current_ds_handle);
}
void D3D12GSRender::set_rtt_and_ds(ID3D12GraphicsCommandList *command_list)
{
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().render_targets_descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().render_targets_descriptors_heap_index * g_descriptor_stride_rtv);
size_t num_rtt = m_rtts.bind_render_targets(m_device.Get(), m_surface.color_format, handle);
get_current_resource_storage().render_targets_descriptors_heap_index += num_rtt;
CD3DX12_CPU_DESCRIPTOR_HANDLE depth_stencil_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().depth_stencil_descriptor_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().depth_stencil_descriptor_heap_index * g_descriptor_stride_rtv);
size_t num_ds = m_rtts.bind_depth_stencil(m_device.Get(), m_surface.depth_format, depth_stencil_handle);
get_current_resource_storage().depth_stencil_descriptor_heap_index += num_ds;
command_list->OMSetRenderTargets((UINT)num_rtt, num_rtt > 0 ? &handle : nullptr, !!num_rtt,
num_ds > 0 ? &depth_stencil_handle : nullptr);
UINT num_rtt = get_num_rtt(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
D3D12_CPU_DESCRIPTOR_HANDLE* rtt_handle = (num_rtt > 0) ? &m_rtts.current_rtts_handle : nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE* ds_handle = (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) ? &m_rtts.current_ds_handle : nullptr;
command_list->OMSetRenderTargets((UINT)num_rtt, rtt_handle, true, ds_handle);
}
ID3D12Resource *render_targets::bind_address_as_render_targets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address,
size_t width, size_t height, u8 surfaceColorFormat, const std::array<float, 4> &clear_color, ComPtr<ID3D12Resource> &dirtyRTT)
void render_targets::init(ID3D12Device *device)
{
DXGI_FORMAT dxgi_format = get_color_surface_format(surfaceColorFormat);
auto It = render_targets_storage.find(address);
// TODO: Check if format and size match
if (It != render_targets_storage.end())
{
ComPtr<ID3D12Resource> rtt;
rtt = It->second.Get();
if (rtt->GetDesc().Format == dxgi_format && rtt->GetDesc().Width == width && rtt->GetDesc().Height == height)
{
cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET));
return rtt.Get();
}
render_targets_storage.erase(address);
dirtyRTT = rtt;
}
ComPtr<ID3D12Resource> rtt;
LOG_WARNING(RSX, "Creating RTT");
D3D12_CLEAR_VALUE clear_color_value = {};
clear_color_value.Format = dxgi_format;
clear_color_value.Color[0] = clear_color[0];
clear_color_value.Color[1] = clear_color[1];
clear_color_value.Color[2] = clear_color[2];
clear_color_value.Color[3] = clear_color[3];
device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET),
D3D12_RESOURCE_STATE_RENDER_TARGET,
&clear_color_value,
IID_PPV_ARGS(rtt.GetAddressOf())
);
render_targets_storage[address] = rtt;
std::wstring name = L"rtt_@" + std::to_wstring(address);
rtt->SetName(name.c_str());
return rtt.Get();
}
/**
 * Binds the depth stencil surface located at address.
 *
 * If a surface is already cached for address and its width and height match, a
 * GENERIC_READ -> DEPTH_WRITE transition is recorded on cmdList and the cached surface is returned.
 * If the cached surface has a different size it is removed from the cache and handed back
 * through dirtyDS, then a new surface is created in the DEPTH_WRITE state.
 *
 * device             device used to create a new surface when needed.
 * cmdList            command list receiving the state transition of a reused surface.
 * address            key of the surface in depth_stencil_storage.
 * width, height      requested surface size.
 * surfaceDepthFormat depth format used to pick the resource and clear value formats.
 * depthClear         optimized clear value for depth.
 * stencilClear       optimized clear value for stencil.
 * dirtyDS            receives the evicted surface, if any; left untouched otherwise.
 *
 * Returns the bound surface; the cache keeps the owning reference.
 * A failed resource creation is reported through CHECK_HRESULT.
 */
ID3D12Resource * render_targets::bind_address_as_depth_stencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr<ID3D12Resource> &dirtyDS)
{
	auto It = depth_stencil_storage.find(address);

	// TODO: Check if surface depth format match
	if (It != depth_stencil_storage.end())
	{
		ComPtr<ID3D12Resource> ds = It->second;
		const D3D12_RESOURCE_DESC cached_desc = ds->GetDesc();
		if (cached_desc.Width == width && cached_desc.Height == height)
		{
			// set the resource as depth write
			cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE));
			return ds.Get();
		}
		// If size doesn't match, remove ds from cache.
		// The local ComPtr copy keeps the resource alive after the erase.
		depth_stencil_storage.erase(It);
		dirtyDS = ds;
	}

	D3D12_CLEAR_VALUE clear_depth_value = {};
	clear_depth_value.DepthStencil.Depth = depthClear;
	// The stencil clear value used to be silently dropped (left at 0).
	clear_depth_value.DepthStencil.Stencil = stencilClear;

	DXGI_FORMAT dxgi_format = get_depth_stencil_typeless_surface_format(surfaceDepthFormat);
	clear_depth_value.Format = get_depth_stencil_surface_clear_format(surfaceDepthFormat);

	ComPtr<ID3D12Resource> new_depth_stencil;
	// Check the result: on failure new_depth_stencil stays null and would be dereferenced below.
	CHECK_HRESULT(
		device->CreateCommittedResource(
			&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
			D3D12_HEAP_FLAG_NONE,
			&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL),
			D3D12_RESOURCE_STATE_DEPTH_WRITE,
			&clear_depth_value,
			IID_PPV_ARGS(new_depth_stencil.GetAddressOf())
		)
	);
	depth_stencil_storage[address] = new_depth_stencil;

	// Debug name makes the surface identifiable in graphics debuggers.
	std::wstring name = L"ds_@" + std::to_wstring(address);
	new_depth_stencil->SetName(name.c_str());

	return new_depth_stencil.Get();
}
/**
 * Puts the bound surface bookkeeping in its "nothing bound" state and
 * queries the RTV descriptor handle increment size from device.
 */
void render_targets::init(ID3D12Device *device)
{
	// No color surface is bound in any of the 4 slots
	for (unsigned slot = 0; slot < 4; slot++)
	{
		bound_render_targets_address[slot] = 0;
		bound_render_targets[slot] = nullptr;
	}

	// No depth stencil surface is bound either
	bound_depth_stencil_address = 0;
	bound_depth_stencil = nullptr;

	g_descriptor_stride_rtv = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
}
@ -366,7 +223,7 @@ namespace
size_t download_to_readback_buffer(
ID3D12Device *device,
ID3D12GraphicsCommandList * command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &readback_heap,
data_heap &readback_heap,
ID3D12Resource * color_surface,
int color_surface_format
)
@ -390,23 +247,20 @@ namespace
}
size_t buffer_size = row_pitch * clip_h;
assert(readback_heap.can_alloc(buffer_size));
size_t heap_offset = readback_heap.alloc(buffer_size);
size_t heap_offset = readback_heap.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE));
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.m_heap, { heap_offset, { dxgi_format, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.get_heap(), { heap_offset, { dxgi_format, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(color_surface, 0), nullptr);
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET));
return heap_offset;
}
void copy_readback_buffer_to_dest(void *dest, data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &readback_heap, size_t offset_in_heap, size_t dst_pitch, size_t src_pitch, size_t height)
void copy_readback_buffer_to_dest(void *dest, data_heap &readback_heap, size_t offset_in_heap, size_t dst_pitch, size_t src_pitch, size_t height)
{
void *buffer;
// TODO: Use exact range
CHECK_HRESULT(readback_heap.m_heap->Map(0, nullptr, &buffer));
void *mapped_buffer = (char*)buffer + offset_in_heap;
void *mapped_buffer = readback_heap.map<void>(offset_in_heap);
for (unsigned row = 0; row < height; row++)
{
u32 *casted_dest = (u32*)((char*)dest + row * dst_pitch);
@ -414,7 +268,7 @@ namespace
for (unsigned col = 0; col < src_pitch / 4; col++)
*casted_dest++ = se_storage<u32>::swap(*casted_src++);
}
readback_heap.m_heap->Unmap(0, nullptr);
readback_heap.unmap();
}
void wait_for_command_queue(ID3D12Device *device, ID3D12CommandQueue *command_queue)
@ -474,13 +328,11 @@ void D3D12GSRender::copy_render_target_to_dma_location()
if (m_context_dma_z && rpcs3::state.config.rsx.opengl.write_depth_buffer)
{
size_t uav_size = clip_w * clip_h * 2;
assert(m_uav_heap.can_alloc(uav_size));
size_t heap_offset = m_uav_heap.alloc(uav_size);
CHECK_HRESULT(
m_device->CreatePlacedResource(
m_uav_heap.m_heap,
heap_offset,
m_device->CreateCommittedResource(
&D3D12_HEAP_PROPERTIES{D3D12_HEAP_TYPE_DEFAULT},
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, clip_w, clip_h, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
nullptr,
@ -497,7 +349,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
shader_resource_view_desc.Texture2D.MipLevels = 1;
shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
m_device->CreateShaderResourceView(m_rtts.bound_depth_stencil, &shader_resource_view_desc,
m_device->CreateShaderResourceView(std::get<1>(m_rtts.m_bound_depth_stencil), &shader_resource_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart()));
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
uav_desc.Format = DXGI_FORMAT_R8_UNORM;
@ -506,7 +358,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptor_stride_srv_cbv_uav));
// Convert
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
get_current_resource_storage().command_list->SetPipelineState(m_convertPSO);
get_current_resource_storage().command_list->SetComputeRootSignature(m_convertRootSignature);
@ -516,12 +368,12 @@ void D3D12GSRender::copy_render_target_to_dma_location()
D3D12_RESOURCE_BARRIER barriers[] =
{
CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE),
CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE),
CD3DX12_RESOURCE_BARRIER::UAV(depth_format_conversion_buffer.Get()),
};
get_current_resource_storage().command_list->ResourceBarrier(2, barriers);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(depth_format_conversion_buffer.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.m_heap, { depth_buffer_offset_in_heap,{ DXGI_FORMAT_R8_UNORM, (UINT)clip_w, (UINT)clip_h, 1, (UINT)depth_row_pitch } }), 0, 0, 0,
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { depth_buffer_offset_in_heap,{ DXGI_FORMAT_R8_UNORM, (UINT)clip_w, (UINT)clip_h, 1, (UINT)depth_row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(depth_format_conversion_buffer.Get(), 0), nullptr);
invalidate_address(address_z);
@ -536,7 +388,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
{
if (!address_color[i])
continue;
color_buffer_offset_in_heap[i] = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, m_rtts.bound_render_targets[i], m_surface.color_format);
color_buffer_offset_in_heap[i] = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface.color_format);
invalidate_address(address_color[i]);
need_transfer = true;
}
@ -555,10 +407,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
{
auto ptr = vm::base(address_z);
char *depth_buffer = (char*)ptr;
void *buffer;
// TODO: Use exact range
CHECK_HRESULT(m_readback_resources.m_heap->Map(0, nullptr, &buffer));
unsigned char *mapped_buffer = (unsigned char*)buffer + depth_buffer_offset_in_heap;
u8 *mapped_buffer = m_readback_resources.map<u8>(depth_buffer_offset_in_heap);
for (unsigned row = 0; row < (unsigned)clip_h; row++)
{
@ -571,7 +420,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
depth_buffer[4 * (row * clip_w + i) + 3] = c;
}
}
m_readback_resources.m_heap->Unmap(0, nullptr);
m_readback_resources.unmap();
}
size_t srcPitch, dstPitch;
@ -613,7 +462,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
void D3D12GSRender::copy_render_targets_to_memory(void *buffer, u8 rtt)
{
size_t heap_offset = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, m_rtts.bound_render_targets[rtt], m_surface.color_format);
size_t heap_offset = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, std::get<1>(m_rtts.m_bound_render_targets[rtt]), m_surface.color_format);
CHECK_HRESULT(get_current_resource_storage().command_list->Close());
m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf());
@ -651,14 +500,13 @@ void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
size_t row_pitch = align(clip_w * 4, 256);
size_t buffer_size = row_pitch * clip_h;
assert(m_readback_resources.can_alloc(buffer_size));
size_t heap_offset = m_readback_resources.alloc(buffer_size);
size_t heap_offset = m_readback_resources.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.m_heap, { heap_offset,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 0), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { heap_offset,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(std::get<1>(m_rtts.m_bound_depth_stencil), 0), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
CHECK_HRESULT(get_current_resource_storage().command_list->Close());
m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf());
@ -667,9 +515,7 @@ void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
wait_for_command_queue(m_device.Get(), m_command_queue.Get());
m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one();
void *temp_buffer;
CHECK_HRESULT(m_readback_resources.m_heap->Map(0, nullptr, &temp_buffer));
void *mapped_buffer = (char*)temp_buffer + heap_offset;
void *mapped_buffer = m_readback_resources.map<void>(heap_offset);
for (unsigned row = 0; row < clip_h; row++)
{
u32 *casted_dest = (u32*)((char*)buffer + row * clip_w * 4);
@ -677,7 +523,7 @@ void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
for (unsigned col = 0; col < row_pitch / 4; col++)
*casted_dest++ = *casted_src++;
}
m_readback_resources.m_heap->Unmap(0, nullptr);
m_readback_resources.unmap();
}
@ -689,14 +535,13 @@ void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
size_t row_pitch = align(clip_w * 4, 256);
size_t buffer_size = row_pitch * clip_h;
assert(m_readback_resources.can_alloc(buffer_size));
size_t heap_offset = m_readback_resources.alloc(buffer_size);
size_t heap_offset = m_readback_resources.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.m_heap, { heap_offset, { DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 1), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { heap_offset, { DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(std::get<1>(m_rtts.m_bound_depth_stencil), 1), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
CHECK_HRESULT(get_current_resource_storage().command_list->Close());
m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf());
@ -705,9 +550,7 @@ void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
wait_for_command_queue(m_device.Get(), m_command_queue.Get());
m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one();
void *temp_buffer;
CHECK_HRESULT(m_readback_resources.m_heap->Map(0, nullptr, &temp_buffer));
void *mapped_buffer = (char*)temp_buffer + heap_offset;
void *mapped_buffer = m_readback_resources.map<void>(heap_offset);
for (unsigned row = 0; row < clip_h; row++)
{
char *casted_dest = (char*)buffer + row * clip_w;
@ -715,7 +558,7 @@ void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
for (unsigned col = 0; col < row_pitch; col++)
*casted_dest++ = *casted_src++;
}
m_readback_resources.m_heap->Unmap(0, nullptr);
m_readback_resources.unmap();
}
#endif

View file

@ -1,30 +1,297 @@
#pragma once
#include <utility>
#include <d3d12.h>
#include "d3dx12.h"
struct render_targets
#include "D3D12Formats.h"
#include <gsl.h>
namespace rsx
{
namespace
{
/**
 * Translates a CELL_GCM_SURFACE_TARGET_* value into the list of color surface
 * indexes it selects (empty for CELL_GCM_SURFACE_TARGET_NONE).
 * Throws if color_target is none of the handled values.
 */
std::vector<u8> get_rtt_indexes(u8 color_target)
{
	if (color_target == CELL_GCM_SURFACE_TARGET_NONE)
		return{};
	if (color_target == CELL_GCM_SURFACE_TARGET_0)
		return{ 0 };
	if (color_target == CELL_GCM_SURFACE_TARGET_1)
		return{ 1 };
	if (color_target == CELL_GCM_SURFACE_TARGET_MRT1)
		return{ 0, 1 };
	if (color_target == CELL_GCM_SURFACE_TARGET_MRT2)
		return{ 0, 1, 2 };
	if (color_target == CELL_GCM_SURFACE_TARGET_MRT3)
		return{ 0, 1, 2, 3 };

	// Not a known target selection
	throw EXCEPTION("Wrong color_target (%d)", color_target);
}
}
template<typename Traits>
struct surface_store
{
private:
using surface_storage_type = typename Traits::surface_storage_type;
using surface_type = typename Traits::surface_type;
using command_list_type = typename Traits::command_list_type;
std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {};
std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {};
public:
std::array<std::tuple<u32, surface_type>, 4> m_bound_render_targets = {};
std::tuple<u32, surface_type> m_bound_depth_stencil = {};
std::list<surface_storage_type> invalidated_resources;
surface_store() = default;
~surface_store() = default;
surface_store(const surface_store&) = delete;
private:
/**
* If render target already exists at address, issue state change operation on cmdList.
* Otherwise create one with width, height, clearColor info.
* returns the corresponding render target resource.
*/
template <typename ...Args>
gsl::not_null<surface_type> bind_address_as_render_targets(
command_list_type command_list,
u32 address,
u8 surface_color_format, size_t width, size_t height,
Args&&... extra_params)
{
auto It = m_render_targets_storage.find(address);
// TODO: Fix corner cases
// This doesn't take overlapping surface(s) into account.
// Invalidated surface(s) should also copy their content to the new resources.
if (It != m_render_targets_storage.end())
{
surface_storage_type &rtt = It->second;
if (Traits::rtt_has_format_width_height(rtt, surface_color_format, width, height))
{
Traits::prepare_rtt_for_drawing(command_list, rtt.Get());
return rtt.Get();
}
invalidated_resources.push_back(std::move(rtt));
m_render_targets_storage.erase(address);
}
m_render_targets_storage[address] = Traits::create_new_render_target(address, surface_color_format, width, height, std::forward<Args>(extra_params)...);
return m_render_targets_storage[address].Get();
}
template <typename ...Args>
gsl::not_null<surface_type> bind_address_as_depth_stencil(
command_list_type command_list,
u32 address,
u8 surface_depth_format, size_t width, size_t height,
Args&&... extra_params)
{
auto It = m_depth_stencil_storage.find(address);
if (It != m_depth_stencil_storage.end())
{
surface_storage_type &ds = It->second;
if (Traits::ds_has_format_width_height(ds, surface_depth_format, width, height))
{
Traits::prepare_ds_for_drawing(command_list, ds.Get());
return ds.Get();
}
invalidated_resources.push_back(std::move(ds));
m_depth_stencil_storage.erase(address);
}
m_depth_stencil_storage[address] = Traits::create_new_depth_stencil(address, surface_depth_format, width, height, std::forward<Args>(extra_params)...);
return m_depth_stencil_storage[address].Get();
}
public:
template <typename ...Args>
void prepare_render_target(
command_list_type command_list,
u32 set_surface_format_reg,
u32 clip_horizontal_reg, u32 clip_vertical_reg,
u32 set_surface_target,
const std::array<u32, 4> &surface_addresses, u32 address_z,
Args&&... extra_params)
{
u32 clip_width = clip_horizontal_reg >> 16;
u32 clip_height = clip_vertical_reg >> 16;
u32 clip_x = clip_horizontal_reg;
u32 clip_y = clip_vertical_reg;
rsx::surface_info surface = {};
surface.unpack(set_surface_format_reg);
// Make previous RTTs sampleable
for (std::tuple<u32, surface_type> &rtt : m_bound_render_targets)
{
if (std::get<1>(rtt) != nullptr)
Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt));
rtt = std::make_tuple(0, nullptr);
}
// Create/Reuse requested rtts
for (u8 surface_index : get_rtt_indexes(set_surface_target))
{
if (surface_addresses[surface_index] == 0)
continue;
m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index],
bind_address_as_render_targets(command_list, surface_addresses[surface_index], surface.color_format, clip_width, clip_height, std::forward<Args>(extra_params)...));
}
// Same for depth buffer
if (std::get<1>(m_bound_depth_stencil) != nullptr)
Traits::prepare_ds_for_sampling(command_list, std::get<1>(m_bound_depth_stencil));
m_bound_depth_stencil = std::make_tuple(0, nullptr);
if (!address_z)
return;
m_bound_depth_stencil = std::make_tuple(address_z,
bind_address_as_depth_stencil(command_list, address_z, surface.depth_format, clip_width, clip_height, std::forward<Args>(extra_params)...));
}
surface_type get_texture_from_render_target_if_applicable(u32 address)
{
// TODO: Handle texture that overlaps one (or several) surface.
// Handle texture conversion
// FIXME: Disgaea 3 loading screen seems to use a subset of a surface. It's not properly handled here.
// Note: not const because conversions/resolve/... can happen
auto It = m_render_targets_storage.find(address);
if (It != m_render_targets_storage.end())
return It->second.Get();
return surface_type();
}
surface_type get_texture_from_depth_stencil_if_applicable(u32 address)
{
// TODO: Same as above although there wasn't any game using corner case for DS yet.
auto It = m_depth_stencil_storage.find(address);
if (It != m_depth_stencil_storage.end())
return It->second.Get();
return surface_type();
}
};
}
/**
 * D3D12 implementation of the traits expected by rsx::surface_store.
 * Surfaces are committed ID3D12Resource textures; state changes are resource barriers
 * recorded on an ID3D12GraphicsCommandList.
 */
struct render_target_traits
{
	using surface_storage_type = ComPtr<ID3D12Resource>;
	using surface_type = ID3D12Resource*;
	using command_list_type = gsl::not_null<ID3D12GraphicsCommandList*>;

	/**
	 * Creates a 2D texture usable as render target, in the RENDER_TARGET state,
	 * with clear_color as optimized clear value and "rtt_@<address>" as debug name.
	 * The trailing float/u8 parameters (depth/stencil clear values) are unused here.
	 * Throws if the resource can't be created.
	 */
	static
	ComPtr<ID3D12Resource> create_new_render_target(
		u32 address,
		u8 surface_color_format, size_t width, size_t height,
		gsl::not_null<ID3D12Device*> device, const std::array<float, 4> &clear_color, float, u8)
	{
		DXGI_FORMAT dxgi_format = get_color_surface_format(surface_color_format);
		ComPtr<ID3D12Resource> rtt;
		LOG_WARNING(RSX, "Creating RTT");

		D3D12_CLEAR_VALUE clear_color_value = {};
		clear_color_value.Format = dxgi_format;
		clear_color_value.Color[0] = clear_color[0];
		clear_color_value.Color[1] = clear_color[1];
		clear_color_value.Color[2] = clear_color[2];
		clear_color_value.Color[3] = clear_color[3];

		const HRESULT hr = device->CreateCommittedResource(
			&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
			D3D12_HEAP_FLAG_NONE,
			&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET),
			D3D12_RESOURCE_STATE_RENDER_TARGET,
			&clear_color_value,
			IID_PPV_ARGS(rtt.GetAddressOf())
			);
		// On failure rtt stays null: report it instead of dereferencing it below.
		if (FAILED(hr))
			throw EXCEPTION("Failed to create render target @0x%x (HRESULT = 0x%x)", address, static_cast<u32>(hr));

		std::wstring name = L"rtt_@" + std::to_wstring(address);
		rtt->SetName(name.c_str());

		return rtt;
	}

	/**
	 * Records a GENERIC_READ -> RENDER_TARGET transition for rtt.
	 */
	static
	void prepare_rtt_for_drawing(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* rtt)
	{
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET));
	}

	/**
	 * Records a RENDER_TARGET -> GENERIC_READ transition for rtt.
	 */
	static
	void prepare_rtt_for_sampling(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* rtt)
	{
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
	}

	/**
	 * Creates a 2D texture usable as depth stencil, in the DEPTH_WRITE state,
	 * with clear_depth/clear_stencil as optimized clear value and "ds_@<address>" as debug name.
	 * The std::array parameter (color clear value) is unused here.
	 * Throws if the resource can't be created.
	 */
	static
	ComPtr<ID3D12Resource> create_new_depth_stencil(
		u32 address,
		u8 surfaceDepthFormat, size_t width, size_t height,
		gsl::not_null<ID3D12Device*> device, const std::array<float, 4>& , float clear_depth, u8 clear_stencil)
	{
		D3D12_CLEAR_VALUE clear_depth_value = {};
		clear_depth_value.DepthStencil.Depth = clear_depth;
		clear_depth_value.DepthStencil.Stencil = clear_stencil;

		// The resource itself uses the typeless format; the clear value needs a typed one.
		DXGI_FORMAT dxgi_format = get_depth_stencil_typeless_surface_format(surfaceDepthFormat);
		clear_depth_value.Format = get_depth_stencil_surface_clear_format(surfaceDepthFormat);

		ComPtr<ID3D12Resource> new_depth_stencil;
		const HRESULT hr = device->CreateCommittedResource(
			&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
			D3D12_HEAP_FLAG_NONE,
			&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL),
			D3D12_RESOURCE_STATE_DEPTH_WRITE,
			&clear_depth_value,
			IID_PPV_ARGS(new_depth_stencil.GetAddressOf())
			);
		// On failure new_depth_stencil stays null: report it instead of dereferencing it below.
		if (FAILED(hr))
			throw EXCEPTION("Failed to create depth stencil @0x%x (HRESULT = 0x%x)", address, static_cast<u32>(hr));

		std::wstring name = L"ds_@" + std::to_wstring(address);
		new_depth_stencil->SetName(name.c_str());

		return new_depth_stencil;
	}

	/**
	 * Records a GENERIC_READ -> DEPTH_WRITE transition for ds.
	 */
	static
	void prepare_ds_for_drawing(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* ds)
	{
		// set the resource as depth write
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE));
	}

	/**
	 * Records a DEPTH_WRITE -> GENERIC_READ transition for ds.
	 */
	static
	void prepare_ds_for_sampling(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* ds)
	{
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
	}

	/**
	 * Returns true if rtt has the DXGI format matching surface_color_format and the given size.
	 */
	static
	bool rtt_has_format_width_height(const ComPtr<ID3D12Resource> &rtt, u8 surface_color_format, size_t width, size_t height)
	{
		DXGI_FORMAT dxgi_format = get_color_surface_format(surface_color_format);
		const D3D12_RESOURCE_DESC desc = rtt->GetDesc();
		return desc.Format == dxgi_format && desc.Width == width && desc.Height == height;
	}

	/**
	 * Returns true if the depth stencil surface has the given size.
	 * surface_depth_stencil_format is currently ignored.
	 */
	static
	bool ds_has_format_width_height(const ComPtr<ID3D12Resource> &rtt, u8 surface_depth_stencil_format, size_t width, size_t height)
	{
		//TODO: Check format
		const D3D12_RESOURCE_DESC desc = rtt->GetDesc();
		return desc.Width == width && desc.Height == height;
	}
};
/**
 * Render target state for the D3D12 backend, derived from
 * rsx::surface_store specialised with render_target_traits.
 * Declares storage for color and depth stencil resources, the currently bound
 * ones, and the CPU descriptor handles associated with them.
 */
struct render_targets : public rsx::surface_store<render_target_traits>
{
// Presumably the increment between consecutive RTV descriptors — TODO confirm where it is set.
INT g_descriptor_stride_rtv;
// Color resources keyed by a u32 (presumably the surface address — verify against callers).
std::unordered_map<u32, ComPtr<ID3D12Resource> > render_targets_storage;
// Up to 4 bound color resources, with a parallel array of u32 addresses.
ID3D12Resource *bound_render_targets[4];
u32 bound_render_targets_address[4];
// Depth stencil resources keyed by a u32 (presumably the surface address — verify against callers).
std::unordered_map<u32, ComPtr<ID3D12Resource> > depth_stencil_storage;
// Bound depth stencil resource and its u32 address.
ID3D12Resource *bound_depth_stencil;
u32 bound_depth_stencil_address;
// NOTE(review): definitions are not visible here; the meaning of the returned size_t should be confirmed.
size_t bind_render_targets(ID3D12Device *, u32 color_format, D3D12_CPU_DESCRIPTOR_HANDLE);
size_t bind_depth_stencil(ID3D12Device *, u32 depth_format, D3D12_CPU_DESCRIPTOR_HANDLE);
/**
* If render target already exists at address, issue state change operation on cmdList.
* Otherwise create one with width, height, clearColor info.
* returns the corresponding render target resource.
*/
ID3D12Resource *bind_address_as_render_targets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address,
size_t width, size_t height, u8 surfaceColorFormat, const std::array<float, 4> &clearColor, ComPtr<ID3D12Resource> &dirtyDS);
// Depth stencil counterpart of bind_address_as_render_targets, taking depth and stencil clear values
// (presumably with the same find-or-create behaviour — TODO confirm in the definition).
ID3D12Resource *bind_address_as_depth_stencil(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address,
size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr<ID3D12Resource> &dirtyDS);
// CPU descriptor handles for the current color targets and the current depth stencil.
D3D12_CPU_DESCRIPTOR_HANDLE current_rtts_handle;
D3D12_CPU_DESCRIPTOR_HANDLE current_ds_handle;
// Initialisation taking the D3D12 device; definition not visible here.
void init(ID3D12Device *device);
};

View file

@ -49,7 +49,7 @@ ComPtr<ID3D12Resource> upload_single_texture(
const rsx::texture &texture,
ID3D12Device *device,
ID3D12GraphicsCommandList *command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &texture_buffer_heap)
data_heap &texture_buffer_heap)
{
size_t w = texture.width(), h = texture.height();
size_t depth = texture.depth();
@ -60,14 +60,11 @@ ComPtr<ID3D12Resource> upload_single_texture(
DXGI_FORMAT dxgi_format = get_texture_format(format);
size_t buffer_size = get_placed_texture_storage_size(texture, 256);
assert(texture_buffer_heap.can_alloc(buffer_size));
size_t heap_offset = texture_buffer_heap.alloc(buffer_size);
size_t heap_offset = texture_buffer_heap.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = texture_buffer_heap.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer);
texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
texture_buffer_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
ComPtr<ID3D12Resource> result;
CHECK_HRESULT(device->CreateCommittedResource(
@ -83,7 +80,7 @@ ComPtr<ID3D12Resource> upload_single_texture(
for (const MipmapLevelInfo mli : mipInfos)
{
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(result.Get(), (UINT)mip_level), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset, { dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(), { heap_offset + mli.offset, { dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
mip_level++;
}
@ -97,7 +94,7 @@ ComPtr<ID3D12Resource> upload_single_texture(
void update_existing_texture(
const rsx::texture &texture,
ID3D12GraphicsCommandList *command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &texture_buffer_heap,
data_heap &texture_buffer_heap,
ID3D12Resource *existing_texture)
{
size_t w = texture.width(), h = texture.height();
@ -106,21 +103,18 @@ void update_existing_texture(
DXGI_FORMAT dxgi_format = get_texture_format(format);
size_t buffer_size = get_placed_texture_storage_size(texture, 256);
assert(texture_buffer_heap.can_alloc(buffer_size));
size_t heap_offset = texture_buffer_heap.alloc(buffer_size);
size_t heap_offset = texture_buffer_heap.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *buffer;
CHECK_HRESULT(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
void *mapped_buffer = texture_buffer_heap.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer);
texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
texture_buffer_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST));
size_t miplevel = 0;
for (const MipmapLevelInfo mli : mipInfos)
{
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)miplevel), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset,{ dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(), { heap_offset + mli.offset,{ dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
miplevel++;
}
@ -173,25 +167,22 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_
bool is_swizzled = !(textures[i].format() & CELL_GCM_TEXTURE_LN);
ID3D12Resource *vram_texture;
std::unordered_map<u32, ComPtr<ID3D12Resource> >::const_iterator ItRTT = m_rtts.render_targets_storage.find(texaddr);
std::unordered_map<u32, ComPtr<ID3D12Resource> >::const_iterator ItDS = m_rtts.depth_stencil_storage.find(texaddr);
std::pair<texture_entry, ComPtr<ID3D12Resource> > *cached_texture = m_texture_cache.find_data_if_available(texaddr);
bool is_render_target = false, is_depth_stencil_texture = false;
if (ItRTT != m_rtts.render_targets_storage.end())
if (vram_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{
vram_texture = ItRTT->second.Get();
is_render_target = true;
}
else if (ItDS != m_rtts.depth_stencil_storage.end())
else if (vram_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{
vram_texture = ItDS->second.Get();
is_depth_stencil_texture = true;
}
else if (cached_texture != nullptr && (cached_texture->first == texture_entry(format, w, h, textures[i].mipmap())))
{
if (cached_texture->first.m_is_dirty)
{
update_existing_texture(textures[i], command_list, m_texture_upload_data, cached_texture->second.Get());
update_existing_texture(textures[i], command_list, m_buffer_data, cached_texture->second.Get());
m_texture_cache.protect_data(texaddr, texaddr, get_texture_size(textures[i]));
}
vram_texture = cached_texture->second.Get();
@ -200,7 +191,7 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_
{
if (cached_texture != nullptr)
get_current_resource_storage().dirty_textures.push_back(m_texture_cache.remove_from_cache(texaddr));
ComPtr<ID3D12Resource> tex = upload_single_texture(textures[i], m_device.Get(), command_list, m_texture_upload_data);
ComPtr<ID3D12Resource> tex = upload_single_texture(textures[i], m_device.Get(), command_list, m_buffer_data);
std::wstring name = L"texture_@" + std::to_wstring(texaddr);
tex->SetName(name.c_str());
vram_texture = tex.Get();

View file

@ -28,12 +28,7 @@ std::pair<ID3DBlob *, ID3DBlob *> compileF32toU8CS()
ID3DBlob *bytecode;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
HRESULT hr = wrapD3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf());
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
CHECK_HRESULT(wrapD3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()));
CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
{
// Textures
@ -47,13 +42,7 @@ std::pair<ID3DBlob *, ID3DBlob *> compileF32toU8CS()
ID3DBlob *rootSignatureBlob;
hr = wrapD3D12SerializeRootSignature(&CD3DX12_ROOT_SIGNATURE_DESC(1, &RP), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob);
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
CHECK_HRESULT(wrapD3D12SerializeRootSignature(&CD3DX12_ROOT_SIGNATURE_DESC(1, &RP), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
return std::make_pair(bytecode, rootSignatureBlob);
}
@ -78,12 +67,7 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device, ID3D12CommandQueue *gfxco
Microsoft::WRL::ComPtr<ID3DBlob> fsBytecode;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
HRESULT hr = wrapD3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf());
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
CHECK_HRESULT(wrapD3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf()));
const char *vsCode = STRINGIFY(
struct VertexInput \n
@ -108,12 +92,7 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device, ID3D12CommandQueue *gfxco
);
Microsoft::WRL::ComPtr<ID3DBlob> vsBytecode;
hr = wrapD3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf());
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
CHECK_HRESULT(wrapD3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf()));
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
psoDesc.PS.BytecodeLength = fsBytecode->GetBufferSize();
@ -163,14 +142,8 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device, ID3D12CommandQueue *gfxco
Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob);
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
hr = device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature));
CHECK_HRESULT(wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
CHECK_HRESULT(device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)));
psoDesc.pRootSignature = m_rootSignature;
psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

View file

@ -290,6 +290,7 @@ enum
CELL_GCM_TEXTURE_NEAREST_LINEAR = 5,
CELL_GCM_TEXTURE_LINEAR_LINEAR = 6,
CELL_GCM_TEXTURE_CONVOLUTION_MIN = 7,
CELL_GCM_TEXTURE_UNKNOWN_MAG_FILTER = 4,
CELL_GCM_PRIMITIVE_POINTS = 1,
CELL_GCM_PRIMITIVE_LINES = 2,