d3d12: Avoid an extra vertex copy

This commit is contained in:
Vincent Lejeune 2015-10-14 19:47:09 +02:00
parent b0f8611f49
commit a2997a1109
7 changed files with 142 additions and 131 deletions

View file

@ -1,6 +1,6 @@
#include "stdafx.h"
#include "BufferUtils.h"
#include "Utilities/Log.h"
// Minimum / maximum of two values.
// The whole expansion is parenthesized so the macros compose correctly inside
// larger expressions (e.g. MIN2(a, b) + 1); without the outer parentheses the
// conditional operator's low precedence would swallow the surrounding operands.
// Note: each argument may be evaluated twice, so avoid side effects in x and y.
#define MIN2(x, y) (((x) < (y)) ? (x) : (y))
#define MAX2(x, y) (((x) > (y)) ? (x) : (y))
@ -55,50 +55,56 @@ std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_in
return Result;
}
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap)
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc)
{
for (int vertex = 0; vertex < vbf.elementCount; vertex++)
assert(vertex_array_desc.array);
if (vertex_array_desc.frequency > 1)
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, vertex_array_desc.frequency, index);
u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
u32 address = rsx::get_address(offset & 0x7fffffff, offset >> 31);
u32 type_size = rsx::get_vertex_type_size(vertex_array_desc.type);
u32 element_size = type_size * vertex_array_desc.size;
u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
for (u32 i = 0; i < count; ++i)
{
for (size_t attributeId : vbf.attributeId)
auto src = vm::ps3::_ptr<const u8>(address + base_offset + vertex_array_desc.stride * (first + i + base_index));
u8* dst = (u8*)buffer + i * element_size;
switch (type_size)
{
u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + attributeId];
u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31);
case 1:
memcpy(dst, src, vertex_array_desc.size);
break;
if (!vertex_array_desc[attributeId].array)
{
memcpy(bufferMap, vertex_data[attributeId].data(), vertex_data[attributeId].size());
continue;
}
size_t offset = (size_t)addr + baseOffset - vbf.range.first;
size_t tsize = rsx::get_vertex_type_size(vertex_array_desc[attributeId].type);
size_t size = vertex_array_desc[attributeId].size;
auto src = vm::ps3::_ptr<const u8>(addr + (u32)baseOffset + (u32)vbf.stride * vertex);
char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
case 2:
{
auto* c_src = (const be_t<u16>*)src;
u16* c_dst = (u16*)dst;
switch (tsize)
for (u32 j = 0; j < vertex_array_desc.size; ++j)
{
case 1:
{
memcpy(dst, src, size);
break;
*c_dst++ = *c_src++;
}
break;
}
case 2:
{
const u16* c_src = (const u16*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
break;
}
case 4:
{
auto* c_src = (const be_t<u32>*)src;
u32* c_dst = (u32*)dst;
case 4:
for (u32 j = 0; j < vertex_array_desc.size; ++j)
{
const u32* c_src = (const u32*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
break;
}
*c_dst++ = *c_src++;
}
break;
}
}
}
}

View file

@ -20,9 +20,9 @@ struct VertexBufferFormat
std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset);
/*
* Write vertex attributes to bufferMap, swapping data as required.
* Write count vertex attributes of the vertex array number index, starting at vertex first, into buffer, using vertex_array_desc.
*/
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap);
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc);
/*
* Returns false if the primitive mode is not supported and needs to be emulated (using an index buffer).

View file

@ -91,32 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size)
}
}
static
std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(const rsx::data_array_format_info *vertex_info, const std::vector<u8> *vertex_data)
{
std::vector<D3D12_INPUT_ELEMENT_DESC> result;
size_t inputSlot = 0;
for (size_t index = 0; index < rsx::limits::vertex_count; index++)
{
const auto &info = vertex_info[index];
if (!info.size)
continue;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = info.array ? D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA : D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 0;
result.push_back(IAElement);
}
return result;
}
// D3D12GS member handling buffers
@ -138,29 +112,105 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &
return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
}
std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw)
void D3D12GSRender::load_vertex_data(u32 first, u32 count)
{
u32 m_vertex_data_base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
std::vector<D3D12_VERTEX_BUFFER_VIEW> result;
m_IASet = getIALayout(vertex_arrays_info, vertex_arrays);
m_first_count_pairs.emplace_back(std::make_pair(first, count));
vertex_draw_count += count;
}
void D3D12GSRender::upload_vertex_attributes()
{
m_vertex_buffer_views.clear();
m_IASet.clear();
size_t inputSlot = 0;
// First, upload the attributes that are sourced from vertex arrays
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (!info.size)
if (!info.array) // disabled or not a vertex array
continue;
D3D12_GPU_VIRTUAL_ADDRESS virtualAddress = createVertexBuffer(info, vertex_arrays[index], m_device.Get(), m_vertexIndexData);
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = element_size * vertex_draw_count;
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
for (const auto &range : m_first_count_pairs)
{
write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info);
bufferMap = (char*)bufferMap + range.second * element_size;
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = virtualAddress;
vertexBufferView.SizeInBytes = (UINT)vertex_arrays[index].size();
vertexBufferView.StrideInBytes = (UINT)rsx::get_vertex_type_size(info.type) * info.size;
result.push_back(vertexBufferView);
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
m_timers.m_bufferUploadSize += subBufferSize;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_IASet.push_back(IAElement);
}
return result;
// Then upload the immediate (non-array) vertex attributes
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (info.array)
continue;
if (!info.size) // disabled
continue;
auto &data = vertex_arrays[index];
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = data.size();
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
memcpy(bufferMap, data.data(), data.size());
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
m_IASet.push_back(IAElement);
}
m_first_count_pairs.clear();
}
D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)

View file

@ -411,9 +411,9 @@ void D3D12GSRender::end()
if (!vertex_index_array.empty() || vertex_draw_count)
{
const std::vector<D3D12_VERTEX_BUFFER_VIEW> &vertexBufferViews = UploadVertexBuffers(!vertex_index_array.empty());
upload_vertex_attributes();
const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty());
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data());
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
if (m_renderingInfo.m_indexed)
getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView);
}

View file

@ -428,6 +428,7 @@ private:
RenderTargets m_rtts;
std::vector<D3D12_INPUT_ELEMENT_DESC> m_IASet;
std::vector<D3D12_VERTEX_BUFFER_VIEW> m_vertex_buffer_views;
INT g_descriptorStrideSRVCBVUAV;
INT g_descriptorStrideDSV;
@ -462,12 +463,11 @@ private:
bool LoadProgram();
std::vector<std::pair<u32, u32> > m_first_count_pairs;
/**
* Create as little vertex buffer as possible to hold all vertex info (in upload heap),
* create corresponding IA layout that can be used for load program and
* returns a vector of vertex buffer view that can be passed to IASetVertexBufferView().
* Upload all vertex attributes whose (first, count) ranges were previously accumulated.
*/
std::vector<D3D12_VERTEX_BUFFER_VIEW> UploadVertexBuffers(bool indexed_draw = false);
void upload_vertex_attributes();
/**
* Create index buffer for indexed rendering and non-native primitive formats if needed, and
@ -507,6 +507,8 @@ protected:
virtual bool domethod(u32 cmd, u32 arg) override;
virtual void end() override;
virtual void flip(int buffer) override;
virtual void load_vertex_data(u32 first, u32 count) override;
};
#endif

View file

@ -10,6 +10,8 @@
#include "Emu/SysCalls/CB_FUNC.h"
#include "Emu/SysCalls/lv2/sys_time.h"
#include "Common/BufferUtils.h"
#include "Utilities/types.h"
extern "C"
@ -823,68 +825,19 @@ namespace rsx
for (int index = 0; index < limits::vertex_count; ++index)
{
auto &info = vertex_arrays_info[index];
const auto &info = vertex_arrays_info[index];
if (!info.array) // disabled or not a vertex array
{
continue;
}
auto &data = vertex_arrays[index];
if (info.frequency > 1)
{
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, info.frequency, index);
}
u32 offset = method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
u32 address = get_address(offset & 0x7fffffff, offset >> 31);
u32 type_size = get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
u32 dst_position = (u32)data.size();
data.resize(dst_position + count * element_size);
u32 base_offset = method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
for (u32 i = 0; i < count; ++i)
{
const u8* src = vm::_ptr<u8>(address + base_offset + info.stride * (first + i + base_index));
u8* dst = data.data() + dst_position + i * element_size;
switch (type_size)
{
case 1:
memcpy(dst, src, info.size);
break;
case 2:
{
auto* c_src = (const be_t<u16>*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < info.size; ++j)
{
*c_dst++ = *c_src++;
}
break;
}
case 4:
{
auto* c_src = (const be_t<u32>*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < info.size; ++j)
{
*c_dst++ = *c_src++;
}
break;
}
}
}
write_vertex_array_data_to_buffer(data.data() + dst_position, first, count, index, info);
}
}

View file

@ -163,7 +163,7 @@ namespace rsx
u32 transform_program[512 * 4] = {};
void load_vertex_data(u32 first, u32 count);
virtual void load_vertex_data(u32 first, u32 count);
void load_vertex_index_data(u32 first, u32 count);
public: