mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
d3d12: Avoid an extra vertex copy
This commit is contained in:
parent
b0f8611f49
commit
a2997a1109
7 changed files with 142 additions and 131 deletions
|
@ -1,6 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
#include "BufferUtils.h"
|
||||
|
||||
#include "Utilities/Log.h"
|
||||
|
||||
#define MIN2(x, y) ((x) < (y)) ? (x) : (y)
|
||||
#define MAX2(x, y) ((x) > (y)) ? (x) : (y)
|
||||
|
@ -55,50 +55,56 @@ std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_in
|
|||
return Result;
|
||||
}
|
||||
|
||||
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap)
|
||||
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc)
|
||||
{
|
||||
for (int vertex = 0; vertex < vbf.elementCount; vertex++)
|
||||
assert(vertex_array_desc.array);
|
||||
|
||||
if (vertex_array_desc.frequency > 1)
|
||||
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, vertex_array_desc.frequency, index);
|
||||
|
||||
u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
|
||||
u32 address = rsx::get_address(offset & 0x7fffffff, offset >> 31);
|
||||
|
||||
u32 type_size = rsx::get_vertex_type_size(vertex_array_desc.type);
|
||||
u32 element_size = type_size * vertex_array_desc.size;
|
||||
|
||||
u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
|
||||
u32 base_index = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
|
||||
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
{
|
||||
for (size_t attributeId : vbf.attributeId)
|
||||
auto src = vm::ps3::_ptr<const u8>(address + base_offset + vertex_array_desc.stride * (first + i + base_index));
|
||||
u8* dst = (u8*)buffer + i * element_size;
|
||||
|
||||
switch (type_size)
|
||||
{
|
||||
u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + attributeId];
|
||||
u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31);
|
||||
case 1:
|
||||
memcpy(dst, src, vertex_array_desc.size);
|
||||
break;
|
||||
|
||||
if (!vertex_array_desc[attributeId].array)
|
||||
{
|
||||
memcpy(bufferMap, vertex_data[attributeId].data(), vertex_data[attributeId].size());
|
||||
continue;
|
||||
}
|
||||
size_t offset = (size_t)addr + baseOffset - vbf.range.first;
|
||||
size_t tsize = rsx::get_vertex_type_size(vertex_array_desc[attributeId].type);
|
||||
size_t size = vertex_array_desc[attributeId].size;
|
||||
auto src = vm::ps3::_ptr<const u8>(addr + (u32)baseOffset + (u32)vbf.stride * vertex);
|
||||
char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
|
||||
case 2:
|
||||
{
|
||||
auto* c_src = (const be_t<u16>*)src;
|
||||
u16* c_dst = (u16*)dst;
|
||||
|
||||
switch (tsize)
|
||||
for (u32 j = 0; j < vertex_array_desc.size; ++j)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
memcpy(dst, src, size);
|
||||
break;
|
||||
*c_dst++ = *c_src++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 2:
|
||||
{
|
||||
const u16* c_src = (const u16*)src;
|
||||
u16* c_dst = (u16*)dst;
|
||||
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
auto* c_src = (const be_t<u32>*)src;
|
||||
u32* c_dst = (u32*)dst;
|
||||
|
||||
case 4:
|
||||
for (u32 j = 0; j < vertex_array_desc.size; ++j)
|
||||
{
|
||||
const u32* c_src = (const u32*)src;
|
||||
u32* c_dst = (u32*)dst;
|
||||
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
|
||||
break;
|
||||
}
|
||||
*c_dst++ = *c_src++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,9 +20,9 @@ struct VertexBufferFormat
|
|||
std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset);
|
||||
|
||||
/*
|
||||
* Write vertex attributes to bufferMap, swapping data as required.
|
||||
* Write count vertex attributes from the array buffer at index, starting at first, using vertex_array_desc
|
||||
*/
|
||||
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap);
|
||||
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc);
|
||||
|
||||
/*
|
||||
* If primitive mode is not supported and needs to be emulated (using an index buffer), returns false.
|
||||
|
|
|
@ -91,32 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size)
|
|||
}
|
||||
}
|
||||
|
||||
// Builds the D3D12 input-assembler layout from the per-attribute format descriptors.
//   vertex_info: array of attribute descriptors; entries 0 .. rsx::limits::vertex_count - 1 are read.
//   vertex_data: not referenced anywhere in this function body.
// Returns one D3D12_INPUT_ELEMENT_DESC per attribute whose size is non-zero, in increasing index order.
static
|
||||
std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(const rsx::data_array_format_info *vertex_info, const std::vector<u8> *vertex_data)
|
||||
{
|
||||
std::vector<D3D12_INPUT_ELEMENT_DESC> result;
|
||||
|
||||
// Input slots are handed out consecutively to the attributes that are kept.
size_t inputSlot = 0;
|
||||
for (size_t index = 0; index < rsx::limits::vertex_count; index++)
|
||||
{
|
||||
const auto &info = vertex_info[index];
|
||||
|
||||
// Attributes with a zero size are skipped and do not consume an input slot.
if (!info.size)
|
||||
continue;
|
||||
|
||||
D3D12_INPUT_ELEMENT_DESC IAElement = {};
|
||||
// Every attribute uses the "TEXCOORD" semantic and is distinguished by its attribute index.
IAElement.SemanticName = "TEXCOORD";
|
||||
IAElement.SemanticIndex = (UINT)index;
|
||||
IAElement.InputSlot = (UINT)inputSlot++;
|
||||
// NOTE(review): getFormat receives info.type - 1; presumably its type argument is zero-based - confirm.
IAElement.Format = getFormat(info.type - 1, info.size);
|
||||
IAElement.AlignedByteOffset = 0;
|
||||
// Array attributes are classified per-vertex; all others are classified per-instance.
IAElement.InputSlotClass = info.array ? D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA : D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
|
||||
IAElement.InstanceDataStepRate = 0;
|
||||
result.push_back(IAElement);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// D3D12GS member handling buffers
|
||||
|
||||
|
||||
|
@ -138,29 +112,105 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &
|
|||
return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
|
||||
}
|
||||
|
||||
std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw)
|
||||
void D3D12GSRender::load_vertex_data(u32 first, u32 count)
|
||||
{
|
||||
u32 m_vertex_data_base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
|
||||
std::vector<D3D12_VERTEX_BUFFER_VIEW> result;
|
||||
m_IASet = getIALayout(vertex_arrays_info, vertex_arrays);
|
||||
m_first_count_pairs.emplace_back(std::make_pair(first, count));
|
||||
vertex_draw_count += count;
|
||||
}
|
||||
|
||||
|
||||
void D3D12GSRender::upload_vertex_attributes()
|
||||
{
|
||||
m_vertex_buffer_views.clear();
|
||||
m_IASet.clear();
|
||||
size_t inputSlot = 0;
|
||||
|
||||
// First array attribute
|
||||
for (int index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
const auto &info = vertex_arrays_info[index];
|
||||
|
||||
if (!info.size)
|
||||
if (!info.array) // disabled or not a vertex array
|
||||
continue;
|
||||
|
||||
D3D12_GPU_VIRTUAL_ADDRESS virtualAddress = createVertexBuffer(info, vertex_arrays[index], m_device.Get(), m_vertexIndexData);
|
||||
u32 type_size = rsx::get_vertex_type_size(info.type);
|
||||
u32 element_size = type_size * info.size;
|
||||
|
||||
size_t subBufferSize = element_size * vertex_draw_count;
|
||||
assert(m_vertexIndexData.canAlloc(subBufferSize));
|
||||
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
|
||||
|
||||
void *buffer;
|
||||
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
|
||||
void *bufferMap = (char*)buffer + heapOffset;
|
||||
for (const auto &range : m_first_count_pairs)
|
||||
{
|
||||
write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info);
|
||||
bufferMap = (char*)bufferMap + range.second * element_size;
|
||||
}
|
||||
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
|
||||
|
||||
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
|
||||
vertexBufferView.BufferLocation = virtualAddress;
|
||||
vertexBufferView.SizeInBytes = (UINT)vertex_arrays[index].size();
|
||||
vertexBufferView.StrideInBytes = (UINT)rsx::get_vertex_type_size(info.type) * info.size;
|
||||
result.push_back(vertexBufferView);
|
||||
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
|
||||
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
|
||||
vertexBufferView.StrideInBytes = (UINT)element_size;
|
||||
m_vertex_buffer_views.push_back(vertexBufferView);
|
||||
|
||||
m_timers.m_bufferUploadSize += subBufferSize;
|
||||
|
||||
D3D12_INPUT_ELEMENT_DESC IAElement = {};
|
||||
IAElement.SemanticName = "TEXCOORD";
|
||||
IAElement.SemanticIndex = (UINT)index;
|
||||
IAElement.InputSlot = (UINT)inputSlot++;
|
||||
IAElement.Format = getFormat(info.type - 1, info.size);
|
||||
IAElement.AlignedByteOffset = 0;
|
||||
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
|
||||
IAElement.InstanceDataStepRate = 0;
|
||||
m_IASet.push_back(IAElement);
|
||||
}
|
||||
|
||||
return result;
|
||||
// Now immediate vertex buffer
|
||||
for (int index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
const auto &info = vertex_arrays_info[index];
|
||||
|
||||
if (info.array)
|
||||
continue;
|
||||
if (!info.size) // disabled
|
||||
continue;
|
||||
|
||||
auto &data = vertex_arrays[index];
|
||||
|
||||
u32 type_size = rsx::get_vertex_type_size(info.type);
|
||||
u32 element_size = type_size * info.size;
|
||||
|
||||
size_t subBufferSize = data.size();
|
||||
assert(m_vertexIndexData.canAlloc(subBufferSize));
|
||||
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
|
||||
|
||||
void *buffer;
|
||||
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
|
||||
void *bufferMap = (char*)buffer + heapOffset;
|
||||
memcpy(bufferMap, data.data(), data.size());
|
||||
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
|
||||
|
||||
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
|
||||
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
|
||||
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
|
||||
vertexBufferView.StrideInBytes = (UINT)element_size;
|
||||
m_vertex_buffer_views.push_back(vertexBufferView);
|
||||
|
||||
D3D12_INPUT_ELEMENT_DESC IAElement = {};
|
||||
IAElement.SemanticName = "TEXCOORD";
|
||||
IAElement.SemanticIndex = (UINT)index;
|
||||
IAElement.InputSlot = (UINT)inputSlot++;
|
||||
IAElement.Format = getFormat(info.type - 1, info.size);
|
||||
IAElement.AlignedByteOffset = 0;
|
||||
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
|
||||
IAElement.InstanceDataStepRate = 1;
|
||||
m_IASet.push_back(IAElement);
|
||||
}
|
||||
m_first_count_pairs.clear();
|
||||
}
|
||||
|
||||
D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
|
||||
|
|
|
@ -411,9 +411,9 @@ void D3D12GSRender::end()
|
|||
|
||||
if (!vertex_index_array.empty() || vertex_draw_count)
|
||||
{
|
||||
const std::vector<D3D12_VERTEX_BUFFER_VIEW> &vertexBufferViews = UploadVertexBuffers(!vertex_index_array.empty());
|
||||
upload_vertex_attributes();
|
||||
const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty());
|
||||
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data());
|
||||
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
|
||||
if (m_renderingInfo.m_indexed)
|
||||
getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView);
|
||||
}
|
||||
|
|
|
@ -428,6 +428,7 @@ private:
|
|||
RenderTargets m_rtts;
|
||||
|
||||
std::vector<D3D12_INPUT_ELEMENT_DESC> m_IASet;
|
||||
std::vector<D3D12_VERTEX_BUFFER_VIEW> m_vertex_buffer_views;
|
||||
|
||||
INT g_descriptorStrideSRVCBVUAV;
|
||||
INT g_descriptorStrideDSV;
|
||||
|
@ -462,12 +463,11 @@ private:
|
|||
|
||||
bool LoadProgram();
|
||||
|
||||
std::vector<std::pair<u32, u32> > m_first_count_pairs;
|
||||
/**
|
||||
* Create as few vertex buffers as possible to hold all vertex info (in upload heap),
|
||||
* create corresponding IA layout that can be used for load program and
|
||||
* returns a vector of vertex buffer view that can be passed to IASetVertexBufferView().
|
||||
* Upload all vertex attributes whose (first, count) info was previously accumulated.
|
||||
*/
|
||||
std::vector<D3D12_VERTEX_BUFFER_VIEW> UploadVertexBuffers(bool indexed_draw = false);
|
||||
void upload_vertex_attributes();
|
||||
|
||||
/**
|
||||
* Create index buffer for indexed rendering and non-native primitive format if needed, and
|
||||
|
@ -507,6 +507,8 @@ protected:
|
|||
virtual bool domethod(u32 cmd, u32 arg) override;
|
||||
virtual void end() override;
|
||||
virtual void flip(int buffer) override;
|
||||
|
||||
virtual void load_vertex_data(u32 first, u32 count) override;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -10,6 +10,8 @@
|
|||
#include "Emu/SysCalls/CB_FUNC.h"
|
||||
#include "Emu/SysCalls/lv2/sys_time.h"
|
||||
|
||||
#include "Common/BufferUtils.h"
|
||||
|
||||
#include "Utilities/types.h"
|
||||
|
||||
extern "C"
|
||||
|
@ -823,68 +825,19 @@ namespace rsx
|
|||
|
||||
for (int index = 0; index < limits::vertex_count; ++index)
|
||||
{
|
||||
auto &info = vertex_arrays_info[index];
|
||||
const auto &info = vertex_arrays_info[index];
|
||||
|
||||
if (!info.array) // disabled or not a vertex array
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
auto &data = vertex_arrays[index];
|
||||
|
||||
if (info.frequency > 1)
|
||||
{
|
||||
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, info.frequency, index);
|
||||
}
|
||||
|
||||
u32 offset = method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
|
||||
u32 address = get_address(offset & 0x7fffffff, offset >> 31);
|
||||
|
||||
u32 type_size = get_vertex_type_size(info.type);
|
||||
u32 element_size = type_size * info.size;
|
||||
|
||||
u32 dst_position = (u32)data.size();
|
||||
data.resize(dst_position + count * element_size);
|
||||
|
||||
u32 base_offset = method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
|
||||
u32 base_index = method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
|
||||
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
{
|
||||
const u8* src = vm::_ptr<u8>(address + base_offset + info.stride * (first + i + base_index));
|
||||
u8* dst = data.data() + dst_position + i * element_size;
|
||||
|
||||
switch (type_size)
|
||||
{
|
||||
case 1:
|
||||
memcpy(dst, src, info.size);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
{
|
||||
auto* c_src = (const be_t<u16>*)src;
|
||||
u16* c_dst = (u16*)dst;
|
||||
|
||||
for (u32 j = 0; j < info.size; ++j)
|
||||
{
|
||||
*c_dst++ = *c_src++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 4:
|
||||
{
|
||||
auto* c_src = (const be_t<u32>*)src;
|
||||
u32* c_dst = (u32*)dst;
|
||||
|
||||
for (u32 j = 0; j < info.size; ++j)
|
||||
{
|
||||
*c_dst++ = *c_src++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
write_vertex_array_data_to_buffer(data.data() + dst_position, first, count, index, info);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -163,7 +163,7 @@ namespace rsx
|
|||
|
||||
u32 transform_program[512 * 4] = {};
|
||||
|
||||
void load_vertex_data(u32 first, u32 count);
|
||||
virtual void load_vertex_data(u32 first, u32 count);
|
||||
void load_vertex_index_data(u32 first, u32 count);
|
||||
|
||||
public:
|
||||
|
|
Loading…
Add table
Reference in a new issue