diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 16f3c88da0..9d75ad4ce1 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -1,6 +1,6 @@ #include "stdafx.h" #include "BufferUtils.h" - +#include "Utilities/Log.h" #define MIN2(x, y) ((x) < (y)) ? (x) : (y) #define MAX2(x, y) ((x) > (y)) ? (x) : (y) @@ -55,50 +55,56 @@ std::vector FormatVertexData(const rsx::data_array_format_in return Result; } -void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector *vertex_data, size_t baseOffset, void* bufferMap) +void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc) { - for (int vertex = 0; vertex < vbf.elementCount; vertex++) + assert(vertex_array_desc.array); + + if (vertex_array_desc.frequency > 1) + LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, vertex_array_desc.frequency, index); + + u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index]; + u32 address = rsx::get_address(offset & 0x7fffffff, offset >> 31); + + u32 type_size = rsx::get_vertex_type_size(vertex_array_desc.type); + u32 element_size = type_size * vertex_array_desc.size; + + u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; + u32 base_index = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX]; + + for (u32 i = 0; i < count; ++i) { - for (size_t attributeId : vbf.attributeId) + auto src = vm::ps3::_ptr(address + base_offset + vertex_array_desc.stride * (first + i + base_index)); + u8* dst = (u8*)buffer + i * element_size; + + switch (type_size) { - u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + attributeId]; - u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31); + case 1: + memcpy(dst, src, vertex_array_desc.size); + break; - if (!vertex_array_desc[attributeId].array) - { - memcpy(bufferMap, vertex_data[attributeId].data(), vertex_data[attributeId].size()); - continue; - } - size_t offset = (size_t)addr + baseOffset - vbf.range.first; - size_t tsize = rsx::get_vertex_type_size(vertex_array_desc[attributeId].type); - size_t size = vertex_array_desc[attributeId].size; - auto src = vm::ps3::_ptr(addr + (u32)baseOffset + (u32)vbf.stride * vertex); - char* dst = (char*)bufferMap + offset + vbf.stride * vertex; + case 2: + { + auto* c_src = (const be_t*)src; + u16* c_dst = (u16*)dst; - switch (tsize) + for (u32 j = 0; j < vertex_array_desc.size; ++j) { - case 1: - { - memcpy(dst, src, size); - break; + *c_dst++ = *c_src++; } + break; + } - case 2: - { - const u16* c_src = (const u16*)src; - u16* c_dst = (u16*)dst; - for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++); - break; - } + case 4: + { + auto* c_src = (const be_t*)src; + u32* c_dst = (u32*)dst; - case 4: + for (u32 j = 0; j < vertex_array_desc.size; ++j) { - const u32* c_src = (const u32*)src; - u32* c_dst = (u32*)dst; - for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++); - break; - } + *c_dst++ = *c_src++; } + break; + } } } } diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index 874d673241..e04bbc33f1 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -20,9 +20,9 @@ struct VertexBufferFormat std::vector FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector *vertex_data, size_t *vertex_data_size, size_t base_offset); /* - * Write vertex attributes to bufferMap, swapping data as required. + * Write count vertex attributes from index array buffer starting at first, using vertex_array_desc */ -void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector *vertex_data, size_t baseOffset, void* bufferMap); +void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc); /* * If primitive mode is not supported and need to be emulated (using an index buffer) returns false. diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 80e1125dfd..dad9e287a3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -91,32 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size) } } -static -std::vector getIALayout(const rsx::data_array_format_info *vertex_info, const std::vector *vertex_data) -{ - std::vector result; - - size_t inputSlot = 0; - for (size_t index = 0; index < rsx::limits::vertex_count; index++) - { - const auto &info = vertex_info[index]; - - if (!info.size) - continue; - - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = (UINT)inputSlot++; - IAElement.Format = getFormat(info.type - 1, info.size); - IAElement.AlignedByteOffset = 0; - IAElement.InputSlotClass = info.array ? D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA : D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; - IAElement.InstanceDataStepRate = 0; - result.push_back(IAElement); - } - return result; -} - // D3D12GS member handling buffers @@ -138,29 +112,105 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info & return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset; } -std::vector D3D12GSRender::UploadVertexBuffers(bool indexed_draw) +void D3D12GSRender::load_vertex_data(u32 first, u32 count) { - u32 m_vertex_data_base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; - std::vector result; - m_IASet = getIALayout(vertex_arrays_info, vertex_arrays); + m_first_count_pairs.emplace_back(std::make_pair(first, count)); + vertex_draw_count += count; +} + +void D3D12GSRender::upload_vertex_attributes() +{ + m_vertex_buffer_views.clear(); + m_IASet.clear(); + size_t inputSlot = 0; + + // First array attribute for (int index = 0; index < rsx::limits::vertex_count; ++index) { const auto &info = vertex_arrays_info[index]; - if (!info.size) + if (!info.array) // disabled or not a vertex array continue; - D3D12_GPU_VIRTUAL_ADDRESS virtualAddress = createVertexBuffer(info, vertex_arrays[index], m_device.Get(), m_vertexIndexData); + u32 type_size = rsx::get_vertex_type_size(info.type); + u32 element_size = type_size * info.size; + + size_t subBufferSize = element_size * vertex_draw_count; + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + + void *buffer; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); + void *bufferMap = (char*)buffer + heapOffset; + for (const auto &range : m_first_count_pairs) + { + write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info); + bufferMap = (char*)bufferMap + range.second * element_size; + } + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; - vertexBufferView.BufferLocation = virtualAddress; - vertexBufferView.SizeInBytes = (UINT)vertex_arrays[index].size(); - vertexBufferView.StrideInBytes = (UINT)rsx::get_vertex_type_size(info.type) * info.size; - result.push_back(vertexBufferView); + vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset; + vertexBufferView.SizeInBytes = (UINT)subBufferSize; + vertexBufferView.StrideInBytes = (UINT)element_size; + m_vertex_buffer_views.push_back(vertexBufferView); + + m_timers.m_bufferUploadSize += subBufferSize; + + D3D12_INPUT_ELEMENT_DESC IAElement = {}; + IAElement.SemanticName = "TEXCOORD"; + IAElement.SemanticIndex = (UINT)index; + IAElement.InputSlot = (UINT)inputSlot++; + IAElement.Format = getFormat(info.type - 1, info.size); + IAElement.AlignedByteOffset = 0; + IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + IAElement.InstanceDataStepRate = 0; + m_IASet.push_back(IAElement); } - return result; + // Now immediate vertex buffer + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { + const auto &info = vertex_arrays_info[index]; + + if (info.array) + continue; + if (!info.size) // disabled + continue; + + auto &data = vertex_arrays[index]; + + u32 type_size = rsx::get_vertex_type_size(info.type); + u32 element_size = type_size * info.size; + + size_t subBufferSize = data.size(); + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + + void *buffer; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); + void *bufferMap = (char*)buffer + heapOffset; + memcpy(bufferMap, data.data(), data.size()); + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); + + D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; + vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset; + vertexBufferView.SizeInBytes = (UINT)subBufferSize; + vertexBufferView.StrideInBytes = (UINT)element_size; + m_vertex_buffer_views.push_back(vertexBufferView); + + D3D12_INPUT_ELEMENT_DESC IAElement = {}; + IAElement.SemanticName = "TEXCOORD"; + IAElement.SemanticIndex = (UINT)index; + IAElement.InputSlot = (UINT)inputSlot++; + IAElement.Format = getFormat(info.type - 1, info.size); + IAElement.AlignedByteOffset = 0; + IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; + IAElement.InstanceDataStepRate = 1; + m_IASet.push_back(IAElement); + } + m_first_count_pairs.clear(); } D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index deb4614af7..129b860610 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -411,9 +411,9 @@ void D3D12GSRender::end() if (!vertex_index_array.empty() || vertex_draw_count) { - const std::vector &vertexBufferViews = UploadVertexBuffers(!vertex_index_array.empty()); + upload_vertex_attributes(); const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty()); - getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); + getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); if (m_renderingInfo.m_indexed) getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f5becb50ee..bfdb0ae0e9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -428,6 +428,7 @@ private: RenderTargets m_rtts; std::vector m_IASet; + std::vector m_vertex_buffer_views; INT g_descriptorStrideSRVCBVUAV; INT g_descriptorStrideDSV; @@ -462,12 +463,11 @@ private: bool LoadProgram(); + std::vector > m_first_count_pairs; /** - * Create as little vertex buffer as possible to hold all vertex info (in upload heap), - * create corresponding IA layout that can be used for load program and - * returns a vector of vertex buffer view that can be passed to IASetVertexBufferView(). + * Upload all vertex attribute whose (first, count) info were previously accumulated. */ - std::vector UploadVertexBuffers(bool indexed_draw = false); + void upload_vertex_attributes(); /** * Create index buffer for indexed rendering and non native primitive format if nedded, and @@ -507,6 +507,8 @@ protected: virtual bool domethod(u32 cmd, u32 arg) override; virtual void end() override; virtual void flip(int buffer) override; + + virtual void load_vertex_data(u32 first, u32 count) override; }; #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 91aff45190..24a726fa8a 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -10,6 +10,8 @@ #include "Emu/SysCalls/CB_FUNC.h" #include "Emu/SysCalls/lv2/sys_time.h" +#include "Common/BufferUtils.h" + #include "Utilities/types.h" extern "C" @@ -823,68 +825,19 @@ namespace rsx for (int index = 0; index < limits::vertex_count; ++index) { - auto &info = vertex_arrays_info[index]; + const auto &info = vertex_arrays_info[index]; if (!info.array) // disabled or not a vertex array - { continue; - } auto &data = vertex_arrays[index]; - if (info.frequency > 1) - { - LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, info.frequency, index); - } - - u32 offset = method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index]; - u32 address = get_address(offset & 0x7fffffff, offset >> 31); - u32 type_size = get_vertex_type_size(info.type); u32 element_size = type_size * info.size; u32 dst_position = (u32)data.size(); data.resize(dst_position + count * element_size); - - u32 base_offset = method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; - u32 base_index = method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX]; - - for (u32 i = 0; i < count; ++i) - { - const u8* src = vm::_ptr(address + base_offset + info.stride * (first + i + base_index)); - u8* dst = data.data() + dst_position + i * element_size; - - switch (type_size) - { - case 1: - memcpy(dst, src, info.size); - break; - - case 2: - { - auto* c_src = (const be_t*)src; - u16* c_dst = (u16*)dst; - - for (u32 j = 0; j < info.size; ++j) - { - *c_dst++ = *c_src++; - } - break; - } - - case 4: - { - auto* c_src = (const be_t*)src; - u32* c_dst = (u32*)dst; - - for (u32 j = 0; j < info.size; ++j) - { - *c_dst++ = *c_src++; - } - break; - } - } - } + write_vertex_array_data_to_buffer(data.data() + dst_position, first, count, index, info); } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 27a577eb64..c84c1714a3 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -163,7 +163,7 @@ namespace rsx u32 transform_program[512 * 4] = {}; - void load_vertex_data(u32 first, u32 count); + virtual void load_vertex_data(u32 first, u32 count); void load_vertex_index_data(u32 first, u32 count); public: