Merge pull request #1257 from vlj/d3d12

D3d12: Fixes and speed improvements.
This commit is contained in:
Raul Tambre 2015-10-15 18:38:01 +03:00
commit b3db9255e4
15 changed files with 231 additions and 218 deletions

View file

@ -1,6 +1,6 @@
#include "stdafx.h" #include "stdafx.h"
#include "BufferUtils.h" #include "BufferUtils.h"
#include "Utilities/Log.h"
#define MIN2(x, y) ((x) < (y)) ? (x) : (y) #define MIN2(x, y) ((x) < (y)) ? (x) : (y)
#define MAX2(x, y) ((x) > (y)) ? (x) : (y) #define MAX2(x, y) ((x) > (y)) ? (x) : (y)
@ -55,49 +55,55 @@ std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_in
return Result; return Result;
} }
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap) void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc)
{ {
for (int vertex = 0; vertex < vbf.elementCount; vertex++) assert(vertex_array_desc.array);
{
for (size_t attributeId : vbf.attributeId)
{
u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + attributeId];
u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31);
if (!vertex_array_desc[attributeId].array) if (vertex_array_desc.frequency > 1)
{ LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, vertex_array_desc.frequency, index);
memcpy(bufferMap, vertex_data[attributeId].data(), vertex_data[attributeId].size());
continue;
}
size_t offset = (size_t)addr + baseOffset - vbf.range.first;
size_t tsize = rsx::get_vertex_type_size(vertex_array_desc[attributeId].type);
size_t size = vertex_array_desc[attributeId].size;
auto src = vm::ps3::_ptr<const u8>(addr + (u32)baseOffset + (u32)vbf.stride * vertex);
char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
switch (tsize) u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
u32 address = rsx::get_address(offset & 0x7fffffff, offset >> 31);
u32 type_size = rsx::get_vertex_type_size(vertex_array_desc.type);
u32 element_size = type_size * vertex_array_desc.size;
u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
for (u32 i = 0; i < count; ++i)
{
auto src = vm::ps3::_ptr<const u8>(address + base_offset + vertex_array_desc.stride * (first + i + base_index));
u8* dst = (u8*)buffer + i * element_size;
switch (type_size)
{ {
case 1: case 1:
{ memcpy(dst, src, vertex_array_desc.size);
memcpy(dst, src, size);
break; break;
}
case 2: case 2:
{ {
const u16* c_src = (const u16*)src; auto* c_src = (const be_t<u16>*)src;
u16* c_dst = (u16*)dst; u16* c_dst = (u16*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
for (u32 j = 0; j < vertex_array_desc.size; ++j)
{
*c_dst++ = *c_src++;
}
break; break;
} }
case 4: case 4:
{ {
const u32* c_src = (const u32*)src; auto* c_src = (const be_t<u32>*)src;
u32* c_dst = (u32*)dst; u32* c_dst = (u32*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
break; for (u32 j = 0; j < vertex_array_desc.size; ++j)
{
*c_dst++ = *c_src++;
} }
break;
} }
} }
} }

View file

@ -20,9 +20,9 @@ struct VertexBufferFormat
std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset); std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset);
/* /*
* Write vertex attributes to bufferMap, swapping data as required. * Write count vertex attributes from index array buffer starting at first, using vertex_array_desc
*/ */
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap); void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc);
/* /*
* If primitive mode is not supported and need to be emulated (using an index buffer) returns false. * If primitive mode is not supported and need to be emulated (using an index buffer) returns false.

View file

@ -232,16 +232,15 @@ std::string VertexProgramDecompiler::GetCond()
if (d0.cond == 0) return "false"; if (d0.cond == 0) return "false";
if (d0.cond == (lt | gt | eq)) return "true"; if (d0.cond == (lt | gt | eq)) return "true";
static const char* cond_string_table[(lt | gt | eq) + 1] = static const COMPARE cond_string_table[(lt | gt | eq) + 1] =
{ {
"error", COMPARE::FUNCTION_SLT, // "error"
"lessThan", COMPARE::FUNCTION_SLT,
"equal", COMPARE::FUNCTION_SEQ,
"lessThanEqual", COMPARE::FUNCTION_SLE,
"greaterThan", COMPARE::FUNCTION_SGT,
"notEqual", COMPARE::FUNCTION_SNE,
"greaterThanEqual", COMPARE::FUNCTION_SGE,
"error"
}; };
static const char f[4] = { 'x', 'y', 'z', 'w' }; static const char f[4] = { 'x', 'y', 'z', 'w' };
@ -253,8 +252,7 @@ std::string VertexProgramDecompiler::GetCond()
swizzle += f[d0.mask_w]; swizzle += f[d0.mask_w];
swizzle = swizzle == "xyzw" ? "" : "." + swizzle; swizzle = swizzle == "xyzw" ? "" : "." + swizzle;
return "any(" + compareFunction(cond_string_table[d0.cond], "cc" + std::to_string(d0.cond_reg_sel_1), getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")";
return fmt::format("any(%s(cc%d%s, vec4(0.0)%s))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str(), swizzle.c_str());
} }
void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::string& src) void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::string& src)
@ -330,7 +328,7 @@ std::string VertexProgramDecompiler::AddAddrMask()
std::string VertexProgramDecompiler::AddAddrReg() std::string VertexProgramDecompiler::AddAddrReg()
{ {
static const char f[] = { 'x', 'y', 'z', 'w' }; static const char f[] = { 'x', 'y', 'z', 'w' };
return m_parr.AddParam(PF_PARAM_NONE, "ivec4", "a" + std::to_string(d0.addr_reg_sel_1), "ivec4(0)") + AddAddrMask(); return m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "a" + std::to_string(d0.addr_reg_sel_1), getFloatTypeName(4) + "(0, 0, 0, 0)") + AddAddrMask();
} }
u32 VertexProgramDecompiler::GetAddr() u32 VertexProgramDecompiler::GetAddr()
@ -659,7 +657,7 @@ std::string VertexProgramDecompiler::Decompile()
case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break; case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break;
case RSX_VEC_OPCODE_SLT: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SLT, "$0", "$1") + ")"); break; case RSX_VEC_OPCODE_SLT: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SLT, "$0", "$1") + ")"); break;
case RSX_VEC_OPCODE_SGE: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SGE, "$0", "$1") + ")"); break; case RSX_VEC_OPCODE_SGE: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SGE, "$0", "$1") + ")"); break;
case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = ivec4($0)$am;"); break; case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = " + getIntTypeName(4) + "($0)$am;"); break;
case RSX_VEC_OPCODE_FRC: SetDSTVec(getFunction(FUNCTION::FUNCTION_FRACT)); break; case RSX_VEC_OPCODE_FRC: SetDSTVec(getFunction(FUNCTION::FUNCTION_FRACT)); break;
case RSX_VEC_OPCODE_FLR: SetDSTVec("floor($0)"); break; case RSX_VEC_OPCODE_FLR: SetDSTVec("floor($0)"); break;
case RSX_VEC_OPCODE_SEQ: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SEQ, "$0", "$1") + ")"); break; case RSX_VEC_OPCODE_SEQ: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SEQ, "$0", "$1") + ")"); break;

View file

@ -80,6 +80,10 @@ protected:
*/ */
virtual std::string getFloatTypeName(size_t elementCount) = 0; virtual std::string getFloatTypeName(size_t elementCount) = 0;
/** returns the type name of int vectors.
*/
virtual std::string getIntTypeName(size_t elementCount) = 0;
/** returns string calling function where arguments are passed via /** returns string calling function where arguments are passed via
* $0 $1 $2 substring. * $0 $1 $2 substring.
*/ */

View file

@ -91,32 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size)
} }
} }
static
std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(const rsx::data_array_format_info *vertex_info, const std::vector<u8> *vertex_data)
{
std::vector<D3D12_INPUT_ELEMENT_DESC> result;
size_t inputSlot = 0;
for (size_t index = 0; index < rsx::limits::vertex_count; index++)
{
const auto &info = vertex_info[index];
if (!info.size)
continue;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = info.array ? D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA : D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 0;
result.push_back(IAElement);
}
return result;
}
// D3D12GS member handling buffers // D3D12GS member handling buffers
@ -138,29 +112,105 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &
return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset; return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
} }
std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw) void D3D12GSRender::load_vertex_data(u32 first, u32 count)
{ {
u32 m_vertex_data_base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; m_first_count_pairs.emplace_back(std::make_pair(first, count));
std::vector<D3D12_VERTEX_BUFFER_VIEW> result; vertex_draw_count += count;
m_IASet = getIALayout(vertex_arrays_info, vertex_arrays); }
void D3D12GSRender::upload_vertex_attributes()
{
m_vertex_buffer_views.clear();
m_IASet.clear();
size_t inputSlot = 0;
// First array attribute
for (int index = 0; index < rsx::limits::vertex_count; ++index) for (int index = 0; index < rsx::limits::vertex_count; ++index)
{ {
const auto &info = vertex_arrays_info[index]; const auto &info = vertex_arrays_info[index];
if (!info.size) if (!info.array) // disabled or not a vertex array
continue; continue;
D3D12_GPU_VIRTUAL_ADDRESS virtualAddress = createVertexBuffer(info, vertex_arrays[index], m_device.Get(), m_vertexIndexData); u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = element_size * vertex_draw_count;
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
for (const auto &range : m_first_count_pairs)
{
write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info);
bufferMap = (char*)bufferMap + range.second * element_size;
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = virtualAddress; vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)vertex_arrays[index].size(); vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)rsx::get_vertex_type_size(info.type) * info.size; vertexBufferView.StrideInBytes = (UINT)element_size;
result.push_back(vertexBufferView); m_vertex_buffer_views.push_back(vertexBufferView);
m_timers.m_bufferUploadSize += subBufferSize;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_IASet.push_back(IAElement);
} }
return result; // Now immediate vertex buffer
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (info.array)
continue;
if (!info.size) // disabled
continue;
auto &data = vertex_arrays[index];
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = data.size();
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
memcpy(bufferMap, data.data(), data.size());
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
m_IASet.push_back(IAElement);
}
m_first_count_pairs.clear();
} }
D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw) D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
@ -229,7 +279,7 @@ D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
return indexBufferView; return indexBufferView;
} }
void D3D12GSRender::setScaleOffset() void D3D12GSRender::setScaleOffset(size_t descriptorIndex)
{ {
float scaleOffsetMat[16] = float scaleOffsetMat[16] =
{ {
@ -273,11 +323,11 @@ void D3D12GSRender::setScaleOffset()
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)256; constantBufferViewDesc.SizeInBytes = (UINT)256;
m_device->CreateConstantBufferView(&constantBufferViewDesc, m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart()) CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentScaleOffsetBufferIndex, g_descriptorStrideSRVCBVUAV)); .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
} }
void D3D12GSRender::FillVertexShaderConstantsBuffer() void D3D12GSRender::FillVertexShaderConstantsBuffer(size_t descriptorIndex)
{ {
for (const auto &entry : transform_constants) for (const auto &entry : transform_constants)
local_transform_constants[entry.first] = entry.second; local_transform_constants[entry.first] = entry.second;
@ -305,11 +355,11 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
m_device->CreateConstantBufferView(&constantBufferViewDesc, m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()) CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV)); .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
} }
void D3D12GSRender::FillPixelShaderConstantsBuffer() void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex)
{ {
// Get constant from fragment program // Get constant from fragment program
const std::vector<size_t> &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program); const std::vector<size_t> &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program);
@ -366,8 +416,8 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer()
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
m_device->CreateConstantBufferView(&constantBufferViewDesc, m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()) CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV)); .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
} }

View file

@ -36,9 +36,7 @@ static void unloadD3D12FunctionPointers()
void D3D12GSRender::ResourceStorage::Reset() void D3D12GSRender::ResourceStorage::Reset()
{ {
m_constantsBufferIndex = 0; m_descriptorsHeapIndex = 0;
m_currentScaleOffsetBufferIndex = 0;
m_currentTextureIndex = 0;
m_currentSamplerIndex = 0; m_currentSamplerIndex = 0;
m_samplerDescriptorHeapIndex = 0; m_samplerDescriptorHeapIndex = 0;
@ -65,9 +63,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device)
ThrowIfFailed(m_commandList->Close()); ThrowIfFailed(m_commandList->Close());
D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_descriptorsHeap)));
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap)));
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap)));
D3D12_DESCRIPTOR_HEAP_DESC samplerHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; D3D12_DESCRIPTOR_HEAP_DESC samplerHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
ThrowIfFailed(device->CreateDescriptorHeap(&samplerHeapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[0]))); ThrowIfFailed(device->CreateDescriptorHeap(&samplerHeapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[0])));
@ -205,16 +201,14 @@ D3D12GSRender::D3D12GSRender()
// Samplers // Samplers
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, textureCount, 0), CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, textureCount, 0),
}; };
CD3DX12_ROOT_PARAMETER RP[4]; CD3DX12_ROOT_PARAMETER RP[2];
RP[0].InitAsDescriptorTable(1, &descriptorRange[0]); RP[0].InitAsDescriptorTable((textureCount > 0) ? 3 : 2, &descriptorRange[0]);
RP[1].InitAsDescriptorTable(1, &descriptorRange[1]); RP[1].InitAsDescriptorTable(1, &descriptorRange[3]);
RP[2].InitAsDescriptorTable(1, &descriptorRange[2]);
RP[3].InitAsDescriptorTable(1, &descriptorRange[3]);
Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob; Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob; Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
ThrowIfFailed(wrapD3D12SerializeRootSignature( ThrowIfFailed(wrapD3D12SerializeRootSignature(
&CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 4 : 2, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), &CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
m_device->CreateRootSignature(0, m_device->CreateRootSignature(0,
@ -247,8 +241,8 @@ D3D12GSRender::D3D12GSRender()
m_rtts.Init(m_device.Get()); m_rtts.Init(m_device.Get());
m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
if (Ini.GSOverlay.GetValue()) if (Ini.GSOverlay.GetValue())
InitD2DStructures(); InitD2DStructures();
@ -417,9 +411,9 @@ void D3D12GSRender::end()
if (!vertex_index_array.empty() || vertex_draw_count) if (!vertex_index_array.empty() || vertex_draw_count)
{ {
const std::vector<D3D12_VERTEX_BUFFER_VIEW> &vertexBufferViews = UploadVertexBuffers(!vertex_index_array.empty()); upload_vertex_attributes();
const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty()); const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty());
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
if (m_renderingInfo.m_indexed) if (m_renderingInfo.m_indexed)
getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView); getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView);
} }
@ -442,26 +436,11 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> constantsDurationStart = std::chrono::system_clock::now(); std::chrono::time_point<std::chrono::system_clock> constantsDurationStart = std::chrono::system_clock::now();
size_t currentDescriptorIndex = getCurrentResourceStorage().m_descriptorsHeapIndex;
// Constants // Constants
setScaleOffset(); setScaleOffset(currentDescriptorIndex);
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_scaleOffsetDescriptorHeap.GetAddressOf()); FillVertexShaderConstantsBuffer(currentDescriptorIndex + 1);
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0, FillPixelShaderConstantsBuffer(currentDescriptorIndex + 2);
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentScaleOffsetBufferIndex, g_descriptorStrideSRVCBVUAV)
);
getCurrentResourceStorage().m_currentScaleOffsetBufferIndex++;
size_t currentBufferIndex = getCurrentResourceStorage().m_constantsBufferIndex;
FillVertexShaderConstantsBuffer();
getCurrentResourceStorage().m_constantsBufferIndex++;
FillPixelShaderConstantsBuffer();
getCurrentResourceStorage().m_constantsBufferIndex++;
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_constantsBufferDescriptorsHeap.GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(1,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentBufferIndex, g_descriptorStrideSRVCBVUAV)
);
std::chrono::time_point<std::chrono::system_clock> constantsDurationEnd = std::chrono::system_clock::now(); std::chrono::time_point<std::chrono::system_clock> constantsDurationEnd = std::chrono::system_clock::now();
m_timers.m_constantsDuration += std::chrono::duration_cast<std::chrono::microseconds>(constantsDurationEnd - constantsDurationStart).count(); m_timers.m_constantsDuration += std::chrono::duration_cast<std::chrono::microseconds>(constantsDurationEnd - constantsDurationStart).count();
@ -471,7 +450,7 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> textureDurationStart = std::chrono::system_clock::now(); std::chrono::time_point<std::chrono::system_clock> textureDurationStart = std::chrono::system_clock::now();
if (m_PSO->second > 0) if (m_PSO->second > 0)
{ {
size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get()); size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get(), currentDescriptorIndex + 3);
// Fill empty slots // Fill empty slots
for (; usedTexture < m_PSO->second; usedTexture++) for (; usedTexture < m_PSO->second; usedTexture++)
@ -486,8 +465,8 @@ void D3D12GSRender::end()
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()) CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentTextureIndex + (INT)usedTexture, g_descriptorStrideSRVCBVUAV) .Offset((INT)currentDescriptorIndex + 3 + (INT)usedTexture, g_descriptorStrideSRVCBVUAV)
); );
D3D12_SAMPLER_DESC samplerDesc = {}; D3D12_SAMPLER_DESC samplerDesc = {};
@ -501,21 +480,35 @@ void D3D12GSRender::end()
); );
} }
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_textureDescriptorsHeap.GetAddressOf()); ID3D12DescriptorHeap *descriptors[] =
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(2, {
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart()) getCurrentResourceStorage().m_descriptorsHeap.Get(),
.Offset((INT)getCurrentResourceStorage().m_currentTextureIndex, g_descriptorStrideSRVCBVUAV) getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex].Get(),
); };
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(2, descriptors);
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex].GetAddressOf()); getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0,
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(3, CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex, g_descriptorStrideSRVCBVUAV)
);
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(1,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetGPUDescriptorHandleForHeapStart()) CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentSamplerIndex, g_descriptorStrideSamplers) .Offset((INT)getCurrentResourceStorage().m_currentSamplerIndex, g_descriptorStrideSamplers)
); );
getCurrentResourceStorage().m_currentTextureIndex += usedTexture;
getCurrentResourceStorage().m_currentSamplerIndex += usedTexture; getCurrentResourceStorage().m_currentSamplerIndex += usedTexture;
getCurrentResourceStorage().m_descriptorsHeapIndex += usedTexture + 3;
} }
else
{
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_descriptorsHeap.GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex, g_descriptorStrideSRVCBVUAV)
);
getCurrentResourceStorage().m_descriptorsHeapIndex += 3;
}
std::chrono::time_point<std::chrono::system_clock> textureDurationEnd = std::chrono::system_clock::now(); std::chrono::time_point<std::chrono::system_clock> textureDurationEnd = std::chrono::system_clock::now();
m_timers.m_textureDuration += std::chrono::duration_cast<std::chrono::microseconds>(textureDurationEnd - textureDurationStart).count(); m_timers.m_textureDuration += std::chrono::duration_cast<std::chrono::microseconds>(textureDurationEnd - textureDurationStart).count();
@ -651,9 +644,6 @@ void D3D12GSRender::flip(int buffer)
if (false) if (false)
{ {
CellGcmDisplayInfo* buffers;// = vm::ps3::_ptr<CellGcmDisplayInfo>(m_gcm_buffers_addr); CellGcmDisplayInfo* buffers;// = vm::ps3::_ptr<CellGcmDisplayInfo>(m_gcm_buffers_addr);
u32 addr = rsx::get_address(buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL);
w = buffers[gcm_current_buffer].width;
h = buffers[gcm_current_buffer].height;
u32 addr = rsx::get_address(gcm_buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); u32 addr = rsx::get_address(gcm_buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL);
w = gcm_buffers[gcm_current_buffer].width; w = gcm_buffers[gcm_current_buffer].width;
h = gcm_buffers[gcm_current_buffer].height; h = gcm_buffers[gcm_current_buffer].height;

View file

@ -374,15 +374,11 @@ private:
ComPtr<ID3D12CommandAllocator> m_commandAllocator; ComPtr<ID3D12CommandAllocator> m_commandAllocator;
ComPtr<ID3D12GraphicsCommandList> m_commandList; ComPtr<ID3D12GraphicsCommandList> m_commandList;
// Constants storage // Descriptor heap
ComPtr<ID3D12DescriptorHeap> m_constantsBufferDescriptorsHeap; ComPtr<ID3D12DescriptorHeap> m_descriptorsHeap;
size_t m_constantsBufferIndex; size_t m_descriptorsHeapIndex;
ComPtr<ID3D12DescriptorHeap> m_scaleOffsetDescriptorHeap;
size_t m_currentScaleOffsetBufferIndex;
// Texture storage // Sampler heap
ComPtr<ID3D12DescriptorHeap> m_textureDescriptorsHeap;
size_t m_currentTextureIndex;
ComPtr<ID3D12DescriptorHeap> m_samplerDescriptorHeap[2]; ComPtr<ID3D12DescriptorHeap> m_samplerDescriptorHeap[2];
size_t m_samplerDescriptorHeapIndex; size_t m_samplerDescriptorHeapIndex;
size_t m_currentSamplerIndex; size_t m_currentSamplerIndex;
@ -416,7 +412,7 @@ private:
// Constants storage // Constants storage
DataHeap<ID3D12Resource, 256> m_constantsData; DataHeap<ID3D12Resource, 256> m_constantsData;
// Vertex storage // Vertex storage
DataHeap<ID3D12Resource, 65536> m_vertexIndexData; DataHeap<ID3D12Resource, 256> m_vertexIndexData;
// Texture storage // Texture storage
DataHeap<ID3D12Resource, 65536> m_textureUploadData; DataHeap<ID3D12Resource, 65536> m_textureUploadData;
DataHeap<ID3D12Heap, 65536> m_UAVHeap; DataHeap<ID3D12Heap, 65536> m_UAVHeap;
@ -432,6 +428,7 @@ private:
RenderTargets m_rtts; RenderTargets m_rtts;
std::vector<D3D12_INPUT_ELEMENT_DESC> m_IASet; std::vector<D3D12_INPUT_ELEMENT_DESC> m_IASet;
std::vector<D3D12_VERTEX_BUFFER_VIEW> m_vertex_buffer_views;
INT g_descriptorStrideSRVCBVUAV; INT g_descriptorStrideSRVCBVUAV;
INT g_descriptorStrideDSV; INT g_descriptorStrideDSV;
@ -466,12 +463,11 @@ private:
bool LoadProgram(); bool LoadProgram();
std::vector<std::pair<u32, u32> > m_first_count_pairs;
/** /**
* Create as little vertex buffer as possible to hold all vertex info (in upload heap), * Upload all vertex attributes whose (first, count) info was previously accumulated.
* create corresponding IA layout that can be used for load program and
* returns a vector of vertex buffer view that can be passed to IASetVertexBufferView().
*/ */
std::vector<D3D12_VERTEX_BUFFER_VIEW> UploadVertexBuffers(bool indexed_draw = false); void upload_vertex_attributes();
/** /**
* Create index buffer for indexed rendering and non native primitive format if needed, and * Create index buffer for indexed rendering and non native primitive format if needed, and
@ -481,16 +477,16 @@ private:
D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false); D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false);
void setScaleOffset(); void setScaleOffset(size_t descriptorIndex);
void FillVertexShaderConstantsBuffer(); void FillVertexShaderConstantsBuffer(size_t descriptorIndex);
void FillPixelShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(size_t descriptorIndex);
/** /**
* Fetch all textures recorded in the state in the render target cache and in the texture cache. * Fetch all textures recorded in the state in the render target cache and in the texture cache.
* If a texture is not cached, populate cmdlist with uploads command. * If a texture is not cached, populate cmdlist with uploads command.
* Create necessary resource view/sampler descriptors in the per frame storage struct. * Create necessary resource view/sampler descriptors in the per frame storage struct.
* returns the number of textures uploaded. * returns the number of textures uploaded.
*/ */
size_t UploadTextures(ID3D12GraphicsCommandList *cmdlist); size_t UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex);
/** /**
* Creates render target if necessary. * Creates render target if necessary.
@ -511,6 +507,8 @@ protected:
virtual bool domethod(u32 cmd, u32 arg) override; virtual bool domethod(u32 cmd, u32 arg) override;
virtual void end() override; virtual void end() override;
virtual void flip(int buffer) override; virtual void flip(int buffer) override;
virtual void load_vertex_data(u32 first, u32 count) override;
}; };
#endif #endif

View file

@ -135,7 +135,7 @@ struct D3D12Traits
// TODO: This shouldn't use current dir // TODO: This shouldn't use current dir
std::string filename = "./FragmentProgram" + std::to_string(ID) + ".hlsl"; std::string filename = "./FragmentProgram" + std::to_string(ID) + ".hlsl";
fs::file(filename, o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); fs::file(filename, fom::write | fom::create | fom::trunc).write(shader.c_str(), shader.size());
fragmentProgramData.id = (u32)ID; fragmentProgramData.id = (u32)ID;
} }
@ -148,7 +148,7 @@ struct D3D12Traits
// TODO: This shouldn't use current dir // TODO: This shouldn't use current dir
std::string filename = "./VertexProgram" + std::to_string(ID) + ".hlsl"; std::string filename = "./VertexProgram" + std::to_string(ID) + ".hlsl";
fs::file(filename, o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); fs::file(filename, fom::write | fom::create | fom::trunc).write(shaderCode.c_str(), shaderCode.size());
vertexProgramData.id = (u32)ID; vertexProgramData.id = (u32)ID;
} }

View file

@ -284,7 +284,7 @@ size_t getTextureSize(const rsx::texture &texture)
} }
} }
size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist) size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex)
{ {
size_t usedTexture = 0; size_t usedTexture = 0;
@ -452,7 +452,8 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
} }
m_device->CreateShaderResourceView(vramTexture, &srvDesc, m_device->CreateShaderResourceView(vramTexture, &srvDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset((UINT)getCurrentResourceStorage().m_currentTextureIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV)); CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)descriptorIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV));
if (getCurrentResourceStorage().m_currentSamplerIndex + 16 > 2048) if (getCurrentResourceStorage().m_currentSamplerIndex + 16 > 2048)
{ {

View file

@ -11,6 +11,11 @@ std::string D3D12VertexProgramDecompiler::getFloatTypeName(size_t elementCount)
return getFloatTypeNameImp(elementCount); return getFloatTypeNameImp(elementCount);
} }
/**
 * Returns the integer type name used by the D3D12 vertex program decompiler.
 * The result is always "int4"; elementCount does not influence it
 * (presumably the HLSL 4-component integer vector - confirm against the base decompiler).
 */
std::string D3D12VertexProgramDecompiler::getIntTypeName(size_t elementCount)
{
	// The parameter is accepted to satisfy the overridden interface but is not read.
	static_cast<void>(elementCount);
	return std::string("int4");
}
std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f) std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f)
{ {
return getFunctionImp(f); return getFunctionImp(f);
@ -141,7 +146,7 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS)
void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS) void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS)
{ {
OS << " PixelInput Out;" << std::endl; OS << " PixelInput Out = (PixelInput)0;" << std::endl;
// Declare inside main function // Declare inside main function
for (auto &i : reg_table) for (auto &i : reg_table)
{ {

View file

@ -8,6 +8,7 @@ struct D3D12VertexProgramDecompiler : public VertexProgramDecompiler
{ {
protected: protected:
virtual std::string getFloatTypeName(size_t elementCount) override; virtual std::string getFloatTypeName(size_t elementCount) override;
std::string getIntTypeName(size_t elementCount) override;
virtual std::string getFunction(enum class FUNCTION) override; virtual std::string getFunction(enum class FUNCTION) override;
virtual std::string compareFunction(enum class COMPARE, const std::string &, const std::string &) override; virtual std::string compareFunction(enum class COMPARE, const std::string &, const std::string &) override;

View file

@ -10,6 +10,12 @@ std::string GLVertexDecompilerThread::getFloatTypeName(size_t elementCount)
return getFloatTypeNameImpl(elementCount); return getFloatTypeNameImpl(elementCount);
} }
/**
 * Returns the integer type name used by the GL vertex program decompiler.
 * The result is always "ivec4"; elementCount does not influence it
 * (presumably the GLSL 4-component integer vector - confirm against the base decompiler).
 */
std::string GLVertexDecompilerThread::getIntTypeName(size_t elementCount)
{
	// The parameter is accepted to satisfy the overridden interface but is not read.
	static_cast<void>(elementCount);
	return std::string("ivec4");
}
std::string GLVertexDecompilerThread::getFunction(FUNCTION f) std::string GLVertexDecompilerThread::getFunction(FUNCTION f)
{ {
return getFunctionImpl(f); return getFunctionImpl(f);

View file

@ -9,6 +9,7 @@ struct GLVertexDecompilerThread : public VertexProgramDecompiler
std::string &m_shader; std::string &m_shader;
protected: protected:
virtual std::string getFloatTypeName(size_t elementCount) override; virtual std::string getFloatTypeName(size_t elementCount) override;
std::string getIntTypeName(size_t elementCount) override;
virtual std::string getFunction(FUNCTION) override; virtual std::string getFunction(FUNCTION) override;
virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override;

View file

@ -10,6 +10,8 @@
#include "Emu/SysCalls/CB_FUNC.h" #include "Emu/SysCalls/CB_FUNC.h"
#include "Emu/SysCalls/lv2/sys_time.h" #include "Emu/SysCalls/lv2/sys_time.h"
#include "Common/BufferUtils.h"
#include "Utilities/types.h" #include "Utilities/types.h"
extern "C" extern "C"
@ -823,68 +825,19 @@ namespace rsx
for (int index = 0; index < limits::vertex_count; ++index) for (int index = 0; index < limits::vertex_count; ++index)
{ {
auto &info = vertex_arrays_info[index]; const auto &info = vertex_arrays_info[index];
if (!info.array) // disabled or not a vertex array if (!info.array) // disabled or not a vertex array
{
continue; continue;
}
auto &data = vertex_arrays[index]; auto &data = vertex_arrays[index];
if (info.frequency > 1)
{
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, info.frequency, index);
}
u32 offset = method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
u32 address = get_address(offset & 0x7fffffff, offset >> 31);
u32 type_size = get_vertex_type_size(info.type); u32 type_size = get_vertex_type_size(info.type);
u32 element_size = type_size * info.size; u32 element_size = type_size * info.size;
u32 dst_position = (u32)data.size(); u32 dst_position = (u32)data.size();
data.resize(dst_position + count * element_size); data.resize(dst_position + count * element_size);
write_vertex_array_data_to_buffer(data.data() + dst_position, first, count, index, info);
u32 base_offset = method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
for (u32 i = 0; i < count; ++i)
{
const u8* src = vm::_ptr<u8>(address + base_offset + info.stride * (first + i + base_index));
u8* dst = data.data() + dst_position + i * element_size;
switch (type_size)
{
case 1:
memcpy(dst, src, info.size);
break;
case 2:
{
auto* c_src = (const be_t<u16>*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < info.size; ++j)
{
*c_dst++ = *c_src++;
}
break;
}
case 4:
{
auto* c_src = (const be_t<u32>*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < info.size; ++j)
{
*c_dst++ = *c_src++;
}
break;
}
}
}
} }
} }

View file

@ -163,7 +163,7 @@ namespace rsx
u32 transform_program[512 * 4] = {}; u32 transform_program[512 * 4] = {};
void load_vertex_data(u32 first, u32 count); virtual void load_vertex_data(u32 first, u32 count);
void load_vertex_index_data(u32 first, u32 count); void load_vertex_index_data(u32 first, u32 count);
public: public: