// Alignment and cache-bypassing copy helpers for rpcs3/Emu/RSX/D3D12/D3D12.h.

#include <cassert>
#include <cstddef>
#include <emmintrin.h> // __m128i, _mm_load_si128, _mm_stream_si128, _mm_prefetch

/**
 * Round a value up to the next multiple of a power of two.
 *
 * @param unalignedVal Value to round up.
 * @param powerOf2     Alignment; must be a non-zero power of two.
 * @return The smallest multiple of powerOf2 that is >= unalignedVal
 *         (unalignedVal itself when it is already aligned).
 */
inline
size_t powerOf2Align(size_t unalignedVal, size_t powerOf2)
{
	// A power of two has a single bit set, so (x & (x - 1)) is zero for it.
	assert(powerOf2 != 0 && !(powerOf2 & (powerOf2 - 1)));

	// Every bit below the alignment bit has to be cleared, not just the
	// alignment bit itself: the mask is ~(powerOf2 - 1), not ~powerOf2.
	const size_t lowBits = powerOf2 - 1;
	return (unalignedVal + lowBits) & ~lowBits;
}

/**
 * Copy src to dst without polluting the cache, using non-temporal stores.
 * Useful to write to mapped memory from an upload heap.
 *
 * The copy works on whole 64-byte lines (assumed cache line size), each moved
 * as four 16-byte aligned loads and streaming stores.
 *
 * @param dst         Destination; must be 16-byte aligned.
 * @param src         Source; must be 16-byte aligned.
 * @param sizeInBytes Number of bytes to copy; must be a multiple of 64
 *                    (e.g. 512 * 4 * sizeof(float) for the vertex shader
 *                    constants upload).
 *
 * NOTE(review): streaming stores are weakly ordered; confirm whether an
 * _mm_sfence() is required before the written data is consumed elsewhere.
 */
inline
void streamBuffer(void* dst, void* src, size_t sizeInBytes)
{
	// Assume 64 bytes cache line
	constexpr size_t cacheLineSize = 64;

	// Only whole lines are copied, so a size that is not a multiple of the
	// line size would silently lose its tail.
	assert(sizeInBytes % cacheLineSize == 0);

	const char* srcBytes = static_cast<const char*>(src);
	char* dstBytes = static_cast<char*>(dst);
	const size_t lineCount = sizeInBytes / cacheLineSize;

	for (size_t lineIndex = 0; lineIndex < lineCount; lineIndex++)
	{
		const char* srcLine = srcBytes + lineIndex * cacheLineSize;
		char* dstLine = dstBytes + lineIndex * cacheLineSize;

		// Fetch the source line with a non-temporal hint before reading it.
		_mm_prefetch(srcLine, _MM_HINT_NTA);

		for (size_t offset = 0; offset < cacheLineSize; offset += sizeof(__m128i))
		{
			const __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i*>(srcLine + offset));
			_mm_stream_si128(reinterpret_cast<__m128i*>(dstLine + offset), chunk);
		}
	}
}