d3d12: Try to use stream function that doesn't pollute cache

This commit is contained in:
vlj 2015-05-25 19:49:43 +02:00 committed by Vincent Lejeune
parent 8b8385b0a4
commit 90e0a89e9b
2 changed files with 37 additions and 1 deletions

View file

@ -2,6 +2,7 @@
#if defined(DX12_SUPPORT)
#include <d3d12.h>
#include <cassert>
inline
void check(HRESULT hr)
@ -10,6 +11,17 @@ void check(HRESULT hr)
abort();
}
/**
 * Round a value up to the next multiple of a power of 2.
 *
 * A value that is already a multiple of powerOf2 is returned unchanged.
 *
 * @param unalignedVal value to round up
 * @param powerOf2     alignment; must be a non-zero power of 2
 * @return the smallest multiple of powerOf2 that is >= unalignedVal
 */
inline
size_t powerOf2Align(size_t unalignedVal, size_t powerOf2)
{
	// check that powerOf2 is a (non-zero) power of 2
	assert(powerOf2 != 0 && !(powerOf2 & (powerOf2 - 1)));
	// For a power of 2, (powerOf2 - 1) has exactly the low bits set; clearing
	// them after adding the mask rounds up to the next multiple.
	const size_t lowBitsMask = powerOf2 - 1;
	return (unalignedVal + lowBitsMask) & ~lowBitsMask;
}
/**
* Send data to dst pointer without polluting cache.
* Useful to write to mapped memory from upload heap.
@ -24,4 +36,28 @@ void streamToBuffer(void* dst, void* src, size_t sizeInBytes)
}
}
/**
 * Copy src to dst pointer without polluting cache.
 * Useful to write to mapped memory from upload heap.
 *
 * The copy is done one 64-byte line at a time using non-temporal 16-byte stores.
 *
 * @param dst         destination; must be 16-byte aligned (required by the
 *                    non-temporal store instruction)
 * @param src         source; read with unaligned loads, so any alignment works
 * @param sizeInBytes number of bytes to copy; must be a multiple of 64
 */
inline
void streamBuffer(void* dst, void* src, size_t sizeInBytes)
{
	// Assume 64 bytes cache line
	const size_t cacheLineSize = 64;
	// Only whole lines are copied, so a partial trailing line would be dropped.
	assert(sizeInBytes % cacheLineSize == 0);
	// _mm_stream_si128 faults on a destination that is not 16-byte aligned.
	assert(reinterpret_cast<size_t>(dst) % sizeof(__m128i) == 0);

	char *dstBytes = static_cast<char*>(dst);
	const char *srcBytes = static_cast<const char*>(src);
	const size_t lineCount = sizeInBytes / cacheLineSize;
	// size_t index: an unsigned index would wrap when computing byte offsets
	// for very large buffers.
	for (size_t lineIndex = 0; lineIndex < lineCount; lineIndex++)
	{
		const size_t lineOffset = lineIndex * cacheLineSize;
		const char *line = srcBytes + lineOffset;
		// Non-temporal hint for the source line as well.
		_mm_prefetch(line, _MM_HINT_NTA);
		for (size_t chunk = 0; chunk < cacheLineSize; chunk += sizeof(__m128i))
		{
			const __m128i value = _mm_loadu_si128(reinterpret_cast<const __m128i*>(line + chunk));
			_mm_stream_si128(reinterpret_cast<__m128i*>(dstBytes + lineOffset + chunk), value);
		}
	}
	// Non-temporal stores are weakly ordered; fence so they are globally
	// visible before the caller hands the memory to another agent.
	_mm_sfence();
}
#endif

View file

@ -486,7 +486,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
void *constantsBufferMap;
check(constantsBuffer->Map(0, nullptr, &constantsBufferMap));
streamToBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float));
streamBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float));
constantsBuffer->Unmap(0, nullptr);
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};