Merge pull request #1281 from vlj/rsx

Rsx: Factorise d3d12 constant loading code
This commit is contained in:
Ivan 2015-11-07 18:43:19 +03:00
commit adcb9864df
8 changed files with 109 additions and 61 deletions

View file

@ -276,4 +276,16 @@ void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned
return;
}
}
}
/**
 * Pack four 32-bit values into one 128-bit vector and write it to dst with a
 * non-temporal (streaming) store.
 * x ends up in the lowest 32 bits of the vector and w in the highest.
 * dst must be 16-byte aligned, as required by _mm_stream_si128.
 */
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept
{
	__m128i *const destination = (__m128i*)dst;
	// _mm_set_epi32 takes its arguments from the highest lane down to the lowest.
	_mm_stream_si128(destination, _mm_set_epi32(w, z, y, x));
}
/**
 * Copy 16 bytes from src to dst using a non-temporal (streaming) store.
 * src may be unaligned (it is read with an unaligned load);
 * dst must be 16-byte aligned, as required by _mm_stream_si128.
 */
void stream_vector_from_memory(void *dst, void *src) noexcept
{
	const __m128i loaded = _mm_loadu_si128(static_cast<const __m128i*>(src));
	_mm_stream_si128(static_cast<__m128i*>(dst), loaded);
}

View file

@ -42,4 +42,14 @@ void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned
/**
* Write index data needed to emulate non indexed non native primitive mode.
*/
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept;
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept;
/**
 * Stream a 128-bit vector built from four 32-bit values to dst.
 * x is stored in the lowest 32 bits and w in the highest.
 * The write is a non-temporal store, so dst must be 16-byte aligned.
 */
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept;
/**
 * Stream a 128-bit vector (16 bytes) from src to dst.
 * src is read with an unaligned load; dst is written with a non-temporal
 * store and must be 16-byte aligned.
 */
void stream_vector_from_memory(void *dst, void *src) noexcept;

View file

@ -337,6 +337,36 @@ public:
return result;
}
size_t get_fragment_constants_buffer_size(const RSXFragmentProgram *fragmentShader) const noexcept
{
typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragmentShader->addr));
if (It != m_cacheFS.end())
return It->second.FragmentConstantOffsetCache.size() * 4 * sizeof(float);
LOG_ERROR(RSX, "Can't retrieve constant offset cache");
return 0;
}
/**
 * Write the constants of the given fragment program to buffer, one 16-byte
 * vector per cached constant offset, packed back to back.
 * Each vector is read from the program's memory, has the two bytes of every
 * 16-bit pair swapped, and is written with a non-temporal store, so buffer
 * must be 16-byte aligned.
 * Returns without writing anything when the program is not in the fragment
 * shader cache.
 */
void fill_fragment_constans_buffer(void *buffer, const RSXFragmentProgram *fragment_program) const noexcept
{
	const auto cache_entry = m_cacheFS.find(vm::base(fragment_program->addr));
	if (cache_entry == m_cacheFS.end())
		return;

	// Shuffle control: output byte 0 takes input byte 1, output byte 1 takes input byte 0, and so on.
	// NOTE(review): presumably converts the constants from their in-program layout to the host layout - confirm.
	const __m128i swap_adjacent_bytes = _mm_set_epi8(
		0xE, 0xF, 0xC, 0xD,
		0xA, 0xB, 0x8, 0x9,
		0x6, 0x7, 0x4, 0x5,
		0x2, 0x3, 0x0, 0x1);

	char *write_cursor = (char*)buffer;
	for (const size_t constant_offset : cache_entry->second.FragmentConstantOffsetCache)
	{
		void *source = vm::base(fragment_program->addr + (u32)constant_offset);
		const __m128i raw = _mm_loadu_si128((__m128i*)source);
		_mm_stream_si128((__m128i*)write_cursor, _mm_shuffle_epi8(raw, swap_adjacent_bytes));
		write_cursor += 4 * sizeof(u32);
	}
}
const std::vector<size_t> &getFragmentConstantOffsetsCache(const RSXFragmentProgram *fragmentShader) const
{
typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragmentShader->addr));

View file

@ -142,30 +142,6 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count)
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
{
float scale_offset_matrix[16] =
{
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, -1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f
};
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
// Scale
scale_offset_matrix[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f);
scale_offset_matrix[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f);
scale_offset_matrix[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
// Offset
scale_offset_matrix[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f);
scale_offset_matrix[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f));
scale_offset_matrix[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
scale_offset_matrix[3] /= clip_w / 2.f;
scale_offset_matrix[7] /= clip_h / 2.f;
assert(m_constantsData.can_alloc(256));
size_t heap_offset = m_constantsData.alloc(256);
@ -173,7 +149,7 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
// Separate constant buffer
void *mapped_buffer;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer));
streamToBuffer((char*)mapped_buffer + heap_offset, scale_offset_matrix, 16 * sizeof(float));
fill_scale_offset_data((char*)mapped_buffer + heap_offset);
int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF];
memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int));
@ -191,9 +167,6 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_index)
{
for (const auto &entry : transform_constants)
local_transform_constants[entry.first] = entry.second;
size_t buffer_size = 512 * 4 * sizeof(float);
assert(m_constantsData.can_alloc(buffer_size));
@ -201,16 +174,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
void *mapped_buffer;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
for (const auto &entry : local_transform_constants)
{
float data[4] = {
entry.second.x,
entry.second.y,
entry.second.z,
entry.second.w
};
streamToBuffer((char*)mapped_buffer + heap_offset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float));
}
fill_vertex_program_constants_data((char*)mapped_buffer + heap_offset);
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
@ -225,8 +189,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_index)
{
// Get constant from fragment program
const std::vector<size_t> &fragment_constant_offsets = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program);
size_t buffer_size = fragment_constant_offsets.size() * 4 * sizeof(float) + 1;
size_t buffer_size = m_cachePSO.get_fragment_constants_buffer_size(&fragment_program);
// Multiple of 256 never 0
buffer_size = (buffer_size + 255) & ~255;
@ -236,24 +199,7 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_
size_t offset = 0;
void *mapped_buffer;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
for (size_t offset_in_fragment_program : fragment_constant_offsets)
{
u32 vector[4];
auto data = vm::ps3::ptr<u32>::make(fragment_program.addr + (u32)offset_in_fragment_program);
u32 c0 = (data[0] >> 16 | data[0] << 16);
u32 c1 = (data[1] >> 16 | data[1] << 16);
u32 c2 = (data[2] >> 16 | data[2] << 16);
u32 c3 = (data[3] >> 16 | data[3] << 16);
vector[0] = c0;
vector[1] = c1;
vector[2] = c2;
vector[3] = c3;
streamToBuffer((char*)mapped_buffer + heap_offset + offset, vector, 4 * sizeof(u32));
offset += 4 * sizeof(u32);
}
m_cachePSO.fill_fragment_constans_buffer((char*)mapped_buffer + heap_offset, &fragment_program);
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {

View file

@ -672,7 +672,6 @@ void D3D12GSRender::flip(int buffer)
storage.uav_heap_get_pos = m_UAVHeap.get_current_put_pos_minus_one();
// Flush
local_transform_constants.clear();
m_texturesRTTs.clear();
// Now get ready for next frame

View file

@ -72,7 +72,6 @@ private:
RSXFragmentProgram fragment_program;
PipelineStateObjectCache m_cachePSO;
std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t> *m_PSO;
std::unordered_map<u32, color4f> local_transform_constants;
struct
{

View file

@ -586,6 +586,7 @@ namespace rsx
std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
rsx->timer_sync.Start();
rsx->local_transform_constants.clear();
}
void user_command(thread* rsx, u32 arg)
@ -1024,6 +1025,42 @@ namespace rsx
onexit_thread();
}
/**
 * Write a 4x4 scale/offset matrix (four 16-byte rows, 64 bytes in total) to buffer.
 * Each of the first three rows holds a scale on the diagonal and an offset in its last
 * component; the last row is (0, 0, 0, 1).
 * X and Y are normalised by half of the surface clip values and the Y terms are negated.
 * Rows are written with streaming stores, so buffer must be 16-byte aligned.
 */
void thread::fill_scale_offset_data(void *buffer) const noexcept
{
	// The clip values used here live in the upper 16 bits of the surface clip registers.
	int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
	int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
	const float half_clip_w = clip_w / 2.f;
	const float half_clip_h = clip_h / 2.f;

	// Viewport registers are reinterpreted as raw float bit patterns.
	// X axis
	float x_scale = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / half_clip_w;
	float x_offset = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - half_clip_w;
	x_offset /= half_clip_w;

	// Y axis (both terms negated)
	float y_scale = -(float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / half_clip_h;
	float y_offset = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - half_clip_h);
	y_offset /= half_clip_h;

	// Z axis is taken from the registers unchanged
	float z_scale = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
	float z_offset = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];

	float unit = 1.f;

	// One 16-byte row per stream_vector call; floats are passed as their bit patterns.
	char *const rows = (char*)buffer;
	stream_vector(rows, (u32&)x_scale, 0, 0, (u32&)x_offset);
	stream_vector(rows + 16, 0, (u32&)y_scale, 0, (u32&)y_offset);
	stream_vector(rows + 32, 0, 0, (u32&)z_scale, (u32&)z_offset);
	stream_vector(rows + 48, 0, 0, 0, (u32&)unit);
}
/**
 * Fill buffer with vertex program constants.
 * Buffer must be at least 512 float4 wide.
 * The current transform_constants are first merged into local_transform_constants
 * (entries with the same index are overwritten); every entry of the merged set is
 * then streamed to buffer at byte offset index * 4 * sizeof(float).
 * Entries are written with streaming stores, so buffer must be 16-byte aligned.
 */
void thread::fill_vertex_program_constants_data(void *buffer) noexcept
{
	for (const auto &pending : transform_constants)
	{
		local_transform_constants[pending.first] = pending.second;
	}

	char *const destination = (char*)buffer;
	for (const auto &constant : local_transform_constants)
	{
		void *const values = (void*)constant.second.rgba;
		stream_vector_from_memory(destination + constant.first * 4 * sizeof(float), values);
	}
}
u64 thread::timestamp() const
{
// Get timestamp, and convert it from microseconds to nanoseconds

View file

@ -161,6 +161,9 @@ namespace rsx
std::unordered_map<u32, color4_base<f32>> transform_constants;
// Constant stored for whole frame
std::unordered_map<u32, color4f> local_transform_constants;
u32 transform_program[512 * 4] = {};
virtual void load_vertex_data(u32 first, u32 count);
@ -218,6 +221,18 @@ namespace rsx
void task();
/**
 * Fill buffer with 4x4 scale offset matrix (16 floats, 64 bytes).
 * Vertex shader's position is to be multiplied by this matrix.
 * The matrix is built from the viewport scale/offset and surface clip registers.
 */
void fill_scale_offset_data(void *buffer) const noexcept;
/**
 * Fill buffer with vertex program constants.
 * Buffer must be at least 512 float4 wide.
 * Not const: the current transform_constants are merged into
 * local_transform_constants before being written out.
 */
void fill_vertex_program_constants_data(void *buffer) noexcept;
public:
void reset();
void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);