mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
Merge pull request #1281 from vlj/rsx
Rsx: Factorise d3d12 constant loading code
This commit is contained in:
commit
adcb9864df
8 changed files with 109 additions and 61 deletions
|
@ -276,4 +276,16 @@ void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned
|
|||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pack four 32-bit values into one 128-bit vector and write it to dst
 * with a streaming (non-temporal) store.
 * x occupies the lowest 32 bits of the vector and w the highest.
 * dst is handed directly to _mm_stream_si128, so it must satisfy that
 * intrinsic's alignment requirement (16 bytes per Intel's reference).
 */
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept
{
	_mm_stream_si128(static_cast<__m128i*>(dst), _mm_set_epi32(w, z, y, x));
}
|
||||
|
||||
/**
 * Copy one 128-bit vector from src to dst.
 * src is read with an unaligned load; dst is written with a streaming
 * (non-temporal) store, so it must satisfy _mm_stream_si128's alignment
 * requirement (16 bytes per Intel's reference).
 */
void stream_vector_from_memory(void *dst, void *src) noexcept
{
	const __m128i loaded = _mm_loadu_si128(static_cast<const __m128i*>(src));
	_mm_stream_si128(static_cast<__m128i*>(dst), loaded);
}
|
|
@ -42,4 +42,14 @@ void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned
|
|||
/**
|
||||
* Write index data needed to emulate non indexed non native primitive mode.
|
||||
*/
|
||||
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept;
|
||||
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept;
|
||||
|
||||
/**
|
||||
* Stream a 128-bit vector to dst.
|
||||
*/
|
||||
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept;
|
||||
|
||||
/**
|
||||
* Stream a 128-bit vector from src to dst.
|
||||
*/
|
||||
void stream_vector_from_memory(void *dst, void *src) noexcept;
|
||||
|
|
|
@ -337,6 +337,36 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
size_t get_fragment_constants_buffer_size(const RSXFragmentProgram *fragmentShader) const noexcept
|
||||
{
|
||||
typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragmentShader->addr));
|
||||
if (It != m_cacheFS.end())
|
||||
return It->second.FragmentConstantOffsetCache.size() * 4 * sizeof(float);
|
||||
LOG_ERROR(RSX, "Can't retrieve constant offset cache");
|
||||
return 0;
|
||||
}
|
||||
|
||||
void fill_fragment_constans_buffer(void *buffer, const RSXFragmentProgram *fragment_program) const noexcept
|
||||
{
|
||||
typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragment_program->addr));
|
||||
if (It == m_cacheFS.end())
|
||||
return;
|
||||
__m128i mask = _mm_set_epi8(0xE, 0xF, 0xC, 0xD,
|
||||
0xA, 0xB, 0x8, 0x9,
|
||||
0x6, 0x7, 0x4, 0x5,
|
||||
0x2, 0x3, 0x0, 0x1);
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t offset_in_fragment_program : It->second.FragmentConstantOffsetCache)
|
||||
{
|
||||
void *data = vm::base(fragment_program->addr + (u32)offset_in_fragment_program);
|
||||
const __m128i &vector = _mm_loadu_si128((__m128i*)data);
|
||||
const __m128i &shuffled_vector = _mm_shuffle_epi8(vector, mask);
|
||||
_mm_stream_si128((__m128i*)((char*)buffer + offset), shuffled_vector);
|
||||
offset += 4 * sizeof(u32);
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<size_t> &getFragmentConstantOffsetsCache(const RSXFragmentProgram *fragmentShader) const
|
||||
{
|
||||
typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragmentShader->addr));
|
||||
|
|
|
@ -142,30 +142,6 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count)
|
|||
|
||||
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
|
||||
{
|
||||
float scale_offset_matrix[16] =
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, -1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f, 0.0f,
|
||||
0.0f, 0.0f, 0.0f, 1.0f
|
||||
};
|
||||
|
||||
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
|
||||
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
|
||||
|
||||
// Scale
|
||||
scale_offset_matrix[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f);
|
||||
scale_offset_matrix[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f);
|
||||
scale_offset_matrix[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
|
||||
|
||||
// Offset
|
||||
scale_offset_matrix[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f);
|
||||
scale_offset_matrix[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f));
|
||||
scale_offset_matrix[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
|
||||
|
||||
scale_offset_matrix[3] /= clip_w / 2.f;
|
||||
scale_offset_matrix[7] /= clip_h / 2.f;
|
||||
|
||||
assert(m_constantsData.can_alloc(256));
|
||||
size_t heap_offset = m_constantsData.alloc(256);
|
||||
|
||||
|
@ -173,7 +149,7 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
|
|||
// Separate constant buffer
|
||||
void *mapped_buffer;
|
||||
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer));
|
||||
streamToBuffer((char*)mapped_buffer + heap_offset, scale_offset_matrix, 16 * sizeof(float));
|
||||
fill_scale_offset_data((char*)mapped_buffer + heap_offset);
|
||||
int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
|
||||
float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF];
|
||||
memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int));
|
||||
|
@ -191,9 +167,6 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
|
|||
|
||||
void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_index)
|
||||
{
|
||||
for (const auto &entry : transform_constants)
|
||||
local_transform_constants[entry.first] = entry.second;
|
||||
|
||||
size_t buffer_size = 512 * 4 * sizeof(float);
|
||||
|
||||
assert(m_constantsData.can_alloc(buffer_size));
|
||||
|
@ -201,16 +174,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
|
|||
|
||||
void *mapped_buffer;
|
||||
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
|
||||
for (const auto &entry : local_transform_constants)
|
||||
{
|
||||
float data[4] = {
|
||||
entry.second.x,
|
||||
entry.second.y,
|
||||
entry.second.z,
|
||||
entry.second.w
|
||||
};
|
||||
streamToBuffer((char*)mapped_buffer + heap_offset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float));
|
||||
}
|
||||
fill_vertex_program_constants_data((char*)mapped_buffer + heap_offset);
|
||||
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||
|
||||
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
|
||||
|
@ -225,8 +189,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
|
|||
void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_index)
|
||||
{
|
||||
// Get constant from fragment program
|
||||
const std::vector<size_t> &fragment_constant_offsets = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program);
|
||||
size_t buffer_size = fragment_constant_offsets.size() * 4 * sizeof(float) + 1;
|
||||
size_t buffer_size = m_cachePSO.get_fragment_constants_buffer_size(&fragment_program);
|
||||
// Multiple of 256 never 0
|
||||
buffer_size = (buffer_size + 255) & ~255;
|
||||
|
||||
|
@ -236,24 +199,7 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_
|
|||
size_t offset = 0;
|
||||
void *mapped_buffer;
|
||||
ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
|
||||
for (size_t offset_in_fragment_program : fragment_constant_offsets)
|
||||
{
|
||||
u32 vector[4];
|
||||
auto data = vm::ps3::ptr<u32>::make(fragment_program.addr + (u32)offset_in_fragment_program);
|
||||
|
||||
u32 c0 = (data[0] >> 16 | data[0] << 16);
|
||||
u32 c1 = (data[1] >> 16 | data[1] << 16);
|
||||
u32 c2 = (data[2] >> 16 | data[2] << 16);
|
||||
u32 c3 = (data[3] >> 16 | data[3] << 16);
|
||||
|
||||
vector[0] = c0;
|
||||
vector[1] = c1;
|
||||
vector[2] = c2;
|
||||
vector[3] = c3;
|
||||
|
||||
streamToBuffer((char*)mapped_buffer + heap_offset + offset, vector, 4 * sizeof(u32));
|
||||
offset += 4 * sizeof(u32);
|
||||
}
|
||||
m_cachePSO.fill_fragment_constans_buffer((char*)mapped_buffer + heap_offset, &fragment_program);
|
||||
m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||
|
||||
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
|
||||
|
|
|
@ -672,7 +672,6 @@ void D3D12GSRender::flip(int buffer)
|
|||
storage.uav_heap_get_pos = m_UAVHeap.get_current_put_pos_minus_one();
|
||||
|
||||
// Flush
|
||||
local_transform_constants.clear();
|
||||
m_texturesRTTs.clear();
|
||||
|
||||
// Now get ready for next frame
|
||||
|
|
|
@ -72,7 +72,6 @@ private:
|
|||
RSXFragmentProgram fragment_program;
|
||||
PipelineStateObjectCache m_cachePSO;
|
||||
std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t> *m_PSO;
|
||||
std::unordered_map<u32, color4f> local_transform_constants;
|
||||
|
||||
struct
|
||||
{
|
||||
|
|
|
@ -586,6 +586,7 @@ namespace rsx
|
|||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
|
||||
rsx->timer_sync.Start();
|
||||
rsx->local_transform_constants.clear();
|
||||
}
|
||||
|
||||
void user_command(thread* rsx, u32 arg)
|
||||
|
@ -1024,6 +1025,42 @@ namespace rsx
|
|||
onexit_thread();
|
||||
}
|
||||
|
||||
void thread::fill_scale_offset_data(void *buffer) const noexcept
|
||||
{
|
||||
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
|
||||
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
|
||||
|
||||
float scale_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f);
|
||||
float offset_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f);
|
||||
offset_x /= clip_w / 2.f;
|
||||
|
||||
float scale_y = -(float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f);
|
||||
float offset_y = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f));
|
||||
offset_y /= clip_h / 2.f;
|
||||
|
||||
float scale_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
|
||||
float offset_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
|
||||
|
||||
float one = 1.f;
|
||||
|
||||
stream_vector(buffer, (u32&)scale_x, 0, 0, (u32&)offset_x);
|
||||
stream_vector((char*)buffer + 16, 0, (u32&)scale_y, 0, (u32&)offset_y);
|
||||
stream_vector((char*)buffer + 32, 0, 0, (u32&)scale_z, (u32&)offset_z);
|
||||
stream_vector((char*)buffer + 48, 0, 0, 0, (u32&)one);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill buffer with vertex program constants.
|
||||
* Buffer must be at least 512 float4 wide.
|
||||
*/
|
||||
void thread::fill_vertex_program_constants_data(void *buffer) noexcept
|
||||
{
|
||||
for (const auto &entry : transform_constants)
|
||||
local_transform_constants[entry.first] = entry.second;
|
||||
for (const auto &entry : local_transform_constants)
|
||||
stream_vector_from_memory((char*)buffer + entry.first * 4 * sizeof(float), (void*)entry.second.rgba);
|
||||
}
|
||||
|
||||
u64 thread::timestamp() const
|
||||
{
|
||||
// Get timestamp, and convert it from microseconds to nanoseconds
|
||||
|
|
|
@ -161,6 +161,9 @@ namespace rsx
|
|||
|
||||
std::unordered_map<u32, color4_base<f32>> transform_constants;
|
||||
|
||||
// Constant stored for whole frame
|
||||
std::unordered_map<u32, color4f> local_transform_constants;
|
||||
|
||||
u32 transform_program[512 * 4] = {};
|
||||
|
||||
virtual void load_vertex_data(u32 first, u32 count);
|
||||
|
@ -218,6 +221,18 @@ namespace rsx
|
|||
|
||||
void task();
|
||||
|
||||
/**
|
||||
* Fill buffer with 4x4 scale offset matrix.
|
||||
* Vertex shader's position is to be multiplied by this matrix.
|
||||
*/
|
||||
void fill_scale_offset_data(void *buffer) const noexcept;
|
||||
|
||||
/**
|
||||
* Fill buffer with vertex program constants.
|
||||
* Buffer must be at least 512 float4 wide.
|
||||
*/
|
||||
void fill_vertex_program_constants_data(void *buffer) noexcept;
|
||||
|
||||
public:
|
||||
void reset();
|
||||
void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);
|
||||
|
|
Loading…
Add table
Reference in a new issue