diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 5d391a7b46..463959e473 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -276,4 +276,10 @@ void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned return; } } +} + +void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept +{ + __m128i vector = _mm_set_epi32(w, z, y, x); + _mm_stream_si128((__m128i*)dst, vector); } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index 106825b3ad..b98152ca27 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -42,4 +42,9 @@ void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned /** * Write index data needed to emulate non indexed non native primitive mode. */ -void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept; \ No newline at end of file +void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept; + +/** + * Stream a 128-bit vector, packed from the four 32-bit values with _mm_set_epi32(w, z, y, x), to dst via _mm_stream_si128. NOTE(review): _mm_stream_si128 presumably requires dst to be 16-byte aligned - confirm that every caller guarantees this. 
+ */ +void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 1313df4f1f..6fbda2c511 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -142,30 +142,6 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count) void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) { - float scale_offset_matrix[16] = - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, -1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - }; - - int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; - int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - - // Scale - scale_offset_matrix[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); - scale_offset_matrix[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); - scale_offset_matrix[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; - - // Offset - scale_offset_matrix[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); - scale_offset_matrix[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); - scale_offset_matrix[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; - - scale_offset_matrix[3] /= clip_w / 2.f; - scale_offset_matrix[7] /= clip_h / 2.f; - assert(m_constantsData.can_alloc(256)); size_t heap_offset = m_constantsData.alloc(256); @@ -173,7 +149,7 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) // Separate constant buffer void *mapped_buffer; ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer)); - streamToBuffer((char*)mapped_buffer + heap_offset, scale_offset_matrix, 16 * sizeof(float)); + fill_scale_offset_data((char*)mapped_buffer + heap_offset); int 
is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF]; memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int)); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index e6f765d9d5..8bac746b79 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1024,6 +1024,30 @@ namespace rsx onexit_thread(); } + void thread::fill_scale_offset_data(void *buffer) const noexcept + { + int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + + float scale_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); + float offset_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); + offset_x /= clip_w / 2.f; + + float scale_y = -(float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); + float offset_y = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); + offset_y /= clip_h / 2.f; + + float scale_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; + float offset_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; + + float one = 1.f; + + stream_vector(buffer, (u32&)scale_x, 0, 0, (u32&)offset_x); + stream_vector((char*)buffer + 16, 0, (u32&)scale_y, 0, (u32&)offset_y); + stream_vector((char*)buffer + 32, 0, 0, (u32&)scale_z, (u32&)offset_z); + stream_vector((char*)buffer + 48, 0, 0, 0, (u32&)one); + } + u64 thread::timestamp() const { // Get timestamp, and convert it from microseconds to nanoseconds diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 2131b26d82..8feaa9bd77 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -218,6 +218,12 @@ namespace rsx void task(); + /** + * Fill buffer with a 4x4 scale/offset matrix: four 16-byte rows (64 bytes in total), each written with stream_vector, computed from the viewport scale/offset and surface clip registers. 
+ * The vertex shader's position is to be multiplied by this matrix. + */ + void fill_scale_offset_data(void *buffer) const noexcept; + public: void reset(); void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);