From 912388cc6a5e0d52c6ef44dd8b64c84a14c3d216 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 11 Jan 2025 02:46:53 +0300 Subject: [PATCH] rsx: Improved shader interpreter support when hw instancing is active --- rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp | 15 ++++++++------- rpcs3/Emu/RSX/Core/RSXDrawCommands.h | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 14 ++++++++++---- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 21 +++++++++++++-------- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 095903832d..7be332fc60 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -735,7 +735,7 @@ namespace rsx utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast(wpos_scale), std::bit_cast(wpos_bias)); } - void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const + void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase* prog) const { auto& draw_call = REGS(m_ctx)->current_draw_clause; @@ -745,8 +745,9 @@ namespace rsx // Temp indirection table. Used to track "running" updates. rsx::simple_array instancing_indirection_table; // indirection table size - const auto reloc_table = prog.has_indexed_constants ? decltype(prog.constant_ids){} : prog.constant_ids; - const auto redirection_table_size = prog.has_indexed_constants ? 468u : ::size32(prog.constant_ids); + const auto full_reupload = !prog || prog->has_indexed_constants; + const auto reloc_table = full_reupload ? decltype(prog->constant_ids){} : prog->constant_ids; + const auto redirection_table_size = full_reupload ? 468u : ::size32(prog->constant_ids); instancing_indirection_table.resize(redirection_table_size); // Temp constants data @@ -787,9 +788,9 @@ namespace rsx continue; } - const int translated_offset = prog.has_indexed_constants + const int translated_offset = full_reupload ? instance_config.patch_load_offset - : prog.TranslateConstantsRange(instance_config.patch_load_offset, instance_config.patch_load_count); + : prog->TranslateConstantsRange(instance_config.patch_load_offset, instance_config.patch_load_count); if (translated_offset >= 0) { @@ -809,14 +810,14 @@ namespace rsx continue; } - ensure(!prog.has_indexed_constants); + ensure(!full_reupload); // Sparse update. Update records individually instead of bulk // FIXME: Range batching optimization const auto load_end = instance_config.patch_load_offset + instance_config.patch_load_count; for (u32 i = 0; i < redirection_table_size; ++i) { - const auto read_index = prog.constant_ids[i]; + const auto read_index = prog->constant_ids[i]; if (read_index < instance_config.patch_load_offset || read_index >= load_end) { // Reading outside "hot" range. diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.h b/rpcs3/Emu/RSX/Core/RSXDrawCommands.h index b69a918401..5bc5991a18 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.h +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.h @@ -105,6 +105,6 @@ namespace rsx // Fill instancing buffers. A single iobuf is used for both. 256byte alignment enforced to allow global bind // Returns offsets to the index redirection lookup table and constants field array - void fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const; + void fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase* prog) const; }; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 90bc7cacbd..34f0380ad3 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -878,7 +878,7 @@ void GLGSRender::load_program_env() } } - if (update_fragment_constants && !update_instruction_buffers) + if (update_fragment_constants && !m_shader_interpreter.is_interpreter(m_program)) { // Fragment constants auto mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align); @@ -978,12 +978,18 @@ void GLGSRender::load_program_env() } } - m_graphics_state.clear( + rsx::flags32_t handled_flags = rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | - rsx::pipeline_state::fragment_constants_dirty | - rsx::pipeline_state::fragment_texture_state_dirty); + rsx::pipeline_state::fragment_texture_state_dirty; + + if (update_fragment_constants && !m_shader_interpreter.is_interpreter(m_program)) + { + handled_flags |= rsx::pipeline_state::fragment_constants_dirty; + } + + m_graphics_state.clear(handled_flags); } bool GLGSRender::is_current_program_interpreted() const diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 6e53891fbf..ea3d83cdf3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -688,10 +688,10 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) } // Initialize optional allocation information with placeholders - m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, 32 }; - m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 32 }; - m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 32 }; - m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 32 }; + m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, 16 }; + m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 }; + m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 }; + m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 }; m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, 0, 128 }; const auto limits = m_device->gpu().get_limits(); @@ -2192,7 +2192,7 @@ void VKGSRender::load_program_env() return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size); }); - m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, *m_vertex_prog); + m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, m_vertex_prog); m_instancing_buffer_ring_info.unmap(); m_instancing_indirection_buffer_info = { m_instancing_buffer_ring_info.heap->value, indirection_table_offset, indirection_table_buf.size() }; @@ -2219,7 +2219,7 @@ void VKGSRender::load_program_env() } } - if (update_fragment_constants && !update_instruction_buffers) + if (update_fragment_constants && !m_shader_interpreter.is_interpreter(m_program)) { check_heap_status(VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE); @@ -2350,9 +2350,9 @@ void VKGSRender::load_program_env() } // Clear flags - u32 handled_flags = rsx::pipeline_state::fragment_state_dirty | + rsx::flags32_t handled_flags = + rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | - rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty; if (!update_instancing_data) @@ -2360,6 +2360,11 @@ void VKGSRender::load_program_env() handled_flags |= rsx::pipeline_state::transform_constants_dirty; } + if (update_fragment_constants && !m_shader_interpreter.is_interpreter(m_program)) + { + handled_flags |= rsx::pipeline_state::fragment_constants_dirty; + } + m_graphics_state.clear(handled_flags); }