From dad9a2b916f0b30a58ec207aeea9201d753af4c4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 2 Apr 2025 00:01:02 +0300 Subject: [PATCH 1/6] vk: Add support for dynamic buffer offsets --- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 13 ++++++++++++- rpcs3/Emu/RSX/VK/vkutils/descriptors.h | 8 ++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index c96b5b145f..9b5f288141 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -430,6 +430,17 @@ namespace vk } } + void descriptor_set::push(const descriptor_set_dynamic_offset_t& offset) + { + ensure(offset.location >= 0 && offset.location <= 16); + while (m_dynamic_offsets.size() < (offset.location + 1)) + { + m_dynamic_offsets.push_back(0); + } + + m_dynamic_offsets[offset.location] = offset.value; + } + void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) { if ((m_push_type_mask & ~m_update_after_bind_mask) || (m_pending_writes.size() >= max_cache_size)) @@ -437,7 +448,7 @@ namespace vk flush(); } - vkCmdBindDescriptorSets(cmd, bind_point, layout, 0, 1, &m_handle, 0, nullptr); + vkCmdBindDescriptorSets(cmd, bind_point, layout, 0, 1, &m_handle, ::size32(m_dynamic_offsets), m_dynamic_offsets.data()); } void descriptor_set::flush() diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index 37798187b0..6c61488b6e 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -27,6 +27,12 @@ namespace vk } }; + struct descriptor_set_dynamic_offset_t + { + int location; + u32 value; + }; + class descriptor_pool { public: @@ -95,6 +101,7 @@ namespace vk void push(const VkDescriptorImageInfo& image_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo* image_info, u32 count, VkDescriptorType type, u32 binding); void 
push(rsx::simple_array& copy_cmd, u32 type_mask = umax); + void push(const descriptor_set_dynamic_offset_t& offset); void bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout); @@ -109,6 +116,7 @@ namespace vk rsx::simple_array m_buffer_view_pool; rsx::simple_array m_buffer_info_pool; rsx::simple_array m_image_info_pool; + rsx::simple_array m_dynamic_offsets; #ifdef __clang__ // Clang (pre 16.x) does not support LWG 2089, std::construct_at for POD types From 1816a1eb34a6163c684b6efe1a5955b5df0cb1ff Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 2 Apr 2025 01:06:23 +0300 Subject: [PATCH 2/6] vk: Switch transform constants from UBO to SSBO --- rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp | 26 ++++++++++++++++++--- rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h | 5 +++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 22 +++++------------ rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 16 +------------ rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 6 ++--- 5 files changed, 37 insertions(+), 38 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp index 6773a4d78f..f4253e1d51 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp @@ -50,7 +50,7 @@ namespace vk idx++; - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot; @@ -101,7 +101,8 @@ namespace vk return bindings; } - std::tuple get_common_pipeline_layout(VkDevice dev) + std::tuple> + get_common_pipeline_layout(VkDevice dev) { const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); auto bindings = get_common_binding_table(); @@ -155,6 +156,25 @@ namespace vk VkPipelineLayout result; CHECK_RESULT(vkCreatePipelineLayout(dev, 
&layout_info, nullptr, &result)); - return std::make_tuple(result, set_layout); + return std::make_tuple(result, set_layout, bindings); + } + + rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings) + { + // Compile descriptor pool sizes + const u32 num_ubo = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? y.descriptorCount : 0))); + const u32 num_texel_buffers = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ? y.descriptorCount : 0))); + const u32 num_combined_image_sampler = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? y.descriptorCount : 0))); + const u32 num_ssbo = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ? y.descriptorCount : 0))); + + ensure(num_ubo > 0 && num_texel_buffers > 0 && num_combined_image_sampler > 0 && num_ssbo > 0); + + return + { + { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , num_ubo }, + { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , num_texel_buffers }, + { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , num_combined_image_sampler }, + { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_ssbo } + }; } } diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h index 7c64d67982..371d0ebf76 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h @@ -7,8 +7,11 @@ namespace vk { // Grab standard layout for decompiled RSX programs. Also used by the interpreter. // FIXME: This generates a bloated monstrosity that needs to die. - std::tuple get_common_pipeline_layout(VkDevice dev); + std::tuple> get_common_pipeline_layout(VkDevice dev); // Returns the standard binding layout without texture slots. Those have special handling depending on the consumer. 
rsx::simple_array get_common_binding_table(); + + // Returns an array of pool sizes that can be used to generate a proper descriptor pool + rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index dd9d080ecd..d58212a1aa 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -491,7 +491,8 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_secondary_cb_list.create(m_secondary_command_buffer_pool, vk::command_buffer::access_type_hint::all); //Precalculated stuff - std::tie(m_pipeline_layout, m_descriptor_layouts) = vk::get_common_pipeline_layout(*m_device); + rsx::simple_array binding_layout; + std::tie(m_pipeline_layout, m_descriptor_layouts, binding_layout) = vk::get_common_pipeline_layout(*m_device); //Occlusion m_occlusion_query_manager = std::make_unique(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE); @@ -507,18 +508,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) // Generate frame contexts const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls(); - const auto& binding_table = m_device->get_pipeline_binding_table(); - const u32 num_fs_samplers = binding_table.vertex_textures_first_bind_slot - binding_table.textures_first_bind_slot; - - rsx::simple_array descriptor_type_sizes = - { - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 }, - { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 }, - { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) }, - - // Conditional rendering predicate slot; refactor to allow skipping this when not needed - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 } - }; + const auto descriptor_type_sizes = vk::get_descriptor_pool_sizes(binding_layout); m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls); VkSemaphoreCreateInfo semaphore_info = {}; @@ -531,7 +521,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : 
GSRender(ar) m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer"); m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE); m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); - m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); + m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); m_raster_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer"); @@ -2107,7 +2097,7 @@ void VKGSRender::load_program_env() usz mem_offset = 0; auto alloc_storage = [&](usz size) -> std::pair { - const auto alignment = m_device->gpu().get_limits().minUniformBufferOffsetAlignment; + const auto alignment = m_device->gpu().get_limits().minStorageBufferOffsetAlignment; mem_offset = m_transform_constants_ring_info.alloc<1>(utils::align(size, alignment)); return std::make_pair(m_transform_constants_ring_info.map(mem_offset, size), size); }; @@ -2225,7 +2215,7 @@ void VKGSRender::load_program_env() const auto& binding_table = m_device->get_pipeline_binding_table(); m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set); - 
m_program->bind_uniform(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, m_current_frame->descriptor_set); + m_program->bind_buffer(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set); m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set); m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot, m_current_frame->descriptor_set); diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index c0a713ec00..3c9188fd60 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -330,21 +330,7 @@ namespace vk idx++; bindings.resize(idx); - // Compile descriptor pool sizes - const u32 num_ubo = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? y.descriptorCount : 0))); - const u32 num_texel_buffers = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ? y.descriptorCount : 0))); - const u32 num_combined_image_sampler = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? y.descriptorCount : 0))); - const u32 num_ssbo = bindings.reduce(0, FN(x + (y.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ? 
y.descriptorCount : 0))); - - ensure(num_ubo > 0 && num_texel_buffers > 0 && num_combined_image_sampler > 0 && num_ssbo > 0); - - m_descriptor_pool_sizes = - { - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , num_ubo }, - { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , num_texel_buffers }, - { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , num_combined_image_sampler }, - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_ssbo } - }; + m_descriptor_pool_sizes = get_descriptor_pool_sizes(bindings); std::array push_constants; push_constants[0].offset = 0; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 0e88dab75d..8dbb9c968a 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -115,15 +115,15 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std { if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) { - OS << "layout(std140, set=0, binding=" << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; - OS << " vec4 " << PI.name << ";\n"; + OS << " vec4 vc[];\n"; OS << "};\n\n"; in.location = m_binding_table.vertex_constant_buffers_bind_slot; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; + in.type = vk::glsl::input_type_storage_buffer; inputs.push_back(in); continue; From 38a1c8becb3607b269c7356916ac3f1d892c780b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 2 Apr 2025 01:52:22 +0300 Subject: [PATCH 3/6] vk: Use dynamic constants offset for transform constants --- .../RSXProg/RSXVertexPrologue.glsl | 2 ++ rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp | 4 ++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 21 +++++++++---------- rpcs3/Emu/RSX/VK/VKGSRender.h | 2 ++ 
rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 8 ++++--- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl index b188b224ba..f43b03ee3a 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl @@ -76,6 +76,8 @@ vec4 _fetch_constant(const in uint base_offset) // uint override return _fetch_constant(int(base_offset)); } +#elif defined(VULKAN) +#define _fetch_constant(x) vc[x + xform_constants_offset] #else #define _fetch_constant(x) vc[x] #endif diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp index f4253e1d51..602d855d76 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp @@ -136,13 +136,13 @@ namespace vk std::array push_constants; push_constants[0].offset = 0; - push_constants[0].size = 16; + push_constants[0].size = 20; push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; if (vk::emulate_conditional_rendering()) { // Conditional render toggle - push_constants[0].size = 20; + push_constants[0].size = 24; } const auto set_layout = vk::descriptors::create_layout(bindings); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d58212a1aa..10c439b183 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2108,7 +2108,8 @@ void VKGSRender::load_program_env() if (!io_buf.empty()) { m_transform_constants_ring_info.unmap(); - m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem_offset, io_buf.size() }; + m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE }; + m_xform_constants_dynamic_offset = mem_offset; } } @@ -2310,21 +2311,19 @@ void VKGSRender::update_vertex_env(u32 id, const 
vk::vertex_upload_info& vertex_ base_offset = 0; } - u8 data_size = 16; - u32 draw_info[5]; - - draw_info[0] = vertex_info.vertex_index_base; - draw_info[1] = vertex_info.vertex_index_offset; - draw_info[2] = id; - draw_info[3] = (id * 16) + (base_offset / 8); + rsx::simple_array dynamic_constants; + dynamic_constants.push_back(vertex_info.vertex_index_base); // Vertex index base + dynamic_constants.push_back(vertex_info.vertex_index_offset); // Vertex index offset + dynamic_constants.push_back(id); // Draw id + dynamic_constants.push_back((id * 16) + (base_offset / 8)); // Vertex layout offset + dynamic_constants.push_back(m_xform_constants_dynamic_offset); // Vertex constants offset if (vk::emulate_conditional_rendering()) { - draw_info[4] = cond_render_ctrl.hw_cond_active ? 1 : 0; - data_size = 20; + dynamic_constants.push_back(cond_render_ctrl.hw_cond_active ? 1 : 0); } - vkCmdPushConstants(*m_current_command_buffer, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info); + vkCmdPushConstants(*m_current_command_buffer, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, dynamic_constants.size_bytes(), dynamic_constants.data()); const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; auto dst = m_vertex_layout_ring_info.map(data_offset, 128); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 88e1e2b00c..35e9b76626 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -160,6 +160,8 @@ private: VkDescriptorBufferInfo m_vertex_instructions_buffer_info {}; VkDescriptorBufferInfo m_fragment_instructions_buffer_info {}; + u32 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants. + std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. 
Only used for storage vk::frame_context_t m_aux_frame_context; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 8dbb9c968a..60f33f49c5 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -29,8 +29,9 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) { - OS << "#version 450\n\n"; - OS << "#extension GL_ARB_separate_shader_objects : enable\n\n"; + OS << + "#version 450\n\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n" @@ -59,7 +60,8 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " uint vertex_base_index;\n" " uint vertex_index_offset;\n" " uint draw_id;\n" - " uint layout_ptr_offset;\n"; + " uint layout_ptr_offset;\n" + " uint xform_constants_offset;\n"; if (m_device_props.emulate_conditional_rendering) { From 66909168ac76e7ce9284701157c8bd6068c51cdf Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 5 Apr 2025 14:25:08 +0300 Subject: [PATCH 4/6] vk: Silence compiler warning --- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index 9b5f288141..2dc70d4070 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -433,7 +433,7 @@ namespace vk void descriptor_set::push(const descriptor_set_dynamic_offset_t& offset) { ensure(offset.location >= 0 && offset.location <= 16); - while (m_dynamic_offsets.size() < (offset.location + 1)) + while (m_dynamic_offsets.size() < (static_cast(offset.location) + 1u)) { m_dynamic_offsets.push_back(0); } From ed280cdbfedc44d67a0a58651eb2b7e6e112822c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 5 Apr 2025 14:25:38 +0300 Subject: [PATCH 5/6] vk: Use dynamic 
constant buffer offset via push constant --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 102 +++++++------------------------- rpcs3/Emu/RSX/VK/VKGSRender.h | 2 +- 2 files changed, 23 insertions(+), 81 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 10c439b183..7079d3a22b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2311,19 +2311,28 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ base_offset = 0; } + const u32 vertex_layout_offset = (id * 16) + (base_offset / 8); + const u32 constant_id_offset = static_cast(m_xform_constants_dynamic_offset) / 16; + rsx::simple_array dynamic_constants; - dynamic_constants.push_back(vertex_info.vertex_index_base); // Vertex index base - dynamic_constants.push_back(vertex_info.vertex_index_offset); // Vertex index offset - dynamic_constants.push_back(id); // Draw id - dynamic_constants.push_back((id * 16) + (base_offset / 8)); // Vertex layout offset - dynamic_constants.push_back(m_xform_constants_dynamic_offset); // Vertex constants offset + dynamic_constants.push_back(vertex_info.vertex_index_base); // Vertex index base + dynamic_constants.push_back(vertex_info.vertex_index_offset); // Vertex index offset + dynamic_constants.push_back(id); // Draw id + dynamic_constants.push_back(vertex_layout_offset); // Vertex layout offset + dynamic_constants.push_back(constant_id_offset); // Vertex constants offset if (vk::emulate_conditional_rendering()) { dynamic_constants.push_back(cond_render_ctrl.hw_cond_active ? 
1 : 0); } - vkCmdPushConstants(*m_current_command_buffer, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, dynamic_constants.size_bytes(), dynamic_constants.data()); + vkCmdPushConstants( + *m_current_command_buffer, + m_pipeline_layout, + VK_SHADER_STAGE_VERTEX_BIT, + 0, + static_cast(dynamic_constants.size_bytes()), + dynamic_constants.data()); const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; auto dst = m_vertex_layout_ring_info.map(data_offset, 128); @@ -2355,83 +2364,16 @@ void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou return; } - // Hot-patching transform constants mid-draw (instanced draw) - std::pair data_range; - void* data_source = nullptr; - - if (m_vertex_prog->has_indexed_constants) + // Buffer updates mid-pass violate the spec and destroy performance on NVIDIA + auto allocate_mem = [&](usz size) -> std::pair { - // We're working with a full range. We can do a direct patch in this case since no index translation is required. - const auto byte_count = count * 16; - const auto byte_offset = index * 16; - - data_range = { m_vertex_constants_buffer_info.offset + byte_offset, byte_count }; - data_source = &REGS(ctx)->transform_constants[index]; - } - else if (auto xform_id = m_vertex_prog->translate_constants_range(index, count); xform_id >= 0) - { - const auto write_offset = xform_id * 16; - const auto byte_count = count * 16; - - data_range = { m_vertex_constants_buffer_info.offset + write_offset, byte_count }; - data_source = &REGS(ctx)->transform_constants[index]; - } - else - { - // Indexed. This is a bit trickier. 
Use scratchpad to avoid UAF - auto allocate_mem = [&](usz size) -> std::pair - { - m_scratch_mem.resize(size); - return { m_scratch_mem.data(), size }; - }; - - rsx::io_buffer iobuf(allocate_mem); - upload_transform_constants(iobuf); - - ensure(iobuf.size() >= m_vertex_constants_buffer_info.range); - data_range = { m_vertex_constants_buffer_info.offset, m_vertex_constants_buffer_info.range }; - data_source = iobuf.data(); - } - - // Preserving an active renderpass across a transfer operation is illegal vulkan. However, splitting up the CB into thousands of renderpasses incurs an overhead. - // We cheat here for specific cases where we already know the driver can let us get away with this. - static const std::set s_allowed_vendors = - { - vk::driver_vendor::AMD, - vk::driver_vendor::RADV, - vk::driver_vendor::LAVAPIPE, - vk::driver_vendor::NVIDIA, - vk::driver_vendor::NVK + const usz alignment = m_device->gpu().get_limits().minStorageBufferOffsetAlignment; + m_xform_constants_dynamic_offset = m_transform_constants_ring_info.alloc<1>(utils::align(size, alignment)); + return std::make_pair(m_transform_constants_ring_info.map(m_xform_constants_dynamic_offset, size), size); }; - const auto driver_vendor = vk::get_driver_vendor(); - const bool preserve_renderpass = !g_cfg.video.strict_rendering_mode && s_allowed_vendors.contains(driver_vendor); - - vk::insert_buffer_memory_barrier( - *m_current_command_buffer, - m_vertex_constants_buffer_info.buffer, - data_range.first, - data_range.second, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_UNIFORM_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - preserve_renderpass); - - // FIXME: This is illegal during a renderpass - vkCmdUpdateBuffer( - *m_current_command_buffer, - m_vertex_constants_buffer_info.buffer, - data_range.first, - data_range.second, - data_source); - - vk::insert_buffer_memory_barrier( - *m_current_command_buffer, - m_vertex_constants_buffer_info.buffer, - data_range.first, - 
data_range.second, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, - preserve_renderpass); + rsx::io_buffer iobuf(allocate_mem); + upload_transform_constants(iobuf); } void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 35e9b76626..5c72528ecc 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -160,7 +160,7 @@ private: VkDescriptorBufferInfo m_vertex_instructions_buffer_info {}; VkDescriptorBufferInfo m_fragment_instructions_buffer_info {}; - u32 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants. + u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants. std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. 
Only used for storage From f38a61bdb3557b1c728a4cd8ae015686886e2494 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 6 Apr 2025 21:09:01 +0300 Subject: [PATCH 6/6] Try msvc workaround --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 7079d3a22b..430f4a7563 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2312,18 +2312,21 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ } const u32 vertex_layout_offset = (id * 16) + (base_offset / 8); - const u32 constant_id_offset = static_cast(m_xform_constants_dynamic_offset) / 16; + const volatile u32 constant_id_offset = static_cast(m_xform_constants_dynamic_offset) / 16u; - rsx::simple_array dynamic_constants; - dynamic_constants.push_back(vertex_info.vertex_index_base); // Vertex index base - dynamic_constants.push_back(vertex_info.vertex_index_offset); // Vertex index offset - dynamic_constants.push_back(id); // Draw id - dynamic_constants.push_back(vertex_layout_offset); // Vertex layout offset - dynamic_constants.push_back(constant_id_offset); // Vertex constants offset + u32 push_constants[6]; + u32 data_length = 20; + + push_constants[0] = vertex_info.vertex_index_base; + push_constants[1] = vertex_info.vertex_index_offset; + push_constants[2] = id; + push_constants[3] = vertex_layout_offset; + push_constants[4] = constant_id_offset; if (vk::emulate_conditional_rendering()) { - dynamic_constants.push_back(cond_render_ctrl.hw_cond_active ? 1 : 0); + push_constants[5] = cond_render_ctrl.hw_cond_active ? 
1 : 0; + data_length += 4; } vkCmdPushConstants( @@ -2331,8 +2334,8 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, - static_cast(dynamic_constants.size_bytes()), - dynamic_constants.data()); + data_length, + push_constants); const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; auto dst = m_vertex_layout_ring_info.map(data_offset, 128); @@ -2349,7 +2352,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ m_vertex_layout_ring_info.unmap(); } -void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count) +void VKGSRender::patch_transform_constants(rsx::context* /*ctx*/, u32 index, u32 count) { if (!m_program || !m_vertex_prog) {