diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp
index e4dcb5dad4..d31ea4a51d 100644
--- a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp
+++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp
@@ -138,6 +138,51 @@ namespace rsx
 		is_disjoint_primitive = is_primitive_disjointed(primitive);
 	}
 
+	// Splits the current subdraw at every primitive restart barrier that lies strictly inside its
+	// vertex range and returns the pieces as { 0, vertices preceding the piece, vertex count }.
+	// Only valid for draws that are not single draws and that are split by at least one barrier.
+	// Only barriers from current_barrier_it onwards that belong to the current range are examined.
+	simple_array<draw_range_t> draw_clause::get_subranges() const
+	{
+		ensure(!is_single_draw());
+
+		const auto range = get_range();
+		const auto limit = range.first + range.count;
+		const auto _pass_count = pass_count();
+
+		simple_array<draw_range_t> ret;
+		ret.reserve(_pass_count);
+
+		u32 previous_barrier = range.first;
+		u32 vertex_counter = 0;
+
+		for (auto it = current_barrier_it;
+			it != draw_command_barriers.end() && it->draw_id == current_range_index;
+			++it)
+		{
+			const auto& barrier = *it;
+			if (barrier.type != primitive_restart_barrier)
+				continue;
+
+			if (barrier.address <= range.first)
+				continue;
+
+			if (barrier.address >= limit)
+				break;
+
+			const u32 count = barrier.address - previous_barrier;
+			ret.push_back({ 0, vertex_counter, count });
+			previous_barrier = barrier.address;
+			vertex_counter += count;
+		}
+
+		ensure(!ret.empty());
+		ensure(previous_barrier < limit);
+		ret.push_back({ 0, vertex_counter, limit - previous_barrier });
+
+		return ret;
+	}
+
 	u32 draw_clause::execute_pipeline_dependencies(context* ctx, instanced_draw_config_t* instance_config) const
 	{
 		u32 result = 0u;
diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp
index 416a82fbae..1764bca99b 100644
--- a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp
+++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp
@@ -285,48 +285,19 @@ namespace rsx
 		 */
 		u32 execute_pipeline_dependencies(struct context* ctx, instanced_draw_config_t* instance_config = nullptr) const;
 
+		/**
+		 * Returns the first-count data for the current subdraw
+		 */
 		const draw_range_t& get_range() const
 		{
 			ensure(current_range_index < draw_command_ranges.size());
 			return draw_command_ranges[current_range_index];
 		}
 
-		simple_array<draw_range_t> get_subranges()
const
-		{
-			ensure(!is_single_draw());
-
-			const auto range = get_range();
-			const auto limit = range.first + range.count;
-
-			simple_array<draw_range_t> ret;
-			u32 previous_barrier = range.first;
-			u32 vertex_counter = 0;
-
-			for (const auto& barrier : draw_command_barriers)
-			{
-				if (barrier.draw_id != current_range_index)
-					continue;
-
-				if (barrier.type != primitive_restart_barrier)
-					continue;
-
-				if (barrier.address <= range.first)
-					continue;
-
-				if (barrier.address >= limit)
-					break;
-
-				const u32 count = barrier.address - previous_barrier;
-				ret.push_back({ 0, vertex_counter, count });
-				previous_barrier = barrier.address;
-				vertex_counter += count;
-			}
-
-			ensure(!ret.empty());
-			ensure(previous_barrier < limit);
-			ret.push_back({ 0, vertex_counter, limit - previous_barrier });
-
-			return ret;
-		}
+		/*
+		 * Returns a compiled list of all subdraws.
+		 * NOTE: This is a non-trivial operation as it takes disjoint primitive boundaries into account.
+		 */
+		simple_array<draw_range_t> get_subranges() const;
 	};
 }
diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp
index 1d6b376e7c..3821240cab 100644
--- a/rpcs3/Emu/RSX/VK/VKDraw.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp
@@ -6,6 +6,7 @@
 #include "VKGSRender.h"
 #include "vkutils/buffer_object.h"
 #include "vkutils/chip_class.h"
+#include
 
 namespace vk
 {
@@ -937,6 +938,23 @@ void VKGSRender::emit_geometry(u32 sub_index)
 			{
 				vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
 			}
+			else if (m_device->get_multidraw_support())
+			{
+				// Submit all subranges of this draw through a single vkCmdDrawMultiEXT call.
+				// NOTE(review): subranges_count is never compared against max_batch_size (maxMultiDrawCount) - confirm it cannot be exceeded.
+				const auto subranges = draw_call.get_subranges();
+				const auto subranges_count = ::size32(subranges);
+				auto [offset, ptr] = m_draw_indirect_count_ring_info.alloc_and_map<4, VkMultiDrawInfoEXT>(subranges_count);
+
+				auto _ptr = ptr;
+				for (const auto& range : subranges)
+				{
+					_ptr->firstVertex = range.first;
+					_ptr->vertexCount = range.count;
+					++_ptr; // One VkMultiDrawInfoEXT entry per subrange; without this every subrange overwrites entry 0
+				}
+				vkCmdDrawMultiEXT(*m_current_command_buffer, subranges_count, ptr, 1, 0, sizeof(VkMultiDrawInfoEXT));
+			}
 			else
 			{
 				u32 vertex_offset = 0;
@@ -963,6 +981,27 @@ void VKGSRender::emit_geometry(u32 sub_index)
 			{
 				vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0, 0);
 			}
+			else if (m_device->get_multidraw_support())
+			{
+				// Submit all subranges of this draw through a single vkCmdDrawMultiIndexedEXT call.
+				// firstIndex of each entry is the sum of the index counts of the entries before it.
+				const auto subranges = draw_call.get_subranges();
+				const auto subranges_count = ::size32(subranges);
+				auto [offset, ptr] = m_draw_indirect_count_ring_info.alloc_and_map<4, VkMultiDrawIndexedInfoEXT>(subranges_count);
+
+				auto _ptr = ptr;
+				u32 vertex_offset = 0;
+				for (const auto& range : subranges)
+				{
+					const auto count = get_index_count(draw_call.primitive, range.count);
+					_ptr->vertexOffset = 0;
+					_ptr->firstIndex = vertex_offset;
+					_ptr->indexCount = count;
+					vertex_offset += count;
+					++_ptr; // One VkMultiDrawIndexedInfoEXT entry per subrange; without this every subrange overwrites entry 0
+				}
+				vkCmdDrawMultiIndexedEXT(*m_current_command_buffer, subranges_count, ptr, 1, 0, sizeof(VkMultiDrawIndexedInfoEXT), nullptr);
+			}
 			else
 			{
 				u32 vertex_offset = 0;
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index b5ccb885a9..13039a68c3 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -1,3 +1,4 @@
 #include "stdafx.h"
+#include "Emu/RSX/VK/VKDataHeapManager.h"
 #include "../Overlays/overlay_compile_notification.h"
 #include "../Overlays/Shaders/shader_loading_dialog_native.h"
@@ -23,6 +24,7 @@
 #include "../Program/SPIRVCommon.h"
 
 #include "util/asm.hpp"
+#include
 
 namespace vk
 {
@@ -556,6 +558,12 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
 		});
 	}
 
+	if (m_device->get_multidraw_support().supported)
+	{
+		m_draw_indirect_count_ring_info.create(VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 16 * 0x100000, "multidraw indirect buffer", 1024);
+		vk::data_heap_manager::register_ring_buffer(m_draw_indirect_count_ring_info);
+	}
+
 	// Initialize optional allocation information with placeholders
 	m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, 16 };
 	m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 };
@@ -1165,7 +1173,11 @@ void
VKGSRender::check_heap_status(u32 flags)
 				heap_critical = m_texture_upload_buffer_ring_info.is_critical();
 				break;
 			case VK_HEAP_CHECK_VERTEX_STORAGE:
-				heap_critical = m_attrib_ring_info.is_critical() || m_index_buffer_ring_info.is_critical();
+				heap_critical = m_attrib_ring_info.is_critical() ||
+					m_index_buffer_ring_info.is_critical() ||
+					(m_draw_indirect_count_ring_info.heap
+						? m_draw_indirect_count_ring_info.is_critical()
+						: false);
 				break;
 			case VK_HEAP_CHECK_VERTEX_ENV_STORAGE:
 				heap_critical = m_vertex_env_ring_info.is_critical();
diff --git a/rpcs3/Emu/RSX/VK/vkutils/data_heap.h b/rpcs3/Emu/RSX/VK/vkutils/data_heap.h
index d8fb876edd..6941733cdf 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/data_heap.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/data_heap.h
@@ -6,6 +6,7 @@
 #include "commands.h"
 
 #include
+#include <type_traits>
 #include
 
 namespace vk
@@ -38,6 +39,18 @@ namespace vk
 		void* map(usz offset, usz size);
 		void unmap(bool force = false);
 
+		// Allocates storage for `count` objects of type T from this heap (Alignment is forwarded to alloc) and maps it.
+		// Returns the allocation offset together with a typed pointer to the mapped range.
+		// T is constrained to trivially destructible types.
+		template <int Alignment, typename T>
+			requires std::is_trivially_destructible_v<T>
+		std::pair<usz, T*> alloc_and_map(usz count)
+		{
+			const auto size_bytes = count * sizeof(T);
+			const auto addr = alloc<Alignment>(size_bytes);
+			return { addr, reinterpret_cast<T*>(map(addr, size_bytes)) };
+		}
+
 		void sync(const vk::command_buffer& cmd);
 
 		// Properties
diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
index 58a82a767b..b19ea8b842 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
@@ -2,6 +2,7 @@
 #include "instance.h"
 #include "util/logs.hpp"
 #include "Emu/system_config.h"
+#include
 
 namespace vk
 {
@@ -36,6 +37,7 @@ namespace vk
 			VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_info{};
 			VkPhysicalDeviceBorderColorSwizzleFeaturesEXT border_color_swizzle_info{};
 			VkPhysicalDeviceFaultFeaturesEXT device_fault_info{};
+			VkPhysicalDeviceMultiDrawFeaturesEXT multidraw_info{};
 
 			if (device_extensions.is_supported(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))
 			{
@@ -86,6 +88,13 @@ namespace vk
 				features2.pNext = &device_fault_info;
} + if (device_extensions.is_supported(VK_EXT_MULTI_DRAW_EXTENSION_NAME)) + { + multidraw_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT; + multidraw_info.pNext = features2.pNext; + features2.pNext = &multidraw_info; + } + auto _vkGetPhysicalDeviceFeatures2KHR = reinterpret_cast(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceFeatures2KHR")); ensure(_vkGetPhysicalDeviceFeatures2KHR); // "vkGetInstanceProcAddress failed to find entry point!" _vkGetPhysicalDeviceFeatures2KHR(dev, &features2); @@ -98,6 +107,9 @@ namespace vk custom_border_color_support.swizzle_extension_supported = border_color_swizzle_info.sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT; custom_border_color_support.require_border_color_remap = !border_color_swizzle_info.borderColorSwizzleFromImage; + multidraw_support.supported = !!multidraw_info.multiDraw; + multidraw_support.max_batch_size = 65536; + optional_features_support.barycentric_coords = !!shader_barycentric_info.fragmentShaderBarycentric; optional_features_support.framebuffer_loops = !!fbo_loops_info.attachmentFeedbackLoopLayout; optional_features_support.extended_device_fault = !!device_fault_info.deviceFault; @@ -124,7 +136,6 @@ namespace vk optional_features_support.sampler_mirror_clamped = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME); optional_features_support.synchronization_2 = device_extensions.is_supported(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); optional_features_support.unrestricted_depth_range = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); - optional_features_support.multidraw_indirect = device_extensions.is_supported(VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME); optional_features_support.debug_utils = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); optional_features_support.surface_capabilities_2 = 
instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -165,6 +176,7 @@ namespace vk properties2.pNext = nullptr; VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_props{}; + VkPhysicalDeviceMultiDrawPropertiesEXT multidraw_props{}; if (descriptor_indexing_support) { @@ -173,6 +185,13 @@ namespace vk properties2.pNext = &descriptor_indexing_props; } + if (multidraw_support.supported) + { + multidraw_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT; + multidraw_props.pNext = properties2.pNext; + properties2.pNext = &multidraw_props; + } + if (device_extensions.is_supported(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME)) { driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR; @@ -199,6 +218,17 @@ namespace vk descriptor_max_draw_calls = 8192; } } + + if (multidraw_support.supported) + { + multidraw_support.max_batch_size = multidraw_props.maxMultiDrawCount; + + if (!multidraw_props.maxMultiDrawCount) + { + rsx_log.error("Physical device reports 0 support maxMultiDraw count. 
Multidraw support will be disabled."); + multidraw_support.supported = false; + } + } } } diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index bfb141c584..193fa2377e 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -61,6 +61,14 @@ namespace vk operator bool() const { return supported; } }; + struct multidraw_features + { + bool supported; + u32 max_batch_size; + + operator bool() const { return supported; } + }; + class physical_device { VkInstance parent = VK_NULL_HANDLE; @@ -79,6 +87,8 @@ namespace vk custom_border_color_features custom_border_color_support{}; + multidraw_features multidraw_support{}; + struct { bool barycentric_coords = false; @@ -93,7 +103,6 @@ namespace vk bool unrestricted_depth_range = false; bool extended_device_fault = false; bool texture_compression_bc = false; - bool multidraw_indirect = false; } optional_features_support; friend class render_device; @@ -176,6 +185,7 @@ namespace vk const pipeline_binding_table& get_pipeline_binding_table() const { return m_pipeline_binding_table; } const gpu_shader_types_support& get_shader_types_support() const { return pgpu->shader_types_support; } const custom_border_color_features& get_custom_border_color_support() const { return pgpu->custom_border_color_support; } + const multidraw_features get_multidraw_support() const { return pgpu->multidraw_support; } bool get_shader_stencil_export_support() const { return pgpu->optional_features_support.shader_stencil_export; } bool get_depth_bounds_support() const { return pgpu->features.depthBounds != VK_FALSE; } @@ -193,7 +203,6 @@ namespace vk bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; } bool get_extended_device_fault_support() const { return pgpu->optional_features_support.extended_device_fault; } bool get_texture_compression_bc_support() const { return pgpu->optional_features_support.texture_compression_bc; } - bool 
get_multidraw_indirect_support() const { return pgpu->optional_features_support.multidraw_indirect; } u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; } u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }