vk: Implement EXT_multi_draw support

This commit is contained in:
kd-11 2025-03-31 14:38:19 +03:00 committed by kd-11
parent 8ce8410a5b
commit 8aaf2f8577
7 changed files with 147 additions and 41 deletions

View file

@ -138,6 +138,47 @@ namespace rsx
is_disjoint_primitive = is_primitive_disjointed(primitive);
}
// Splits the current draw range into one sub-range per primitive-restart barrier.
// Only barriers belonging to the current range index are visited, starting at current_barrier_it.
// Each returned entry holds the running vertex offset in its second field and the vertex count in its third.
simple_array<draw_range_t> draw_clause::get_subranges() const
{
	ensure(!is_single_draw());

	const auto range = get_range();
	const auto range_end = range.first + range.count;
	const auto expected_passes = pass_count();

	simple_array<draw_range_t> subranges;
	subranges.reserve(expected_passes);

	u32 segment_start = range.first;
	u32 vertices_emitted = 0;

	for (auto it = current_barrier_it; it != draw_command_barriers.end(); ++it)
	{
		// Stop as soon as the barriers belong to a different draw range.
		if (it->draw_id != current_range_index)
		{
			break;
		}

		// Ignore anything that is not a restart marker strictly inside the range.
		if (it->type != primitive_restart_barrier || it->address <= range.first)
		{
			continue;
		}

		// A barrier at or past the end of the range terminates the scan.
		if (it->address >= range_end)
		{
			break;
		}

		const u32 segment_length = it->address - segment_start;
		subranges.push_back({ 0, vertices_emitted, segment_length });

		segment_start = it->address;
		vertices_emitted += segment_length;
	}

	ensure(!subranges.empty());
	ensure(segment_start < range_end);

	// Tail segment: everything after the last barrier up to the end of the range.
	subranges.push_back({ 0, vertices_emitted, range_end - segment_start });
	return subranges;
}
u32 draw_clause::execute_pipeline_dependencies(context* ctx, instanced_draw_config_t* instance_config) const
{
u32 result = 0u;

View file

@ -285,48 +285,19 @@ namespace rsx
*/
u32 execute_pipeline_dependencies(struct context* ctx, instanced_draw_config_t* instance_config = nullptr) const;
/**
* Returns the first-count data for the current subdraw.
* Asserts that the current range index refers to an existing entry.
*/
const draw_range_t& get_range() const
{
	const auto index = current_range_index;
	ensure(index < draw_command_ranges.size());
	return draw_command_ranges[index];
}
// Splits the current draw range into sub-ranges delimited by primitive-restart barriers.
// Every barrier in draw_command_barriers is inspected; those for other draw ranges are skipped.
simple_array<draw_range_t> get_subranges() const
{
	ensure(!is_single_draw());

	const auto range = get_range();
	const auto range_end = range.first + range.count;

	simple_array<draw_range_t> subranges;
	u32 segment_start = range.first;
	u32 vertices_emitted = 0;

	for (const auto& barrier : draw_command_barriers)
	{
		// Only restart markers of the current range that lie past its first vertex split it.
		const bool splits_current_range =
			barrier.draw_id == current_range_index &&
			barrier.type == primitive_restart_barrier &&
			barrier.address > range.first;

		if (!splits_current_range)
		{
			continue;
		}

		if (barrier.address >= range_end)
		{
			break;
		}

		const u32 segment_length = barrier.address - segment_start;
		subranges.push_back({ 0, vertices_emitted, segment_length });

		segment_start = barrier.address;
		vertices_emitted += segment_length;
	}

	ensure(!subranges.empty());
	ensure(segment_start < range_end);

	// Remaining vertices after the last barrier form the final sub-range.
	subranges.push_back({ 0, vertices_emitted, range_end - segment_start });
	return subranges;
}
/*
* Returns a compiled list of all subdraws.
* NOTE: This is a non-trivial operation as it takes disjoint primitive boundaries into account.
*/
simple_array<draw_range_t> get_subranges() const;
};
}

View file

@ -6,6 +6,7 @@
#include "VKGSRender.h"
#include "vkutils/buffer_object.h"
#include "vkutils/chip_class.h"
#include <vulkan/vulkan_core.h>
namespace vk
{
@ -937,6 +938,20 @@ void VKGSRender::emit_geometry(u32 sub_index)
{
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
}
else if (m_device->get_multidraw_support())
{
	// Submit every sub-range of this draw call through a single vkCmdDrawMultiEXT call.
	const auto subranges = draw_call.get_subranges();
	const auto subranges_count = ::size32(subranges);
	auto [offset, ptr] = m_draw_indirect_count_ring_info.alloc_and_map<4, VkMultiDrawInfoEXT>(subranges_count);

	auto _ptr = ptr;
	for (const auto& range : subranges)
	{
		_ptr->firstVertex = range.first;
		_ptr->vertexCount = range.count;

		// Advance to the next entry. Without this every sub-range overwrote element 0
		// and the remaining entries were submitted unwritten.
		++_ptr;
	}

	// NOTE(review): subranges_count is not checked against multidraw_support.max_batch_size here - confirm whether batching is required.
	vkCmdDrawMultiEXT(*m_current_command_buffer, subranges_count, ptr, 1, 0, sizeof(VkMultiDrawInfoEXT));
}
else
{
u32 vertex_offset = 0;
@ -963,6 +978,24 @@ void VKGSRender::emit_geometry(u32 sub_index)
{
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0, 0);
}
else if (m_device->get_multidraw_support())
{
	// Submit every sub-range of this indexed draw call through a single vkCmdDrawMultiIndexedEXT call.
	const auto subranges = draw_call.get_subranges();
	const auto subranges_count = ::size32(subranges);
	auto [offset, ptr] = m_draw_indirect_count_ring_info.alloc_and_map<4, VkMultiDrawIndexedInfoEXT>(subranges_count);

	auto _ptr = ptr;
	// Running position of the next sub-range's first index.
	u32 vertex_offset = 0;

	for (const auto& range : subranges)
	{
		const auto count = get_index_count(draw_call.primitive, range.count);
		_ptr->vertexOffset = 0;
		_ptr->firstIndex = vertex_offset;
		_ptr->indexCount = count;
		vertex_offset += count;

		// Advance to the next entry. Without this every sub-range overwrote element 0
		// and the remaining entries were submitted unwritten.
		++_ptr;
	}

	// NOTE(review): subranges_count is not checked against multidraw_support.max_batch_size here - confirm whether batching is required.
	vkCmdDrawMultiIndexedEXT(*m_current_command_buffer, subranges_count, ptr, 1, 0, sizeof(VkMultiDrawIndexedInfoEXT), nullptr);
}
else
{
u32 vertex_offset = 0;

View file

@ -1,3 +1,4 @@
#include "Emu/RSX/VK/VKDataHeapManager.h"
#include "stdafx.h"
#include "../Overlays/overlay_compile_notification.h"
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
@ -23,6 +24,7 @@
#include "../Program/SPIRVCommon.h"
#include "util/asm.hpp"
#include <vulkan/vulkan_core.h>
namespace vk
{
@ -556,6 +558,12 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
});
}
if (m_device->get_multidraw_support().supported)
{
m_draw_indirect_count_ring_info.create(VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 16 * 0x100000, "multidraw indirect buffer", 1024);
vk::data_heap_manager::register_ring_buffer(m_draw_indirect_count_ring_info);
}
// Initialize optional allocation information with placeholders
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, 16 };
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 };
@ -1165,7 +1173,11 @@ void VKGSRender::check_heap_status(u32 flags)
heap_critical = m_texture_upload_buffer_ring_info.is_critical();
break;
case VK_HEAP_CHECK_VERTEX_STORAGE:
heap_critical = m_attrib_ring_info.is_critical() || m_index_buffer_ring_info.is_critical();
heap_critical = m_attrib_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical() ||
(m_draw_indirect_count_ring_info.heap
? m_draw_indirect_count_ring_info.is_critical()
: false);
break;
case VK_HEAP_CHECK_VERTEX_ENV_STORAGE:
heap_critical = m_vertex_env_ring_info.is_critical();

View file

@ -6,6 +6,7 @@
#include "commands.h"
#include <memory>
#include <type_traits>
#include <vector>
namespace vk
@ -38,6 +39,15 @@ namespace vk
void* map(usz offset, usz size);
void unmap(bool force = false);
// Allocates room for `count` objects of type T with the requested alignment and maps that region.
// Returns the allocation address together with a typed pointer to the mapped memory.
template<int Alignment, typename T = char>
requires std::is_trivially_destructible_v<T>
std::pair<usz, T*> alloc_and_map(usz count)
{
	const auto byte_count = sizeof(T) * count;
	const auto heap_offset = alloc<Alignment>(byte_count);
	T* const mapped = static_cast<T*>(map(heap_offset, byte_count));
	return { heap_offset, mapped };
}
void sync(const vk::command_buffer& cmd);
// Properties

View file

@ -2,6 +2,7 @@
#include "instance.h"
#include "util/logs.hpp"
#include "Emu/system_config.h"
#include <vulkan/vulkan_core.h>
namespace vk
{
@ -36,6 +37,7 @@ namespace vk
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_info{};
VkPhysicalDeviceBorderColorSwizzleFeaturesEXT border_color_swizzle_info{};
VkPhysicalDeviceFaultFeaturesEXT device_fault_info{};
VkPhysicalDeviceMultiDrawFeaturesEXT multidraw_info{};
if (device_extensions.is_supported(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))
{
@ -86,6 +88,13 @@ namespace vk
features2.pNext = &device_fault_info;
}
if (device_extensions.is_supported(VK_EXT_MULTI_DRAW_EXTENSION_NAME))
{
multidraw_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT;
multidraw_info.pNext = features2.pNext;
features2.pNext = &multidraw_info;
}
auto _vkGetPhysicalDeviceFeatures2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceFeatures2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceFeatures2KHR"));
ensure(_vkGetPhysicalDeviceFeatures2KHR); // "vkGetInstanceProcAddress failed to find entry point!"
_vkGetPhysicalDeviceFeatures2KHR(dev, &features2);
@ -98,6 +107,9 @@ namespace vk
custom_border_color_support.swizzle_extension_supported = border_color_swizzle_info.sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT;
custom_border_color_support.require_border_color_remap = !border_color_swizzle_info.borderColorSwizzleFromImage;
multidraw_support.supported = !!multidraw_info.multiDraw;
multidraw_support.max_batch_size = 65536;
optional_features_support.barycentric_coords = !!shader_barycentric_info.fragmentShaderBarycentric;
optional_features_support.framebuffer_loops = !!fbo_loops_info.attachmentFeedbackLoopLayout;
optional_features_support.extended_device_fault = !!device_fault_info.deviceFault;
@ -124,7 +136,6 @@ namespace vk
optional_features_support.sampler_mirror_clamped = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
optional_features_support.synchronization_2 = device_extensions.is_supported(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
optional_features_support.unrestricted_depth_range = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
optional_features_support.multidraw_indirect = device_extensions.is_supported(VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME);
optional_features_support.debug_utils = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
optional_features_support.surface_capabilities_2 = instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@ -165,6 +176,7 @@ namespace vk
properties2.pNext = nullptr;
VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_props{};
VkPhysicalDeviceMultiDrawPropertiesEXT multidraw_props{};
if (descriptor_indexing_support)
{
@ -173,6 +185,13 @@ namespace vk
properties2.pNext = &descriptor_indexing_props;
}
if (multidraw_support.supported)
{
multidraw_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT;
multidraw_props.pNext = properties2.pNext;
properties2.pNext = &multidraw_props;
}
if (device_extensions.is_supported(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME))
{
driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
@ -199,6 +218,17 @@ namespace vk
descriptor_max_draw_calls = 8192;
}
}
if (multidraw_support.supported)
{
multidraw_support.max_batch_size = multidraw_props.maxMultiDrawCount;
if (!multidraw_props.maxMultiDrawCount)
{
rsx_log.error("Physical device reports 0 support maxMultiDraw count. Multidraw support will be disabled.");
multidraw_support.supported = false;
}
}
}
}

View file

@ -61,6 +61,14 @@ namespace vk
operator bool() const { return supported; }
};
// Describes multi-draw support of a physical device.
struct multidraw_features
{
	// True when the device exposes the multiDraw feature.
	bool supported = false;

	// Batch size limit; populated from the device's maxMultiDrawCount property when supported.
	u32 max_batch_size = 0;

	// Allows the struct to be tested directly in a condition.
	operator bool() const { return supported; }
};
class physical_device
{
VkInstance parent = VK_NULL_HANDLE;
@ -79,6 +87,8 @@ namespace vk
custom_border_color_features custom_border_color_support{};
multidraw_features multidraw_support{};
struct
{
bool barycentric_coords = false;
@ -93,7 +103,6 @@ namespace vk
bool unrestricted_depth_range = false;
bool extended_device_fault = false;
bool texture_compression_bc = false;
bool multidraw_indirect = false;
} optional_features_support;
friend class render_device;
@ -176,6 +185,7 @@ namespace vk
const pipeline_binding_table& get_pipeline_binding_table() const { return m_pipeline_binding_table; }
const gpu_shader_types_support& get_shader_types_support() const { return pgpu->shader_types_support; }
const custom_border_color_features& get_custom_border_color_support() const { return pgpu->custom_border_color_support; }
// Multi-draw capabilities of the underlying physical device; returned by reference like the other feature accessors.
const multidraw_features& get_multidraw_support() const { return pgpu->multidraw_support; }
bool get_shader_stencil_export_support() const { return pgpu->optional_features_support.shader_stencil_export; }
bool get_depth_bounds_support() const { return pgpu->features.depthBounds != VK_FALSE; }
@ -193,7 +203,6 @@ namespace vk
bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }
bool get_extended_device_fault_support() const { return pgpu->optional_features_support.extended_device_fault; }
bool get_texture_compression_bc_support() const { return pgpu->optional_features_support.texture_compression_bc; }
bool get_multidraw_indirect_support() const { return pgpu->optional_features_support.multidraw_indirect; }
u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }