digant 2024-12-25 00:00:21 +01:00
parent 650947aada
commit 32d8dfc7fe
15 changed files with 256 additions and 47 deletions

View file

@@ -96,6 +96,7 @@ namespace vk
optimal_group_size = 64;
break;
case vk::driver_vendor::MVK:
case vk::driver_vendor::HONEYKRISP:
unroll_loops = true;
optimal_kernel_size = 1;
optimal_group_size = 256;

View file

@@ -797,6 +797,16 @@ void VKGSRender::emit_geometry(u32 sub_index)
}
}
// Before starting a query, we need to match renderpass scope (VK_1_0 rules): a query
// begun inside a renderpass must also end within that same subpass.
// Ideally all queries would start outside a renderpass, but closing it here is expensive,
// so we only enforce this on drivers that are sensitive to scope mismatches.
if (vk::use_strict_query_scopes() &&
vk::is_renderpass_open(*m_current_command_buffer))
{
vk::end_renderpass(*m_current_command_buffer);
emergency_query_cleanup(m_current_command_buffer);
}
// Begin query
m_occlusion_query_manager->begin_query(*m_current_command_buffer, occlusion_id);
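The same strict-scope guard appears again in end_occlusion_query in the next hunk. As a minimal sketch of the rule both hunks enforce, using the vk::use_strict_query_scopes(), vk::is_renderpass_open() and vk::end_renderpass() helpers from this commit (the wrapper function itself is hypothetical, and the extra emergency_query_cleanup() call on the begin path is omitted):

// Hypothetical helper, not part of the commit: on drivers flagged as strict,
// make sure no renderpass is open before a query is begun or ended.
static void close_renderpass_for_query(vk::command_buffer& cmd)
{
    if (vk::use_strict_query_scopes() && vk::is_renderpass_open(cmd))
    {
        // Vulkan 1.0: a query begun inside a renderpass must also end inside the
        // same subpass, so the conservative choice is to keep both calls outside.
        vk::end_renderpass(cmd);
    }
}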

View file

@@ -2877,6 +2877,14 @@ void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
// NOTE: flushing the queue is very expensive, do not flush just because query stopped
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
// VK_1_0 rules dictate that a query's begin/end must match the renderpass/subpass scope.
// Ending the renderpass here is slow, so only do it for drivers that care.
if (vk::use_strict_query_scopes() &&
vk::is_renderpass_open(*m_current_command_buffer))
{
vk::end_renderpass(*m_current_command_buffer);
}
// End query
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);

View file

@@ -31,6 +31,8 @@ namespace vk
bool g_drv_sanitize_fp_values = false;
bool g_drv_disable_fence_reset = false;
bool g_drv_emulate_cond_render = false;
bool g_drv_strict_query_scopes = false;
bool g_drv_force_reuse_query_pools = false;
u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0;
@@ -139,6 +141,9 @@ namespace vk
rsx_log.error("Dozen is currently unsupported. How did you even get this to run outside windows?");
#endif
break;
case driver_vendor::HONEYKRISP:
// Needs more testing
break;
default:
rsx_log.warning("Unsupported device: %s", gpu_name);
}
@@ -228,6 +233,16 @@ namespace vk
return g_drv_emulate_cond_render;
}
bool use_strict_query_scopes()
{
return g_drv_strict_query_scopes;
}
bool force_reuse_query_pools()
{
return g_drv_force_reuse_query_pools;
}
void raise_status_interrupt(runtime_state status)
{
g_runtime_state |= status;

View file

@@ -62,6 +62,8 @@ namespace vk
bool sanitize_fp_values();
bool fence_reset_disabled();
bool emulate_conditional_rendering();
bool use_strict_query_scopes();
bool force_reuse_query_pools();
VkFlags get_heap_compatible_buffer_types();
// Sync helpers around vkQueueSubmit

View file

@@ -1,4 +1,5 @@
#include "stdafx.h"
#include "VKHelpers.h"
#include "VKQueryPool.h"
#include "VKRenderPass.h"
#include "VKResourceManager.h"
@@ -74,13 +75,20 @@ namespace vk
{
ensure(!m_current_query_pool);
const u32 count = ::size32(query_slot_status);
m_current_query_pool = std::make_unique<query_pool>(*owner, query_type, count);
if (m_query_pool_cache.size() > 0)
{
m_current_query_pool = std::move(m_query_pool_cache.front());
m_query_pool_cache.pop_front();
}
else
{
const u32 count = ::size32(query_slot_status);
m_current_query_pool = std::make_unique<query_pool>(*owner, query_type, count);
}
// From spec: "After query pool creation, each query must be reset before it is used."
vkCmdResetQueryPool(cmd, *m_current_query_pool.get(), 0, count);
m_pool_lifetime_counter = count;
vkCmdResetQueryPool(cmd, *m_current_query_pool.get(), 0, m_current_query_pool->size());
m_pool_lifetime_counter = m_current_query_pool->size();
}
void query_pool_manager::reallocate_pool(vk::command_buffer& cmd)
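Read after the change, the allocation path prefers a recycled pool and only creates a new one when the cache is empty; in both cases the whole pool is reset, and the reset now uses the pool's stored size() because a recycled pool may have been created with a different count than the current query_slot_status. A rough sketch of that policy, written as if it were a helper on query_pool_manager (the helper itself is not part of the commit; the member names are):

// Illustrative only. m_query_pool_cache, owner, query_type and query_slot_status
// are the members used by the real allocation code above.
std::unique_ptr<query_pool> query_pool_manager::take_or_create_pool(vk::command_buffer& cmd)
{
    std::unique_ptr<query_pool> pool;
    if (!m_query_pool_cache.empty())
    {
        // Prefer a pool that was previously handed back via on_query_pool_released()
        pool = std::move(m_query_pool_cache.front());
        m_query_pool_cache.pop_front();
    }
    else
    {
        pool = std::make_unique<query_pool>(*owner, query_type, ::size32(query_slot_status));
    }

    // The spec requires each query to be reset before first use; resetting the full
    // pool->size() range also clears stale results when the pool is recycled.
    // Note that vkCmdResetQueryPool must be recorded outside a render pass instance.
    vkCmdResetQueryPool(cmd, *pool, 0, pool->size());
    return pool;
}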
@@ -89,7 +97,8 @@
{
if (!m_current_query_pool->has_refs())
{
vk::get_resource_manager()->dispose(m_current_query_pool);
auto ref = std::make_unique<query_pool_ref>(this, m_current_query_pool);
vk::get_resource_manager()->dispose(ref);
}
else
{
@@ -112,7 +121,8 @@
{
if (!(*It)->has_refs())
{
vk::get_resource_manager()->dispose(*It);
auto ref = std::make_unique<query_pool_ref>(this, *It);
vk::get_resource_manager()->dispose(ref);
It = m_consumed_pools.erase(It);
}
else
@@ -219,4 +229,21 @@
return ~0u;
}
void query_pool_manager::on_query_pool_released(std::unique_ptr<vk::query_pool>& pool)
{
if (!vk::force_reuse_query_pools())
{
// Delete the pool and let the driver create a new one each time.
pool.reset();
return;
}
m_query_pool_cache.emplace_back(std::move(pool));
}
query_pool_manager::query_pool_ref::~query_pool_ref()
{
m_pool_man->on_query_pool_released(m_object);
}
}
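The release side is a small RAII hand-off: instead of disposing the pool directly, the manager disposes a query_pool_ref, and when the resource manager eventually destroys that wrapper (once the GPU no longer references the pool), its destructor routes the pool back through on_query_pool_released(), which either caches it for reuse (force_reuse_query_pools()) or deletes it. A generic sketch of the idiom, deliberately simplified and not RPCS3 code:

#include <memory>
#include <utility>

// A wrapper owns the resource; when the wrapper is destroyed, the resource is handed
// back to its manager instead of being freed outright - mirroring query_pool_ref.
template <typename T, typename Manager>
class recycled_ref
{
    std::unique_ptr<T> m_object;
    Manager* m_manager;

public:
    recycled_ref(Manager* manager, std::unique_ptr<T>& object)
        : m_object(std::move(object))
        , m_manager(manager)
    {}

    ~recycled_ref()
    {
        // The manager decides whether to cache or delete the object,
        // just like query_pool_manager::on_query_pool_released().
        m_manager->on_released(m_object);
    }
};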

View file

@@ -19,7 +19,22 @@ namespace vk
u32 data;
};
class query_pool_ref
{
std::unique_ptr<query_pool> m_object;
query_pool_manager* m_pool_man;
public:
query_pool_ref(query_pool_manager* pool_man, std::unique_ptr<query_pool>& pool)
: m_object(std::move(pool))
, m_pool_man(pool_man)
{}
~query_pool_ref();
};
std::vector<std::unique_ptr<query_pool>> m_consumed_pools;
std::deque<std::unique_ptr<query_pool>> m_query_pool_cache;
std::unique_ptr<query_pool> m_current_query_pool;
std::deque<u32> m_available_slots;
u32 m_pool_lifetime_counter = 0;
@@ -52,6 +67,8 @@
u32 allocate_query(vk::command_buffer& cmd);
void free_query(vk::command_buffer&/*cmd*/, u32 index);
void on_query_pool_released(std::unique_ptr<vk::query_pool>& pool);
template<template<class> class _List>
void free_queries(vk::command_buffer& cmd, _List<u32>& list)
{

View file

@@ -189,6 +189,7 @@ namespace vk
case driver_vendor::DOZEN:
case driver_vendor::LAVAPIPE:
case driver_vendor::V3DV:
case driver_vendor::HONEYKRISP:
break;
}

View file

@@ -21,39 +21,6 @@
#include <util/types.hpp>
#ifndef VK_EXT_attachment_feedback_loop_layout
#define VK_EXT_attachment_feedback_loop_layout 1
#define VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME "VK_EXT_attachment_feedback_loop_layout"
#define VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT static_cast<VkImageLayout>(1000339000)
#define VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT 0x00080000
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT static_cast<VkStructureType>(1000339000)
typedef struct VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT {
VkStructureType sType;
void* pNext;
VkBool32 attachmentFeedbackLoopLayout;
} VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT;
#endif
#ifndef VK_KHR_fragment_shader_barycentric
#define VK_KHR_fragment_shader_barycentric 1
#define VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_SPEC_VERSION 1
#define VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME "VK_KHR_fragment_shader_barycentric"
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR static_cast<VkStructureType>(1000203000)
typedef struct VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR {
VkStructureType sType;
void* pNext;
VkBool32 fragmentShaderBarycentric;
} VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR;
typedef struct VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR {
VkStructureType sType;
void* pNext;
VkBool32 triStripVertexOrderIndependentOfProvokingVertex;
} VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR;
#if VK_HEADER_VERSION < 287
constexpr VkDriverId VK_DRIVER_ID_MESA_HONEYKRISP = static_cast<VkDriverId>(26);
#endif

View file

@@ -103,7 +103,11 @@ namespace vk
if (vendor_id == 0x106B)
{
return chip_class::MVK_apple;
#ifdef __APPLE__
return chip_class::APPLE_MVK;
#else
return chip_class::APPLE_HK_generic; // Lazy, but at the moment we don't care about the differences in M1, M2, M3, M4, etc
#endif
}
if (vendor_id == 0x8086)

View file

@@ -31,7 +31,9 @@ namespace vk
_NV_ENUM_MAX_, // Do not insert NV enums beyond this point
// APPLE
MVK_apple,
APPLE_HK_generic,
APPLE_MVK,
_APPLE_ENUM_MAX, // Do not insert APPLE enums beyond this point
// INTEL
INTEL_generic,
@@ -51,7 +53,8 @@
DOZEN,
LAVAPIPE,
NVK,
V3DV
V3DV,
HONEYKRISP
};
driver_vendor get_driver_vendor();

View file

@@ -35,6 +35,7 @@ namespace vk
VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR shader_barycentric_info{};
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_info{};
VkPhysicalDeviceBorderColorSwizzleFeaturesEXT border_color_swizzle_info{};
VkPhysicalDeviceFaultFeaturesEXT device_fault_info{};
if (device_extensions.is_supported(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))
{
@@ -78,6 +79,13 @@
features2.pNext = &border_color_swizzle_info;
}
if (device_extensions.is_supported(VK_EXT_DEVICE_FAULT_EXTENSION_NAME))
{
device_fault_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT;
device_fault_info.pNext = features2.pNext;
features2.pNext = &device_fault_info;
}
auto _vkGetPhysicalDeviceFeatures2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceFeatures2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceFeatures2KHR"));
ensure(_vkGetPhysicalDeviceFeatures2KHR); // "vkGetInstanceProcAddress failed to find entry point!"
_vkGetPhysicalDeviceFeatures2KHR(dev, &features2);
@@ -92,6 +100,7 @@
optional_features_support.barycentric_coords = !!shader_barycentric_info.fragmentShaderBarycentric;
optional_features_support.framebuffer_loops = !!fbo_loops_info.attachmentFeedbackLoopLayout;
optional_features_support.extended_device_fault = !!device_fault_info.deviceFault;
features = features2.features;
@@ -288,6 +297,11 @@
return driver_vendor::V3DV;
}
if (gpu_name.find("Apple") != umax)
{
return driver_vendor::HONEYKRISP;
}
return driver_vendor::unknown;
}
else
@@ -313,6 +327,8 @@
return driver_vendor::NVK;
case VK_DRIVER_ID_MESA_V3DV:
return driver_vendor::V3DV;
case VK_DRIVER_ID_MESA_HONEYKRISP:
return driver_vendor::HONEYKRISP;
default:
// Mobile?
return driver_vendor::unknown;
@@ -508,6 +524,11 @@
requested_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
}
if (pgpu->optional_features_support.extended_device_fault)
{
requested_extensions.push_back(VK_EXT_DEVICE_FAULT_EXTENSION_NAME);
}
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
@@ -711,6 +732,16 @@
device.pNext = &synchronization2_info;
}
VkPhysicalDeviceFaultFeaturesEXT device_fault_info{};
if (pgpu->optional_features_support.extended_device_fault)
{
device_fault_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT;
device_fault_info.pNext = const_cast<void*>(device.pNext);
device_fault_info.deviceFault = VK_TRUE;
device_fault_info.deviceFaultVendorBinary = VK_FALSE;
device.pNext = &device_fault_info;
}
if (auto error = vkCreateDevice(*pgpu, &device, nullptr, &dev))
{
dump_debug_info(requested_extensions, enabled_features);
@@ -754,6 +785,11 @@
_vkCmdPipelineBarrier2KHR = reinterpret_cast<PFN_vkCmdPipelineBarrier2KHR>(vkGetDeviceProcAddr(dev, "vkCmdPipelineBarrier2KHR"));
}
if (pgpu->optional_features_support.extended_device_fault)
{
_vkGetDeviceFaultInfoEXT = reinterpret_cast<PFN_vkGetDeviceFaultInfoEXT>(vkGetDeviceProcAddr(dev, "vkGetDeviceFaultInfoEXT"));
}
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
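Both device.cpp hunks follow the standard pNext-chaining pattern: chain VkPhysicalDeviceFaultFeaturesEXT into VkPhysicalDeviceFeatures2 to detect support, request the extension, then chain a filled-in copy into VkDeviceCreateInfo to enable it. A minimal sketch of the enable side (structure and field names come from the Vulkan headers; 'device' stands for the VkDeviceCreateInfo being built, as in the diff):

// Sketch: enabling VK_EXT_device_fault at device creation time, assuming support
// was detected earlier via the features2 query shown above.
VkPhysicalDeviceFaultFeaturesEXT device_fault_info{};
device_fault_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT;
device_fault_info.deviceFault = VK_TRUE;              // basic fault reporting
device_fault_info.deviceFaultVendorBinary = VK_FALSE; // no vendor binary dumps
device_fault_info.pNext = const_cast<void*>(device.pNext); // preserve the existing chain
device.pNext = &device_fault_info;                    // ...and prepend this struct to it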

View file

@@ -91,6 +91,7 @@ namespace vk
bool surface_capabilities_2 = false;
bool synchronization_2 = false;
bool unrestricted_depth_range = false;
bool extended_device_fault = false;
} optional_features_support;
friend class render_device;
@@ -153,6 +154,7 @@
PFN_vkCmdSetEvent2KHR _vkCmdSetEvent2KHR = nullptr;
PFN_vkCmdWaitEvents2KHR _vkCmdWaitEvents2KHR = nullptr;
PFN_vkCmdPipelineBarrier2KHR _vkCmdPipelineBarrier2KHR = nullptr;
PFN_vkGetDeviceFaultInfoEXT _vkGetDeviceFaultInfoEXT = nullptr;
public:
render_device() = default;
@@ -187,6 +189,7 @@
bool get_framebuffer_loops_support() const { return pgpu->optional_features_support.framebuffer_loops; }
bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; }
bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }
bool get_extended_device_fault_support() const { return pgpu->optional_features_support.extended_device_fault; }
u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }

View file

@@ -9,17 +9,19 @@ namespace vk
{
VkQueryPool m_query_pool;
VkDevice m_device;
u32 m_size;
public:
query_pool(VkDevice dev, VkQueryType type, u32 size)
: m_query_pool(VK_NULL_HANDLE)
, m_device(dev)
, m_size(size)
{
VkQueryPoolCreateInfo info{};
info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
info.queryType = type;
info.queryCount = size;
vkCreateQueryPool(dev, &info, nullptr, &m_query_pool);
CHECK_RESULT(vkCreateQueryPool(dev, &info, nullptr, &m_query_pool));
// Take 'size' references on this object
ref_count.release(static_cast<s32>(size));
@@ -34,5 +36,10 @@
{
return m_query_pool;
}
inline u32 size() const
{
return m_size;
}
};
}

View file

@@ -1,3 +1,4 @@
#include "device.h"
#include "shared.h"
#include "util/logs.hpp"
@@ -9,9 +10,110 @@ namespace vk
{
extern void print_debug_markers();
std::string retrieve_device_fault_info()
{
if (!g_render_device || !g_render_device->get_extended_device_fault_support())
{
return "Extended fault info is not available. Extension 'VK_EXT_device_fault' is probably not supported by your driver.";
}
ensure(g_render_device->_vkGetDeviceFaultInfoEXT);
VkDeviceFaultCountsEXT fault_counts
{
.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT
};
std::vector<VkDeviceFaultAddressInfoEXT> address_info;
std::vector<VkDeviceFaultVendorInfoEXT> vendor_info;
std::vector<u8> vendor_binary_data;
std::string fault_description;
#ifdef _MSC_VER
__try
{
#endif
// Retrieve sizes
g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, nullptr);
// Resize arrays and fill
address_info.resize(fault_counts.addressInfoCount);
vendor_info.resize(fault_counts.vendorInfoCount);
vendor_binary_data.resize(fault_counts.vendorBinarySize);
VkDeviceFaultInfoEXT fault_info
{
.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT,
.pAddressInfos = address_info.data(),
.pVendorInfos = vendor_info.data(),
.pVendorBinaryData = vendor_binary_data.data()
};
g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, &fault_info);
fault_description = fault_info.description;
#ifdef _MSC_VER
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
rsx_log.error("Driver crashed retrieving extended crash information. Are you running on an NVIDIA card?");
return "Extended fault information is not available. The driver crashed when retrieving the details.";
}
#endif
std::string fault_message = fmt::format(
"Device Fault Information:\n"
"Fault Summary:\n"
" %s\n\n",
fault_description);
if (!address_info.empty())
{
fmt::append(fault_message, " Address Fault Information:\n", fault_description);
for (const auto& fault : address_info)
{
std::string access_type = "access_unknown";
switch (fault.addressType)
{
case VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_EXT:
access_type = "access_none";
break;
case VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT:
access_type = "access_read"; break;
case VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT:
access_type = "access_write"; break;
case VK_DEVICE_FAULT_ADDRESS_TYPE_EXECUTE_INVALID_EXT:
access_type = "access_execute"; break;
case VK_DEVICE_FAULT_ADDRESS_TYPE_INSTRUCTION_POINTER_UNKNOWN_EXT:
access_type = "instruction_pointer_unknown"; break;
case VK_DEVICE_FAULT_ADDRESS_TYPE_INSTRUCTION_POINTER_INVALID_EXT:
access_type = "instruction_pointer_invalid"; break;
case VK_DEVICE_FAULT_ADDRESS_TYPE_INSTRUCTION_POINTER_FAULT_EXT:
access_type = "instruction_pointer_fault"; break;
default:
break;
}
fmt::append(fault_message, " - Fault at address 0x%llx caused by %s\n", fault.reportedAddress, access_type);
}
}
if (!vendor_info.empty())
{
fmt::append(fault_message, " Vendor Fault Information:\n", fault_description);
for (const auto& fault : vendor_info)
{
fmt::append(fault_message, " - [0x%llx, 0x%llx] %s\n", fault.vendorFaultCode, fault.vendorFaultData, fault.description);
}
}
return fault_message;
}
void die_with_error(VkResult error_code, std::string message, std::source_location src_loc)
{
std::string error_message;
std::string error_message, extra_info;
int severity = 0; // 0 - die, 1 - warn, 2 - nothing
switch (error_code)
@@ -42,6 +144,7 @@ namespace vk
break;
case VK_ERROR_DEVICE_LOST:
error_message = "Device lost (Driver crashed with unspecified error or stopped responding and recovered) (VK_ERROR_DEVICE_LOST)";
extra_info = retrieve_device_fault_info();
break;
case VK_ERROR_MEMORY_MAP_FAILED:
error_message = "Memory map failed (VK_ERROR_MEMORY_MAP_FAILED)";
@@ -100,6 +203,11 @@
break;
}
if (!extra_info.empty())
{
error_message = fmt::format("%s\n---------------- EXTRA INFORMATION --------------------\n%s", error_message, extra_info);
}
switch (severity)
{
default: