renderer_vulkan: add workaround for broken occlusion queries on turnip

This commit is contained in:
Liam 2023-11-08 23:14:37 -05:00
commit 502c1a7ab9
6 changed files with 56 additions and 22 deletions

View file

@ -113,9 +113,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
using RuntimeType = typename Traits::RuntimeType; using RuntimeType = typename Traits::RuntimeType;
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_,
: owner{owner_}, rasterizer{rasterizer_}, bool has_broken_occlusion_query_)
cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { : owner{owner_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, runtime{runtime_},
gpu{gpu_}, has_broken_occlusion_query{has_broken_occlusion_query_} {
streamer_mask = 0; streamer_mask = 0;
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@ -163,6 +164,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
Tegra::GPU& gpu; Tegra::GPU& gpu;
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
u64 streamer_mask; u64 streamer_mask;
bool has_broken_occlusion_query;
std::mutex flush_guard; std::mutex flush_guard;
std::deque<u64> flushes_pending; std::deque<u64> flushes_pending;
std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister; std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
@ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
template <typename Traits> template <typename Traits>
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
VideoCore::RasterizerInterface& rasterizer_, VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_,
bool has_broken_occlusion_query_)
: cached_queries{} { : cached_queries{} {
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
this, rasterizer_, cpu_memory_, runtime_, gpu_); this, rasterizer_, cpu_memory_, runtime_, gpu_, has_broken_occlusion_query_);
} }
template <typename Traits> template <typename Traits>
@ -223,6 +226,19 @@ void QueryCacheBase<Traits>::BindToChannel(s32 id) {
impl->runtime.Bind3DEngine(maxwell3d); impl->runtime.Bind3DEngine(maxwell3d);
} }
/// Computes the constant that is added to a reported counter value when the host's
/// occlusion queries are flagged as broken.
///
/// @param has_broken_occlusion_query Whether occlusion queries are flagged as broken.
/// @param counter_type               The counter being reported.
/// @return 120 for ZPassPixelCount / ZPassPixelCount64 when the flag is set, otherwise 0.
///
/// NOTE(review): the rationale for the specific value 120 is not visible here - confirm
/// and document it.
constexpr u64 OcclusionQueryAdjustValue(bool has_broken_occlusion_query, QueryType counter_type) {
    // Only the Z-pass pixel counters are adjusted; every other counter type is left untouched.
    const bool is_zpass_counter = counter_type == QueryType::ZPassPixelCount ||
                                  counter_type == QueryType::ZPassPixelCount64;
    return (has_broken_occlusion_query && is_zpass_counter) ? 120 : 0;
}
template <typename Traits> template <typename Traits>
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type, void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
QueryPropertiesFlags flags, u32 payload, u32 subreport) { QueryPropertiesFlags flags, u32 payload, u32 subreport) {
@ -256,9 +272,10 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
bool is_synced = !Settings::IsGPULevelHigh() && is_fence; bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
u64 adjustment = OcclusionQueryAdjustValue(impl->has_broken_occlusion_query, counter_type);
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
pointer, pointer_timestamp] { pointer, pointer_timestamp, adjustment] {
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
if (!is_synced) [[likely]] { if (!is_synced) [[likely]] {
impl->pending_unregister.push_back(query_location); impl->pending_unregister.push_back(query_location);
@ -269,7 +286,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
UNREACHABLE(); UNREACHABLE();
return; return;
} }
query_base->value += streamer->GetAmmendValue(); query_base->value += streamer->GetAmmendValue() + adjustment;
streamer->SetAccumulationValue(query_base->value); streamer->SetAccumulationValue(query_base->value);
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
u64 timestamp = impl->gpu.GetTicks(); u64 timestamp = impl->gpu.GetTicks();

View file

@ -53,7 +53,8 @@ public:
}; };
explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_,
bool has_broken_occlusion_query_);
~QueryCacheBase(); ~QueryCacheBase();

View file

@ -116,8 +116,9 @@ public:
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue, ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool) DescriptorPool& descriptor_pool)
: BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_}, : BaseStreamer(id_), runtime{runtime_},
scheduler{scheduler_}, memory_allocator{memory_allocator_} { rasterizer{rasterizer_}, device{device_}, scheduler{scheduler_},
memory_allocator{memory_allocator_}, is_broken{device.HasBrokenOcclusionQuery()} {
current_bank = nullptr; current_bank = nullptr;
current_query = nullptr; current_query = nullptr;
ammend_value = 0; ammend_value = 0;
@ -150,12 +151,14 @@ public:
return; return;
} }
ReserveHostQuery(); ReserveHostQuery();
if (!is_broken) {
scheduler.Record([query_pool = current_query_pool, scheduler.Record([query_pool = current_query_pool,
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
const bool use_precise = Settings::IsGPULevelHigh(); const bool use_precise = Settings::IsGPULevelHigh();
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index), cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
}); });
}
has_started = true; has_started = true;
} }
@ -163,10 +166,12 @@ public:
if (!has_started) { if (!has_started) {
return; return;
} }
if (!is_broken) {
scheduler.Record([query_pool = current_query_pool, scheduler.Record([query_pool = current_query_pool,
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index)); cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index));
}); });
}
has_started = false; has_started = false;
} }
@ -573,6 +578,7 @@ private:
bool accumulation_since_last_sync{}; bool accumulation_since_last_sync{};
VideoCommon::HostQueryBase* current_query; VideoCommon::HostQueryBase* current_query;
bool has_started{}; bool has_started{};
bool is_broken{};
std::mutex flush_guard; std::mutex flush_guard;
std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass; std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass;

View file

@ -174,7 +174,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime), buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
staging_pool, compute_pass_descriptor_queue, descriptor_pool), staging_pool, compute_pass_descriptor_queue, descriptor_pool),
query_cache(gpu, *this, cpu_memory_, query_cache_runtime), query_cache(gpu, *this, cpu_memory_, query_cache_runtime, device.HasBrokenOcclusionQuery()),
pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
accelerate_dma(buffer_cache, texture_cache, scheduler), accelerate_dma(buffer_cache, texture_cache, scheduler),

View file

@ -513,6 +513,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
#endif #endif
} }
if (is_turnip) {
LOG_WARNING(Render_Vulkan, "Turnip drivers have broken occlusion queries");
has_broken_occlusion_query = true;
}
if (is_arm) { if (is_arm) {
must_emulate_scaled_formats = true; must_emulate_scaled_formats = true;

View file

@ -599,6 +599,10 @@ public:
return has_broken_cube_compatibility; return has_broken_cube_compatibility;
} }
/// Returns true if occlusion queries on this device are flagged as broken.
/// The flag is set in the Device constructor when a Turnip driver is detected.
bool HasBrokenOcclusionQuery() const {
return has_broken_occlusion_query;
}
/// Returns the vendor name reported from Vulkan. /// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const { std::string_view GetVendorName() const {
return properties.driver.driverName; return properties.driver.driverName;
@ -794,6 +798,7 @@ private:
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool has_broken_compute{}; ///< Compute shaders can cause crashes bool has_broken_compute{}; ///< Compute shaders can cause crashes
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
bool has_broken_occlusion_query{}; ///< Has broken occlusion queries
bool has_renderdoc{}; ///< Has RenderDoc attached bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool supports_d24_depth{}; ///< Supports D24 depth buffers.