renderer_vulkan: add workaround for broken occlusion queries on turnip

This commit is contained in:
Liam 2023-11-08 23:14:37 -05:00
parent f75363177e
commit 502c1a7ab9
6 changed files with 56 additions and 22 deletions

View file

@ -113,9 +113,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
using RuntimeType = typename Traits::RuntimeType;
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_)
: owner{owner_}, rasterizer{rasterizer_},
cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} {
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_,
bool has_broken_occlusion_query_)
: owner{owner_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, runtime{runtime_},
gpu{gpu_}, has_broken_occlusion_query{has_broken_occlusion_query_} {
streamer_mask = 0;
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@ -163,6 +164,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
Tegra::GPU& gpu;
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
u64 streamer_mask;
bool has_broken_occlusion_query;
std::mutex flush_guard;
std::deque<u64> flushes_pending;
std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
@ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
template <typename Traits>
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_)
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_,
bool has_broken_occlusion_query_)
: cached_queries{} {
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
this, rasterizer_, cpu_memory_, runtime_, gpu_);
this, rasterizer_, cpu_memory_, runtime_, gpu_, has_broken_occlusion_query_);
}
template <typename Traits>
@ -223,6 +226,19 @@ void QueryCacheBase<Traits>::BindToChannel(s32 id) {
impl->runtime.Bind3DEngine(maxwell3d);
}
/// Computes the constant that CounterReport adds on top of a reported query value when the
/// host's occlusion queries are flagged as broken.
///
/// @param has_broken_occlusion_query Whether the host reports broken occlusion queries.
/// @param counter_type               Type of the counter being reported.
/// @return 120 for ZPassPixelCount / ZPassPixelCount64 while the workaround is active,
///         0 in every other case.
// NOTE(review): the rationale for the specific value 120 is not visible here - presumably it only
// needs to be a non-zero sample count so guest occlusion tests pass; confirm before changing it.
constexpr u64 OcclusionQueryAdjustValue(bool has_broken_occlusion_query, QueryType counter_type) {
    const bool counts_z_pass_pixels = counter_type == QueryType::ZPassPixelCount ||
                                      counter_type == QueryType::ZPassPixelCount64;
    if (has_broken_occlusion_query && counts_z_pass_pixels) {
        return 120;
    }
    return 0;
}
template <typename Traits>
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
QueryPropertiesFlags flags, u32 payload, u32 subreport) {
@ -256,9 +272,10 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
u64 adjustment = OcclusionQueryAdjustValue(impl->has_broken_occlusion_query, counter_type);
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
pointer, pointer_timestamp] {
pointer, pointer_timestamp, adjustment] {
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
if (!is_synced) [[likely]] {
impl->pending_unregister.push_back(query_location);
@ -269,7 +286,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
UNREACHABLE();
return;
}
query_base->value += streamer->GetAmmendValue();
query_base->value += streamer->GetAmmendValue() + adjustment;
streamer->SetAccumulationValue(query_base->value);
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
u64 timestamp = impl->gpu.GetTicks();

View file

@ -53,7 +53,8 @@ public:
};
explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_);
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_,
bool has_broken_occlusion_query_);
~QueryCacheBase();
@ -178,4 +179,4 @@ protected:
std::unique_ptr<QueryCacheBaseImpl> impl;
};
} // namespace VideoCommon
} // namespace VideoCommon

View file

@ -116,8 +116,9 @@ public:
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool)
: BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
scheduler{scheduler_}, memory_allocator{memory_allocator_} {
: BaseStreamer(id_), runtime{runtime_},
rasterizer{rasterizer_}, device{device_}, scheduler{scheduler_},
memory_allocator{memory_allocator_}, is_broken{device.HasBrokenOcclusionQuery()} {
current_bank = nullptr;
current_query = nullptr;
ammend_value = 0;
@ -150,12 +151,14 @@ public:
return;
}
ReserveHostQuery();
scheduler.Record([query_pool = current_query_pool,
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
const bool use_precise = Settings::IsGPULevelHigh();
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
});
if (!is_broken) {
scheduler.Record([query_pool = current_query_pool,
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
const bool use_precise = Settings::IsGPULevelHigh();
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
});
}
has_started = true;
}
@ -163,10 +166,12 @@ public:
if (!has_started) {
return;
}
scheduler.Record([query_pool = current_query_pool,
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index));
});
if (!is_broken) {
scheduler.Record([query_pool = current_query_pool,
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index));
});
}
has_started = false;
}
@ -573,6 +578,7 @@ private:
bool accumulation_since_last_sync{};
VideoCommon::HostQueryBase* current_query;
bool has_started{};
bool is_broken{};
std::mutex flush_guard;
std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass;

View file

@ -174,7 +174,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
staging_pool, compute_pass_descriptor_queue, descriptor_pool),
query_cache(gpu, *this, cpu_memory_, query_cache_runtime),
query_cache(gpu, *this, cpu_memory_, query_cache_runtime, device.HasBrokenOcclusionQuery()),
pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
accelerate_dma(buffer_cache, texture_cache, scheduler),

View file

@ -513,6 +513,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
#endif
}
if (is_turnip) {
LOG_WARNING(Render_Vulkan, "Turnip drivers have broken occlusion queries");
has_broken_occlusion_query = true;
}
if (is_arm) {
must_emulate_scaled_formats = true;

View file

@ -599,6 +599,10 @@ public:
return has_broken_cube_compatibility;
}
/// Returns true if occlusion queries are flagged as broken on this device.
bool HasBrokenOcclusionQuery() const {
return has_broken_occlusion_query;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return properties.driver.driverName;
@ -794,6 +798,7 @@ private:
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool has_broken_compute{}; ///< Compute shaders can cause crashes
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
bool has_broken_occlusion_query{}; ///< Has broken occlusion queries
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.