renderer_vulkan: add workaround for broken occlusion queries on turnip
This commit is contained in:
parent
f75363177e
commit
502c1a7ab9
6 changed files with 56 additions and 22 deletions
|
@ -113,9 +113,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
|
||||||
using RuntimeType = typename Traits::RuntimeType;
|
using RuntimeType = typename Traits::RuntimeType;
|
||||||
|
|
||||||
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
|
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
|
||||||
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_)
|
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_,
|
||||||
: owner{owner_}, rasterizer{rasterizer_},
|
bool has_broken_occlusion_query_)
|
||||||
cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} {
|
: owner{owner_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, runtime{runtime_},
|
||||||
|
gpu{gpu_}, has_broken_occlusion_query{has_broken_occlusion_query_} {
|
||||||
streamer_mask = 0;
|
streamer_mask = 0;
|
||||||
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
|
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
|
||||||
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
|
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
|
||||||
|
@ -163,6 +164,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
|
||||||
Tegra::GPU& gpu;
|
Tegra::GPU& gpu;
|
||||||
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
|
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
|
||||||
u64 streamer_mask;
|
u64 streamer_mask;
|
||||||
|
bool has_broken_occlusion_query;
|
||||||
std::mutex flush_guard;
|
std::mutex flush_guard;
|
||||||
std::deque<u64> flushes_pending;
|
std::deque<u64> flushes_pending;
|
||||||
std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
|
std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
|
||||||
|
@ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
|
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
|
||||||
VideoCore::RasterizerInterface& rasterizer_,
|
VideoCore::RasterizerInterface& rasterizer_,
|
||||||
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_)
|
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_,
|
||||||
|
bool has_broken_occlusion_query_)
|
||||||
: cached_queries{} {
|
: cached_queries{} {
|
||||||
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
|
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
|
||||||
this, rasterizer_, cpu_memory_, runtime_, gpu_);
|
this, rasterizer_, cpu_memory_, runtime_, gpu_, has_broken_occlusion_query_);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
|
@ -223,6 +226,19 @@ void QueryCacheBase<Traits>::BindToChannel(s32 id) {
|
||||||
impl->runtime.Bind3DEngine(maxwell3d);
|
impl->runtime.Bind3DEngine(maxwell3d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Computes the constant offset to add to a reported counter value when the host's
/// occlusion queries are flagged as broken.
///
/// @param has_broken_occlusion_query Whether the host's occlusion queries are flagged as broken.
/// @param counter_type               The kind of counter being reported.
/// @return 120 for the Z-pass pixel counters (ZPassPixelCount / ZPassPixelCount64) when
///         has_broken_occlusion_query is set; 0 in every other case.
constexpr u64 OcclusionQueryAdjustValue(bool has_broken_occlusion_query, QueryType counter_type) {
    const bool is_z_pass_counter = counter_type == QueryType::ZPassPixelCount ||
                                   counter_type == QueryType::ZPassPixelCount64;
    if (has_broken_occlusion_query && is_z_pass_counter) {
        // NOTE(review): the rationale for the value 120 is not visible here; presumably a
        // non-zero stand-in sample count so that occlusion tests pass -- confirm.
        return 120;
    }
    return 0;
}
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
|
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
|
||||||
QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
||||||
|
@ -256,9 +272,10 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||||
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
|
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
|
||||||
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
|
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
|
||||||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
||||||
|
u64 adjustment = OcclusionQueryAdjustValue(impl->has_broken_occlusion_query, counter_type);
|
||||||
|
|
||||||
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
||||||
pointer, pointer_timestamp] {
|
pointer, pointer_timestamp, adjustment] {
|
||||||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
||||||
if (!is_synced) [[likely]] {
|
if (!is_synced) [[likely]] {
|
||||||
impl->pending_unregister.push_back(query_location);
|
impl->pending_unregister.push_back(query_location);
|
||||||
|
@ -269,7 +286,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
query_base->value += streamer->GetAmmendValue();
|
query_base->value += streamer->GetAmmendValue() + adjustment;
|
||||||
streamer->SetAccumulationValue(query_base->value);
|
streamer->SetAccumulationValue(query_base->value);
|
||||||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
||||||
u64 timestamp = impl->gpu.GetTicks();
|
u64 timestamp = impl->gpu.GetTicks();
|
||||||
|
|
|
@ -53,7 +53,8 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
|
explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
|
||||||
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_);
|
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_,
|
||||||
|
bool has_broken_occlusion_query_);
|
||||||
|
|
||||||
~QueryCacheBase();
|
~QueryCacheBase();
|
||||||
|
|
||||||
|
@ -178,4 +179,4 @@ protected:
|
||||||
std::unique_ptr<QueryCacheBaseImpl> impl;
|
std::unique_ptr<QueryCacheBaseImpl> impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -116,8 +116,9 @@ public:
|
||||||
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
|
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
|
||||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
||||||
DescriptorPool& descriptor_pool)
|
DescriptorPool& descriptor_pool)
|
||||||
: BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
|
: BaseStreamer(id_), runtime{runtime_},
|
||||||
scheduler{scheduler_}, memory_allocator{memory_allocator_} {
|
rasterizer{rasterizer_}, device{device_}, scheduler{scheduler_},
|
||||||
|
memory_allocator{memory_allocator_}, is_broken{device.HasBrokenOcclusionQuery()} {
|
||||||
current_bank = nullptr;
|
current_bank = nullptr;
|
||||||
current_query = nullptr;
|
current_query = nullptr;
|
||||||
ammend_value = 0;
|
ammend_value = 0;
|
||||||
|
@ -150,12 +151,14 @@ public:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ReserveHostQuery();
|
ReserveHostQuery();
|
||||||
scheduler.Record([query_pool = current_query_pool,
|
if (!is_broken) {
|
||||||
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([query_pool = current_query_pool,
|
||||||
const bool use_precise = Settings::IsGPULevelHigh();
|
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
|
const bool use_precise = Settings::IsGPULevelHigh();
|
||||||
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
|
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
|
||||||
});
|
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
|
||||||
|
});
|
||||||
|
}
|
||||||
has_started = true;
|
has_started = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,10 +166,12 @@ public:
|
||||||
if (!has_started) {
|
if (!has_started) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
scheduler.Record([query_pool = current_query_pool,
|
if (!is_broken) {
|
||||||
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([query_pool = current_query_pool,
|
||||||
cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index));
|
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
||||||
});
|
cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index));
|
||||||
|
});
|
||||||
|
}
|
||||||
has_started = false;
|
has_started = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -573,6 +578,7 @@ private:
|
||||||
bool accumulation_since_last_sync{};
|
bool accumulation_since_last_sync{};
|
||||||
VideoCommon::HostQueryBase* current_query;
|
VideoCommon::HostQueryBase* current_query;
|
||||||
bool has_started{};
|
bool has_started{};
|
||||||
|
bool is_broken{};
|
||||||
std::mutex flush_guard;
|
std::mutex flush_guard;
|
||||||
|
|
||||||
std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass;
|
std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass;
|
||||||
|
|
|
@ -174,7 +174,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||||
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
|
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
|
||||||
query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
|
query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
|
||||||
staging_pool, compute_pass_descriptor_queue, descriptor_pool),
|
staging_pool, compute_pass_descriptor_queue, descriptor_pool),
|
||||||
query_cache(gpu, *this, cpu_memory_, query_cache_runtime),
|
query_cache(gpu, *this, cpu_memory_, query_cache_runtime, device.HasBrokenOcclusionQuery()),
|
||||||
pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
|
pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
|
||||||
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
|
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
|
||||||
accelerate_dma(buffer_cache, texture_cache, scheduler),
|
accelerate_dma(buffer_cache, texture_cache, scheduler),
|
||||||
|
|
|
@ -513,6 +513,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_turnip) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Turnip drivers have broken occlusion queries");
|
||||||
|
has_broken_occlusion_query = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (is_arm) {
|
if (is_arm) {
|
||||||
must_emulate_scaled_formats = true;
|
must_emulate_scaled_formats = true;
|
||||||
|
|
||||||
|
|
|
@ -599,6 +599,10 @@ public:
|
||||||
return has_broken_cube_compatibility;
|
return has_broken_cube_compatibility;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HasBrokenOcclusionQuery() const {
|
||||||
|
return has_broken_occlusion_query;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the vendor name reported from Vulkan.
|
/// Returns the vendor name reported from Vulkan.
|
||||||
std::string_view GetVendorName() const {
|
std::string_view GetVendorName() const {
|
||||||
return properties.driver.driverName;
|
return properties.driver.driverName;
|
||||||
|
@ -794,6 +798,7 @@ private:
|
||||||
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
|
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
|
||||||
bool has_broken_compute{}; ///< Compute shaders can cause crashes
|
bool has_broken_compute{}; ///< Compute shaders can cause crashes
|
||||||
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
|
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
|
||||||
|
bool has_broken_occlusion_query{}; ///< Has broken occlusion queries
|
||||||
bool has_renderdoc{}; ///< Has RenderDoc attached
|
bool has_renderdoc{}; ///< Has RenderDoc attached
|
||||||
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
|
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
|
||||||
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
|
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue