diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 28d595d61d..55a3381e6a 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2894,7 +2894,7 @@ void VKGSRender::begin_conditional_rendering(const std::vectorget_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0); + m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, 1, m_cond_render_buffer->value, 0); vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage, @@ -2912,16 +2912,58 @@ void VKGSRender::begin_conditional_rendering(const std::vector query_range = { umax, 0 }; + + auto copy_query_range_impl = [&]() + { + const auto count = (query_range.second - query_range.first + 1); + m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, query_range.first, count, scratch->value, dst_offset); + dst_offset += count * 4; + }; + for (usz i = first; i < last; ++i) { auto& query_info = m_occlusion_map[sources[i]->driver_handle]; for (const auto& index : query_info.indices) { - m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset); - dst_offset += 4; + // First iteration? + if (query_range.first == umax) + { + query_range = { index, index }; + continue; + } + + // Head? + if ((query_range.first - 1) == index) + { + query_range.first = index; + continue; + } + + // Tail? + if ((query_range.second + 1) == index) + { + query_range.second = index; + continue; + } + + // Flush pending queue. In practice, this is never reached and we fall out to the spill block outside the loops + copy_query_range_impl(); + + // Start a new range for the current index + query_range = { index, index }; } } + if (query_range.first != umax) + { + // Dangling queries, flush + copy_query_range_impl(); + } + // Sanity check ensure(dst_offset <= scratch->size()); diff --git a/rpcs3/Emu/RSX/VK/VKQueryPool.cpp b/rpcs3/Emu/RSX/VK/VKQueryPool.cpp index 3a91d25c0c..449d8d13b9 100644 --- a/rpcs3/Emu/RSX/VK/VKQueryPool.cpp +++ b/rpcs3/Emu/RSX/VK/VKQueryPool.cpp @@ -168,11 +168,11 @@ namespace vk return query_info.data; } - void query_pool_manager::get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset) + void query_pool_manager::get_query_result_indirect(vk::command_buffer& cmd, u32 index, u32 count, VkBuffer dst, VkDeviceSize dst_offset) { // We're technically supposed to stop any active renderpasses before streaming the results out, but that doesn't matter on IMR hw // On TBDR setups like the apple M series, the stop is required (results are all 0 if you don't flush the RP), but this introduces a very heavy performance loss. - vkCmdCopyQueryPoolResults(cmd, *query_slot_status[index].pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT); + vkCmdCopyQueryPoolResults(cmd, *query_slot_status[index].pool, index, count, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT); } void query_pool_manager::free_query(vk::command_buffer&/*cmd*/, u32 index) diff --git a/rpcs3/Emu/RSX/VK/VKQueryPool.h b/rpcs3/Emu/RSX/VK/VKQueryPool.h index f235e7bbf6..6aa1e19978 100644 --- a/rpcs3/Emu/RSX/VK/VKQueryPool.h +++ b/rpcs3/Emu/RSX/VK/VKQueryPool.h @@ -47,7 +47,7 @@ namespace vk bool check_query_status(u32 index); u32 get_query_result(u32 index); - void get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset); + void get_query_result_indirect(vk::command_buffer& cmd, u32 index, u32 count, VkBuffer dst, VkDeviceSize dst_offset); u32 allocate_query(vk::command_buffer& cmd); void free_query(vk::command_buffer&/*cmd*/, u32 index);