diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 78da6ae348..65c8e10162 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -590,10 +590,6 @@ void GLGSRender::end() m_draw_time += (u32)std::chrono::duration_cast(draw_end - draw_start).count(); m_draw_calls++; - if (zcull_task_queue.active_query && - zcull_task_queue.active_query->active) - zcull_task_queue.active_query->num_draws++; - synchronize_buffers(); rsx::thread::end(); } @@ -754,9 +750,11 @@ void GLGSRender::on_init_thread() //Occlusion query for (u32 i = 0; i < occlusion_query_count; ++i) { + GLuint handle = 0; auto &query = occlusion_query_data[i]; - glGenQueries(1, &query.handle); + glGenQueries(1, &handle); + query.driver_handle = (u64)handle; query.pending = false; query.active = false; query.result = 0; @@ -848,7 +846,9 @@ void GLGSRender::on_exit() query.active = false; query.pending = false; - glDeleteQueries(1, &query.handle); + GLuint handle = (GLuint)query.driver_handle; + glDeleteQueries(1, &handle); + query.driver_handle = 0; } glFlush(); @@ -1410,179 +1410,29 @@ void GLGSRender::notify_tile_unbound(u32 tile) //m_rtts.invalidate_surface_address(addr, false); } -void GLGSRender::check_zcull_status(bool framebuffer_swap, bool force_read) +void GLGSRender::begin_occlusion_query(rsx::occlusion_query_info* query) { - if (g_cfg.video.disable_zcull_queries) - return; - - bool testing_enabled = zcull_pixel_cnt_enabled || zcull_stats_enabled; - - if (framebuffer_swap) - { - zcull_surface_active = false; - const u32 zeta_address = depth_surface_info.address; - - if (zeta_address) - { - //Find zeta address in bound zculls - for (int i = 0; i < rsx::limits::zculls_count; i++) - { - if (zculls[i].binded) - { - const u32 rsx_address = rsx::get_address(zculls[i].offset, CELL_GCM_LOCATION_LOCAL); - if (rsx_address == zeta_address) - { - zcull_surface_active = true; - break; - } - } - } - } - } - - occlusion_query_info* query = nullptr; - - if (zcull_task_queue.task_stack.size() > 0) - query = zcull_task_queue.active_query; - - if (query && query->active) - { - if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active)) - { - glEndQuery(GL_ANY_SAMPLES_PASSED); - query->active = false; - query->pending = true; - } - } - else - { - if (zcull_rendering_enabled && testing_enabled && zcull_surface_active) - { - //Find query - u32 free_index = synchronize_zcull_stats(); - query = &occlusion_query_data[free_index]; - zcull_task_queue.add(query); - - glBeginQuery(GL_ANY_SAMPLES_PASSED, query->handle); - query->active = true; - query->result = 0; - query->num_draws = 0; - } - } + query->result = 0; + glBeginQuery(GL_ANY_SAMPLES_PASSED, (GLuint)query->driver_handle); } -void GLGSRender::clear_zcull_stats(u32 type) +void GLGSRender::end_occlusion_query(rsx::occlusion_query_info* query) { - if (g_cfg.video.disable_zcull_queries) - return; - - if (type == CELL_GCM_ZPASS_PIXEL_CNT) - { - if (zcull_task_queue.active_query && - zcull_task_queue.active_query->active && - zcull_task_queue.active_query->num_draws > 0) - { - //discard active query results - check_zcull_status(false, true); - zcull_task_queue.active_query->pending = false; - - //re-enable cull stats if stats are enabled - check_zcull_status(false, false); - zcull_task_queue.active_query->num_draws = 0; - } - - current_zcull_stats.clear(); - } + glEndQuery(GL_ANY_SAMPLES_PASSED); } -u32 GLGSRender::get_zcull_stats(u32 type) +bool GLGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query) { - if (g_cfg.video.disable_zcull_queries) - return 0u; + GLint status = GL_TRUE; + glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT_AVAILABLE, &status); - if (zcull_task_queue.active_query && - zcull_task_queue.active_query->active && - current_zcull_stats.zpass_pixel_cnt == 0 && - type == CELL_GCM_ZPASS_PIXEL_CNT) - { - //The zcull unit is still bound as the read is happening and there are no results ready - check_zcull_status(false, true); //close current query - check_zcull_status(false, false); //start new query since stat counting is still active - } - - switch (type) - { - case CELL_GCM_ZPASS_PIXEL_CNT: - { - if (current_zcull_stats.zpass_pixel_cnt > 0) - return UINT16_MAX; - - synchronize_zcull_stats(true); - return (current_zcull_stats.zpass_pixel_cnt > 0)? UINT16_MAX : 0; - } - case CELL_GCM_ZCULL_STATS: - case CELL_GCM_ZCULL_STATS1: - case CELL_GCM_ZCULL_STATS2: - //TODO - return UINT16_MAX; - case CELL_GCM_ZCULL_STATS3: - { - //Some kind of inverse value - if (current_zcull_stats.zpass_pixel_cnt > 0) - return 0; - - synchronize_zcull_stats(true); - return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT16_MAX; - } - default: - LOG_ERROR(RSX, "Unknown zcull stat type %d", type); - return 0; - } + return status != GL_FALSE; } -u32 GLGSRender::synchronize_zcull_stats(bool hard_sync) +void GLGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) { - if (!zcull_rendering_enabled || zcull_task_queue.pending == 0) - return 0; + GLint result; + glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result); - u32 result = UINT16_MAX; - GLint count, status; - - for (auto &query : zcull_task_queue.task_stack) - { - if (query == nullptr || query->active) - continue; - - glGetQueryObjectiv(query->handle, GL_QUERY_RESULT_AVAILABLE, &status); - - if (status == GL_FALSE && !hard_sync) - continue; - - glGetQueryObjectiv(query->handle, GL_QUERY_RESULT, &count); - query->pending = false; - query = nullptr; - - current_zcull_stats.zpass_pixel_cnt += count; - zcull_task_queue.pending--; - } - - for (u32 i = 0; i < occlusion_query_count; ++i) - { - auto &query = occlusion_query_data[i]; - if (!query.pending && !query.active) - { - result = i; - break; - } - } - - if (result == UINT16_MAX && !hard_sync) - return synchronize_zcull_stats(true); - - return result; -} - -void GLGSRender::notify_zcull_info_changed() -{ - check_zcull_status(false, false); -} + query->result += result; +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index e2e561d15f..8c3cac44bc 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -36,62 +36,6 @@ struct work_item volatile bool received = false; }; -struct occlusion_query_info -{ - GLuint handle; - GLint result; - GLint num_draws; - bool pending; - bool active; -}; - -struct zcull_statistics -{ - u32 zpass_pixel_cnt; - u32 zcull_stats; - u32 zcull_stats1; - u32 zcull_stats2; - u32 zcull_stats3; - - void clear() - { - zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0; - } -}; - -struct occlusion_task -{ - std::vector task_stack; - occlusion_query_info* active_query = nullptr; - u32 pending = 0; - - //Add one query to the task - void add(occlusion_query_info* query) - { - active_query = query; - - if (task_stack.size() > 0 && pending == 0) - task_stack.resize(0); - - const auto empty_slots = task_stack.size() - pending; - if (empty_slots >= 4) - { - for (auto &_query : task_stack) - { - if (_query == nullptr) - { - _query = query; - pending++; - return; - } - } - } - - task_stack.push_back(query); - pending++; - } -}; - struct driver_state { const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; @@ -354,11 +298,6 @@ private: std::mutex queue_guard; std::list work_queue; - bool framebuffer_status_valid = false; - - rsx::gcm_framebuffer_info surface_info[rsx::limits::color_buffers_count]; - rsx::gcm_framebuffer_info depth_surface_info; - bool flush_draw_buffers = false; std::thread::id m_thread_id; @@ -372,14 +311,6 @@ private: //vaos are mandatory for core profile gl::vao m_vao; - //occlusion query - bool zcull_surface_active = false; - zcull_statistics current_zcull_stats; - occlusion_task zcull_task_queue = {}; - - const u32 occlusion_query_count = 128; - std::array occlusion_query_data = {}; - std::mutex m_sampler_mutex; u64 surface_store_tag = 0; std::atomic_bool m_samplers_dirty = {true}; @@ -414,9 +345,11 @@ public: work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; - - void check_zcull_status(bool framebuffer_swap, bool force_read); - u32 synchronize_zcull_stats(bool hard_sync = false); + + void begin_occlusion_query(rsx::occlusion_query_info* query) override; + void end_occlusion_query(rsx::occlusion_query_info* query) override; + bool check_occlusion_query_status(rsx::occlusion_query_info* query) override; + void get_occlusion_query_result(rsx::occlusion_query_info* query) override; protected: void begin() override; @@ -430,10 +363,6 @@ protected: void do_local_task() override; - void notify_zcull_info_changed() override; - void clear_zcull_stats(u32 type) override; - u32 get_zcull_stats(u32 type) override; - bool on_access_violation(u32 address, bool is_writing) override; void on_notify_memory_unmapped(u32 address_base, u32 size) override; void notify_tile_unbound(u32 tile) override; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 7a75b8739e..e6f62a4189 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -248,15 +248,15 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk for (int i = 0; i < rsx::limits::color_buffers_count; ++i) { - if (surface_info[i].pitch && g_cfg.video.write_color_buffers) + if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers) { if (!old_format_found) { - old_format = rsx::internals::surface_color_format_to_gl(surface_info[i].color_format).format; + old_format = rsx::internals::surface_color_format_to_gl(m_surface_info[i].color_format).format; old_format_found = true; } - m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_info[i].address, surface_info[i].pitch * surface_info[i].height); + m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height); } if (std::get<0>(m_rtts.m_bound_render_targets[i])) @@ -265,7 +265,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk draw_fbo.color[i] = *rtt; rtt->set_rsx_pitch(pitchs[i]); - surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical }; + m_surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical }; rtt->tile = find_tile(color_offsets[i], color_locations[i]); rtt->aa_mode = aa_mode; @@ -275,7 +275,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_gl_texture_cache.tag_framebuffer(surface_addresses[i] + rtt->raster_address_offset); } else - surface_info[i] = {}; + m_surface_info[i] = {}; } if (std::get<0>(m_rtts.m_bound_depth_stencil)) @@ -293,7 +293,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const u32 depth_surface_pitch = rsx::method_registers.surface_z_pitch(); std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch()); - depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical }; + m_depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical }; ds->aa_mode = aa_mode; ds->set_raster_offset(clip_x, clip_y, texel_size); @@ -302,7 +302,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_gl_texture_cache.tag_framebuffer(depth_address + ds->raster_address_offset); } else - depth_surface_info = {}; + m_depth_surface_info = {}; framebuffer_status_valid = draw_fbo.check(); if (!framebuffer_status_valid) return; @@ -349,30 +349,30 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { - if (!surface_info[i].address || !surface_info[i].pitch) continue; + if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - const u32 range = surface_info[i].pitch * surface_info[i].height; - m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, + const u32 range = m_surface_info[i].pitch * m_surface_info[i].height; + m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, color_format.format, color_format.type, color_format.swap_bytes); } } if (g_cfg.video.write_depth_buffer) { - if (depth_surface_info.address && depth_surface_info.pitch) + if (m_depth_surface_info.address && m_depth_surface_info.pitch) { auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format); - u32 pitch = depth_surface_info.width * 2; - if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; + u32 pitch = m_depth_surface_info.width * 2; + if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; - const u32 range = pitch * depth_surface_info.height; + const u32 range = pitch * m_depth_surface_info.height; //TODO: Verify that depth surface pitch variance affects results - if (pitch != depth_surface_info.pitch) - LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch); + if (pitch != m_depth_surface_info.pitch) + LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", m_depth_surface_info.pitch, pitch); - m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, + m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch, depth_format_gl.format, depth_format_gl.type, true); } } @@ -418,7 +418,7 @@ void GLGSRender::read_buffers() const u32 location = locations[i]; const u32 pitch = pitchs[i]; - if (!surface_info[i].pitch) + if (!m_surface_info[i].pitch) continue; const u32 range = pitch * height; @@ -478,7 +478,7 @@ void GLGSRender::read_buffers() if (g_cfg.video.read_depth_buffer) { //TODO: use pitch - const u32 pitch = depth_surface_info.pitch; + const u32 pitch = m_depth_surface_info.pitch; const u32 width = rsx::method_registers.surface_clip_width(); const u32 height = rsx::method_registers.surface_clip_height(); @@ -537,7 +537,7 @@ void GLGSRender::write_buffers() { for (int i = index; i < index + count; ++i) { - if (surface_info[i].pitch == 0) + if (m_surface_info[i].pitch == 0) continue; /**Even tiles are loaded as whole textures during read_buffers from testing. @@ -545,8 +545,8 @@ void GLGSRender::write_buffers() * but using the GPU to perform the caching is many times faster. */ - const u32 range = surface_info[i].pitch * surface_info[i].height; - __glcheck m_gl_texture_cache.flush_memory_to_cache(surface_info[i].address, range, true); + const u32 range = m_surface_info[i].pitch * m_surface_info[i].height; + __glcheck m_gl_texture_cache.flush_memory_to_cache(m_surface_info[i].address, range, true); } }; @@ -556,11 +556,11 @@ void GLGSRender::write_buffers() if (g_cfg.video.write_depth_buffer) { //TODO: use pitch - if (depth_surface_info.pitch == 0) return; + if (m_depth_surface_info.pitch == 0) return; - u32 range = depth_surface_info.width * depth_surface_info.height * 2; - if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2; + u32 range = m_depth_surface_info.width * m_depth_surface_info.height * 2; + if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2; - m_gl_texture_cache.flush_memory_to_cache(depth_surface_info.address, range, true); + m_gl_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, range, true); } } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index d1db93c1ff..5eeaa498a8 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -343,6 +343,9 @@ namespace rsx element_push_buffer.resize(0); + if (zcull_task_queue.active_query && zcull_task_queue.active_query->active) + zcull_task_queue.active_query->num_draws++; + if (capture_current_frame) { u32 element_count = rsx::method_registers.current_draw_clause.get_elements_count(); @@ -1925,4 +1928,179 @@ namespace rsx skip_frame = (m_skip_frame_ctr < 0); } } + + void thread::check_zcull_status(bool framebuffer_swap, bool force_read) + { + if (g_cfg.video.disable_zcull_queries) + return; + + bool testing_enabled = zcull_pixel_cnt_enabled || zcull_stats_enabled; + + if (framebuffer_swap) + { + zcull_surface_active = false; + const u32 zeta_address = m_depth_surface_info.address; + + if (zeta_address) + { + //Find zeta address in bound zculls + for (int i = 0; i < rsx::limits::zculls_count; i++) + { + if (zculls[i].binded) + { + const u32 rsx_address = rsx::get_address(zculls[i].offset, CELL_GCM_LOCATION_LOCAL); + if (rsx_address == zeta_address) + { + zcull_surface_active = true; + break; + } + } + } + } + } + + occlusion_query_info* query = nullptr; + + if (zcull_task_queue.task_stack.size() > 0) + query = zcull_task_queue.active_query; + + if (query && query->active) + { + if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active)) + { + end_occlusion_query(query); + query->active = false; + query->pending = true; + } + } + else + { + if (zcull_rendering_enabled && testing_enabled && zcull_surface_active) + { + //Find query + u32 free_index = synchronize_zcull_stats(); + query = &occlusion_query_data[free_index]; + zcull_task_queue.add(query); + + begin_occlusion_query(query); + query->active = true; + query->result = 0; + query->num_draws = 0; + } + } + } + + void thread::clear_zcull_stats(u32 type) + { + if (g_cfg.video.disable_zcull_queries) + return; + + if (type == CELL_GCM_ZPASS_PIXEL_CNT) + { + if (zcull_task_queue.active_query && + zcull_task_queue.active_query->active && + zcull_task_queue.active_query->num_draws > 0) + { + //discard active query results + check_zcull_status(false, true); + zcull_task_queue.active_query->pending = false; + + //re-enable cull stats if stats are enabled + check_zcull_status(false, false); + zcull_task_queue.active_query->num_draws = 0; + } + + current_zcull_stats.clear(); + } + } + + u32 thread::get_zcull_stats(u32 type) + { + if (g_cfg.video.disable_zcull_queries) + return 0u; + + if (zcull_task_queue.active_query && + zcull_task_queue.active_query->active && + current_zcull_stats.zpass_pixel_cnt == 0 && + type == CELL_GCM_ZPASS_PIXEL_CNT) + { + //The zcull unit is still bound as the read is happening and there are no results ready + check_zcull_status(false, true); //close current query + check_zcull_status(false, false); //start new query since stat counting is still active + } + + switch (type) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + { + if (current_zcull_stats.zpass_pixel_cnt > 0) + return UINT16_MAX; + + synchronize_zcull_stats(true); + return (current_zcull_stats.zpass_pixel_cnt > 0) ? UINT16_MAX : 0; + } + case CELL_GCM_ZCULL_STATS: + case CELL_GCM_ZCULL_STATS1: + case CELL_GCM_ZCULL_STATS2: + //TODO + return UINT16_MAX; + case CELL_GCM_ZCULL_STATS3: + { + //Some kind of inverse value + if (current_zcull_stats.zpass_pixel_cnt > 0) + return 0; + + synchronize_zcull_stats(true); + return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT16_MAX; + } + default: + LOG_ERROR(RSX, "Unknown zcull stat type %d", type); + return 0; + } + } + + u32 thread::synchronize_zcull_stats(bool hard_sync) + { + if (!zcull_rendering_enabled || zcull_task_queue.pending == 0) + return 0; + + u32 result = UINT16_MAX; + + for (auto &query : zcull_task_queue.task_stack) + { + if (query == nullptr || query->active) + continue; + + bool status = check_occlusion_query_status(query); + if (status == false && !hard_sync) + continue; + + get_occlusion_query_result(query); + current_zcull_stats.zpass_pixel_cnt += query->result; + + query->pending = false; + query = nullptr; + zcull_task_queue.pending--; + } + + for (u32 i = 0; i < occlusion_query_count; ++i) + { + auto &query = occlusion_query_data[i]; + if (!query.pending && !query.active) + { + result = i; + break; + } + } + + if (result == UINT16_MAX && !hard_sync) + return synchronize_zcull_stats(true); + + return result; + } + + void thread::notify_zcull_info_changed() + { + check_zcull_status(false, false); + } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index a4916b0d7e..6d7076dc08 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -11,7 +11,7 @@ #include "RSXVertexProgram.h" #include "RSXFragmentProgram.h" #include "rsx_methods.h" -#include "rsx_trace.h" +#include "rsx_utils.h" #include #include "Utilities/Thread.h" @@ -142,6 +142,65 @@ namespace rsx std::array attribute_placement; }; + struct zcull_statistics + { + u32 zpass_pixel_cnt; + u32 zcull_stats; + u32 zcull_stats1; + u32 zcull_stats2; + u32 zcull_stats3; + + void clear() + { + zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0; + } + }; + + struct occlusion_query_info + { + u32 driver_handle; + u32 result; + u32 num_draws; + bool pending; + bool active; + + u64 sync_timestamp; + u64 external_flags; + }; + + struct occlusion_task + { + std::vector task_stack; + occlusion_query_info* active_query = nullptr; + u32 pending = 0; + + //Add one query to the task + void add(occlusion_query_info* query) + { + active_query = query; + + if (task_stack.size() > 0 && pending == 0) + task_stack.resize(0); + + const auto empty_slots = task_stack.size() - pending; + if (empty_slots >= 4) + { + for (auto &_query : task_stack) + { + if (_query == nullptr) + { + _query = query; + pending++; + return; + } + } + } + + task_stack.push_back(query); + pending++; + } + }; + struct sampled_image_descriptor_base; class thread : public named_thread @@ -158,6 +217,19 @@ namespace rsx bool supports_multidraw = false; + //occlusion query + bool zcull_surface_active = false; + zcull_statistics current_zcull_stats; + + const u32 occlusion_query_count = 128; + std::array occlusion_query_data = {}; + occlusion_task zcull_task_queue = {}; + + //framebuffer setup + rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count]; + rsx::gcm_framebuffer_info m_depth_surface_info; + bool framebuffer_status_valid = false; + public: RsxDmaControl* ctrl = nullptr; atomic_t internal_get{ 0 }; @@ -274,9 +346,16 @@ namespace rsx virtual void notify_tile_unbound(u32 /*tile*/) {} //zcull - virtual void notify_zcull_info_changed() {} - virtual void clear_zcull_stats(u32 /*type*/) {} - virtual u32 get_zcull_stats(u32 /*type*/) { return UINT16_MAX; } + virtual void notify_zcull_info_changed(); + virtual void clear_zcull_stats(u32 type); + virtual u32 get_zcull_stats(u32 type); + virtual void check_zcull_status(bool framebuffer_swap, bool force_read); + virtual u32 synchronize_zcull_stats(bool hard_sync = false); + + virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {} + virtual void end_occlusion_query(occlusion_query_info* /*query*/) {} + virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; } + virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; } gsl::span get_raw_index_array(const std::vector >& draw_indexed_clause) const; gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b5790f55e8..7045063d53 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -596,11 +596,16 @@ VKGSRender::VKGSRender() : GSRender() m_secondary_command_buffer_pool.create((*m_device)); m_secondary_command_buffer.create(m_secondary_command_buffer_pool); m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all; - + //Precalculated stuff m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); + //Occlusion + m_occlusion_query_pool.create((*m_device), 1024); //Enough for 1k draw calls per pass + for (int n = 0; n < 128; ++n) + occlusion_query_data[n].driver_handle = n; + //Generate frame contexts VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }; VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS }; @@ -753,6 +758,9 @@ VKGSRender::~VKGSRender() vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr); vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); + //Queries + m_occlusion_query_pool.destroy(); + //Command buffer for (auto &cb : m_primary_cb_list) cb.destroy(); @@ -1416,6 +1424,54 @@ void VKGSRender::end() const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed); const bool single_draw = !supports_multidraw || (!is_emulated_restart && (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive)); + u32 occlusion_id = 0; + if (m_occlusion_query_active) + { + //Begin query + occlusion_id = m_occlusion_query_pool.find_free_slot(); + if (occlusion_id == UINT32_MAX) + { + bool free_slot_found = false; + u32 index_to_free = UINT32_MAX; + u64 earliest_timestamp = UINT64_MAX; + + //flush occlusion queries + for (auto It : m_occlusion_map) + { + u32 index = It.first; + auto query = &occlusion_query_data[index]; + if (check_occlusion_query_status(query)) + { + free_slot_found = true; + get_occlusion_query_result(query); + break; + } + + if (query->sync_timestamp < earliest_timestamp) + { + index_to_free = index; + earliest_timestamp = query->sync_timestamp; + } + } + + if (free_slot_found) + { + occlusion_id = m_occlusion_query_pool.find_free_slot(); + } + else + { + get_occlusion_query_result(&occlusion_query_data[index_to_free]); + occlusion_id = m_occlusion_query_pool.find_free_slot(); + } + + verify(HERE), occlusion_id != UINT32_MAX; + } + + m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); + m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id); + m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer; + } + if (!index_info) { if (single_draw) @@ -1468,6 +1524,12 @@ void VKGSRender::end() } } + if (m_occlusion_query_active) + { + //End query + m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id); + } + close_render_pass(); vk::leave_uninterruptible(); @@ -2309,7 +2371,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) return; copy_render_targets_to_dma_location(); - m_rtts_dirty = false; u32 clip_width = rsx::method_registers.surface_clip_width(); @@ -2580,6 +2641,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, fbo_width, fbo_height, std::move(fbo_images))); } + + check_zcull_status(true, false); } void VKGSRender::reinitialize_swapchain() @@ -2913,6 +2976,79 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst m_current_command_buffer->begin(); m_samplers_dirty.store(true); - return result; +} + +void VKGSRender::clear_zcull_stats(u32 type) +{ + rsx::thread::clear_zcull_stats(type); + m_occlusion_map.clear(); + m_occlusion_query_pool.reset_all(*m_current_command_buffer); +} + +void VKGSRender::begin_occlusion_query(rsx::occlusion_query_info* query) +{ + query->result = 0; + query->sync_timestamp = get_system_time(); + m_active_query_info = query; + m_occlusion_query_active = true; +} + +void VKGSRender::end_occlusion_query(rsx::occlusion_query_info* query) +{ + m_occlusion_query_active = false; + m_active_query_info = nullptr; + + flush_command_queue(); +} + +bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query) +{ + auto found = m_occlusion_map.find(query->driver_handle); + if (found == m_occlusion_map.end()) + return true; + + auto &data = found->second; + if (data.indices.size() == 0) + return true; + + if (data.command_buffer_to_wait == m_current_command_buffer) + return false; + + if (data.command_buffer_to_wait->pending) + return false; + + u32 oldest = data.indices.front(); + return m_occlusion_query_pool.check_query_status(oldest); +} + +void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) +{ + auto found = m_occlusion_map.find(query->driver_handle); + if (found == m_occlusion_map.end()) + return; + + auto &data = found->second; + if (data.indices.size() == 0) + return; + + if (data.command_buffer_to_wait == m_current_command_buffer) + flush_command_queue(); //Should hard sync, but this should almost never ever happen + + if (data.command_buffer_to_wait->pending) + data.command_buffer_to_wait->wait(); + + //Gather data + for (const auto occlusion_id : data.indices) + { + //We only need one hit + if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id)) + { + query->result = 1; + break; + } + } + + m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); + m_occlusion_map.erase(query->driver_handle); } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 48e7dfcb19..fe0981bd39 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -109,6 +109,12 @@ struct command_buffer_chunk: public vk::command_buffer } }; +struct occlusion_data +{ + std::vector indices; + command_buffer_chunk* command_buffer_to_wait = nullptr; +}; + class VKGSRender : public GSRender { private: @@ -153,10 +159,14 @@ private: //Vulkan internals vk::command_pool m_command_buffer_pool; + vk::occlusion_query_pool m_occlusion_query_pool; + bool m_occlusion_query_active = false; + rsx::occlusion_query_info *m_active_query_info = nullptr; + std::unordered_map m_occlusion_map; std::mutex m_secondary_cb_guard; vk::command_pool m_secondary_command_buffer_pool; - vk::command_buffer m_secondary_command_buffer; + vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations u32 m_current_cb_index = 0; std::array m_primary_cb_list; @@ -260,12 +270,6 @@ private: s64 m_flip_time = 0; u8 m_draw_buffers_count = 0; - - bool framebuffer_status_valid = false; - - rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count]; - rsx::gcm_framebuffer_info m_depth_surface_info; - bool m_flush_draw_buffers = false; std::atomic m_last_flushable_cb = {-1 }; @@ -320,6 +324,12 @@ public: void write_buffers(); void set_viewport(); + void clear_zcull_stats(u32 type) override; + void begin_occlusion_query(rsx::occlusion_query_info* query) override; + void end_occlusion_query(rsx::occlusion_query_info* query) override; + bool check_occlusion_query_status(rsx::occlusion_query_info* query) override; + void get_occlusion_query_result(rsx::occlusion_query_info* query) override; + protected: void begin() override; void end() override; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 780410af8c..640cd93ba2 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -1450,6 +1450,113 @@ namespace vk } }; + class occlusion_query_pool + { + VkQueryPool query_pool = VK_NULL_HANDLE; + vk::render_device* owner = nullptr; + + std::vector query_active_status; + + public: + + void create(vk::render_device &dev, u32 num_entries) + { + VkQueryPoolCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + info.queryType = VK_QUERY_TYPE_OCCLUSION; + info.queryCount = num_entries; + + CHECK_RESULT(vkCreateQueryPool(dev, &info, nullptr, &query_pool)); + owner = &dev; + + query_active_status.resize(num_entries, false); + } + + void destroy() + { + if (query_pool) + { + vkDestroyQueryPool(*owner, query_pool, nullptr); + + owner = nullptr; + query_pool = VK_NULL_HANDLE; + } + } + + void begin_query(vk::command_buffer &cmd, u32 index) + { + if (query_active_status[index]) + { + //Synchronization must be done externally + vkCmdResetQueryPool(cmd, query_pool, index, 1); + } + + vkCmdBeginQuery(cmd, query_pool, index, 0);//VK_QUERY_CONTROL_PRECISE_BIT); + query_active_status[index] = true; + } + + void end_query(vk::command_buffer &cmd, u32 index) + { + vkCmdEndQuery(cmd, query_pool, index); + } + + bool check_query_status(u32 index) + { + u32 result[2] = {0, 0}; + switch (VkResult status = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) + { + case VK_SUCCESS: + break; + case VK_NOT_READY: + return false; + default: + vk::die_with_error(HERE, status); + } + + return result[1] != 0; + } + + u32 get_query_result(u32 index) + { + u32 result = 0; + CHECK_RESULT(vkGetQueryPoolResults(*owner, query_pool, index, 1, 4, &result, 4, VK_QUERY_RESULT_WAIT_BIT)); + + return result == 0u? 0u: 1u; + } + + void reset_query(vk::command_buffer &cmd, u32 index) + { + vkCmdResetQueryPool(cmd, query_pool, index, 1); + query_active_status[index] = false; + } + + void reset_queries(vk::command_buffer &cmd, std::vector &list) + { + for (const auto index : list) + reset_query(cmd, index); + } + + void reset_all(vk::command_buffer &cmd) + { + for (u32 n = 0; n < query_active_status.size(); n++) + { + if (query_active_status[n]) + reset_query(cmd, n); + } + } + + u32 find_free_slot() + { + for (u32 n = 0; n < query_active_status.size(); n++) + { + if (query_active_status[n] == false) + return n; + } + + return UINT32_MAX; + } + }; + namespace glsl { enum program_input_type