From 7453e46a7c9cc6d522e05f9d267b4e57d26362d1 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 17 Jan 2020 19:24:33 +0300 Subject: [PATCH] rsx: Refactor out complex present code into separate files - Also restructures present code to have image lookup in a separate re-usable function. --- rpcs3/Emu/CMakeLists.txt | 2 + rpcs3/Emu/RSX/GL/GLGSRender.cpp | 270 +----------- rpcs3/Emu/RSX/GL/GLGSRender.h | 51 ++- rpcs3/Emu/RSX/GL/GLPresent.cpp | 283 +++++++++++++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 669 +----------------------------- rpcs3/Emu/RSX/VK/VKGSRender.h | 527 +++++++++++------------ rpcs3/Emu/RSX/VK/VKPresent.cpp | 692 +++++++++++++++++++++++++++++++ rpcs3/GLGSRender.vcxproj | 3 +- rpcs3/GLGSRender.vcxproj.filters | 3 +- rpcs3/VKGSRender.vcxproj | 1 + rpcs3/VKGSRender.vcxproj.filters | 5 +- 11 files changed, 1293 insertions(+), 1213 deletions(-) create mode 100644 rpcs3/Emu/RSX/GL/GLPresent.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKPresent.cpp diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 38db739119..b848d2faeb 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -379,6 +379,7 @@ target_sources(rpcs3_emu PRIVATE RSX/GL/GLFragmentProgram.cpp RSX/GL/GLGSRender.cpp RSX/GL/GLHelpers.cpp + RSX/GL/GLPresent.cpp RSX/GL/GLRenderTargets.cpp RSX/GL/GLTexture.cpp RSX/GL/GLVertexBuffers.cpp @@ -397,6 +398,7 @@ if(TARGET 3rdparty_vulkan) RSX/VK/VKGSRender.cpp RSX/VK/VKHelpers.cpp RSX/VK/VKMemAlloc.cpp + RSX/VK/VKPresent.cpp RSX/VK/VKProgramPipeline.cpp RSX/VK/VKRenderPass.cpp RSX/VK/VKResolveHelper.cpp diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index a8b159686d..6f4771635a 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1482,266 +1482,6 @@ void GLGSRender::update_draw_state() m_frame_stats.setup_time += m_profiler.duration(); } -void GLGSRender::flip(const rsx::display_flip_info_t& info) -{ - if (info.skip_frame) - { - m_frame->flip(m_context, true); - rsx::thread::flip(info); - return; - } - - u32 buffer_width = display_buffers[info.buffer].width; - u32 buffer_height = display_buffers[info.buffer].height; - u32 buffer_pitch = display_buffers[info.buffer].pitch; - - u32 av_format; - const auto avconfig = g_fxo->get(); - - if (avconfig->state) - { - av_format = avconfig->get_compatible_gcm_format(); - if (!buffer_pitch) - buffer_pitch = buffer_width * avconfig->get_bpp(); - - buffer_width = std::min(buffer_width, avconfig->resolution_x); - buffer_height = std::min(buffer_height, avconfig->resolution_y); - } - else - { - av_format = CELL_GCM_TEXTURE_A8R8G8B8; - if (!buffer_pitch) - buffer_pitch = buffer_width * 4; - } - - // Disable scissor test (affects blit, clear, etc) - gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); - - // Clear the window background to black - gl_state.clear_color(0, 0, 0, 0); - gl::screen.bind(); - gl::screen.clear(gl::buffers::color); - - // Calculate blit coordinates - coordi aspect_ratio; - sizei csize(m_frame->client_width(), m_frame->client_height()); - sizei new_size = csize; - - if (!g_cfg.video.stretch_to_display_area) - { - const double aq = 1. * buffer_width / buffer_height; - const double rq = 1. * new_size.width / new_size.height; - const double q = aq / rq; - - if (q > 1.0) - { - new_size.height = static_cast(new_size.height / q); - aspect_ratio.y = (csize.height - new_size.height) / 2; - } - else if (q < 1.0) - { - new_size.width = static_cast(new_size.width * q); - aspect_ratio.x = (csize.width - new_size.width) / 2; - } - } - - aspect_ratio.size = new_size; - - if (info.buffer < display_buffers_count && buffer_width && buffer_height) - { - // Find the source image - const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL); - GLuint image = GL_NONE; - - if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address)) - { - if (render_target_texture->last_use_tag == m_rtts.write_tag) - { - image = render_target_texture->raw_handle(); - } - else - { - gl::command_context cmd = { gl_state }; - const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read); - - if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) - { - // Confirmed to be the newest data source in that range - image = render_target_texture->raw_handle(); - } - } - - if (image) - { - buffer_width = rsx::apply_resolution_scale(buffer_width, true); - buffer_height = rsx::apply_resolution_scale(buffer_height, true); - - if (buffer_width > render_target_texture->width() || - buffer_height > render_target_texture->height()) - { - // TODO: Should emit only once to avoid flooding the log file - // TODO: Take AA scaling into account - LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d", - display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y, - render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels)); - - buffer_width = render_target_texture->width(); - buffer_height = render_target_texture->height(); - } - } - } - else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address, av_format, buffer_width, buffer_height)) - { - //Hack - this should be the first location to check for output - //The render might have been done offscreen or in software and a blit used to display - if (const auto tex = surface->get_raw_texture(); tex) image = tex->id(); - } - - if (!image) - { - LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); - - gl::pixel_unpack_settings unpack_settings; - unpack_settings.alignment(1).row_length(buffer_pitch / 4); - - if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ static_cast(buffer_width), static_cast(buffer_height) }) - { - m_flip_tex_color = std::make_unique(GL_TEXTURE_2D, buffer_width, buffer_height, 1, 1, GL_RGBA8); - } - - gl::command_context cmd{ gl_state }; - const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height); - m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read); - - m_flip_tex_color->copy_from(vm::base(absolute_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings); - image = m_flip_tex_color->id(); - } - - if (m_frame->screenshot_toggle == true) - { - m_frame->screenshot_toggle = false; - - std::vector sshot_frame(buffer_height * buffer_width * 4); - - gl::pixel_pack_settings pack_settings{}; - pack_settings.apply(); - - if (gl::get_driver_caps().ARB_dsa_supported) - glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data()); - else - glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data()); - - if (GLenum err; (err = glGetError()) != GL_NO_ERROR) - LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err); - else - m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height); - } - - areai screen_area = coordi({}, { static_cast(buffer_width), static_cast(buffer_height) }); - - if (g_cfg.video.full_rgb_range_output && rsx::fcmp(avconfig->gamma, 1.f)) - { - // Blit source image to the screen - m_flip_fbo.recreate(); - m_flip_fbo.bind(); - m_flip_fbo.color = image; - m_flip_fbo.read_buffer(m_flip_fbo.color); - m_flip_fbo.draw_buffer(m_flip_fbo.color); - m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear); - } - else - { - const f32 gamma = avconfig->gamma; - const bool limited_range = !g_cfg.video.full_rgb_range_output; - - gl::screen.bind(); - m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range); - } - } - - if (m_overlay_manager) - { - if (m_overlay_manager->has_dirty()) - { - m_overlay_manager->lock(); - - std::vector uids_to_dispose; - uids_to_dispose.reserve(m_overlay_manager->get_dirty().size()); - - for (const auto& view : m_overlay_manager->get_dirty()) - { - m_ui_renderer.remove_temp_resources(view->uid); - uids_to_dispose.push_back(view->uid); - } - - m_overlay_manager->unlock(); - m_overlay_manager->dispose(uids_to_dispose); - } - - if (m_overlay_manager->has_visible()) - { - gl::screen.bind(); - - // Lock to avoid modification during run-update chain - std::lock_guard lock(*m_overlay_manager); - - for (const auto& view : m_overlay_manager->get_views()) - { - m_ui_renderer.run(areau(aspect_ratio), 0, *view.get()); - } - } - } - - if (g_cfg.video.overlay) - { - gl::screen.bind(); - glViewport(0, 0, m_frame->client_width(), m_frame->client_height()); - - m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load())); - m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls)); - m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time)); - m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time)); - m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time)); - m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time)); - - const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); - const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); - const auto num_flushes = m_gl_texture_cache.get_num_flush_requests(); - const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions(); - const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes(); - const auto num_misses = m_gl_texture_cache.get_num_cache_misses(); - const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults(); - const auto cache_miss_ratio = static_cast(ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100)); - m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures)); - m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size)); - m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate)); - } - - m_frame->flip(m_context); - rsx::thread::flip(info); - - // Cleanup - m_gl_texture_cache.on_frame_end(); - m_vertex_cache->purge(); - - auto removed_textures = m_rtts.free_invalidated(); - m_framebuffer_cache.remove_if([&](auto& fbo) - { - if (fbo.unused_check_count() >= 2) return true; // Remove if stale - if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid - - return false; - }); - - if (m_draw_fbo && !m_rtts_dirty) - { - // Always restore the active framebuffer - m_draw_fbo->bind(); - set_viewport(); - set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped)); - } -} - bool GLGSRender::on_access_violation(u32 address, bool is_writing) { const bool can_flush = (std::this_thread::get_id() == m_rsx_thread); @@ -1762,7 +1502,7 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) if (result.num_flushable > 0) { - work_item &task = post_flush_request(address, result); + auto &task = post_flush_request(address, result); vm::temporary_unlock(); task.producer_wait(); @@ -1802,9 +1542,9 @@ void GLGSRender::do_local_task(rsx::FIFO_state state) { std::lock_guard lock(queue_guard); - work_queue.remove_if([](work_item &q) { return q.received; }); + work_queue.remove_if([](auto &q) { return q.received; }); - for (work_item& q : work_queue) + for (auto& q : work_queue) { if (q.processed) continue; @@ -1843,11 +1583,11 @@ void GLGSRender::do_local_task(rsx::FIFO_state state) } } -work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data) +gl::work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data) { std::lock_guard lock(queue_guard); - work_item &result = work_queue.emplace_back(); + auto &result = work_queue.emplace_back(); result.address_to_flush = address; result.section_data = std::move(flush_data); return result; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 44311d703b..92ba1c346b 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -33,27 +33,36 @@ namespace gl u32 volatile_mapping_offset; std::optional > index_info; }; -} -struct work_item -{ - u32 address_to_flush = 0; - gl::texture_cache::thrashed_set section_data; - - volatile bool processed = false; - volatile bool result = false; - volatile bool received = false; - - void producer_wait() + struct work_item { - while (!processed) - { - std::this_thread::yield(); - } + u32 address_to_flush = 0; + gl::texture_cache::thrashed_set section_data; - received = true; - } -}; + volatile bool processed = false; + volatile bool result = false; + volatile bool received = false; + + void producer_wait() + { + while (!processed) + { + std::this_thread::yield(); + } + + received = true; + } + }; + + struct present_surface_info + { + u32 address; + u32 format; + u32 width; + u32 height; + u32 pitch; + }; +} class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control { @@ -103,7 +112,7 @@ private: gl::video_out_calibration_pass m_video_output_pass; shared_mutex queue_guard; - std::list work_queue; + std::list work_queue; GLProgramBuffer m_prog_buffer; draw_context_t m_decompiler_context; @@ -145,12 +154,14 @@ private: void update_draw_state(); + GLuint get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig); + public: void read_buffers(); void set_viewport(); void set_scissor(bool clip_viewport); - work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); + gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; diff --git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp new file mode 100644 index 0000000000..7ad06c185c --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -0,0 +1,283 @@ +#include "stdafx.h" +#include "GLGSRender.h" + +GLuint GLGSRender::get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig) +{ + GLuint image = GL_NONE; + + if (auto render_target_texture = m_rtts.get_color_surface_at(info->address)) + { + if (render_target_texture->last_use_tag == m_rtts.write_tag) + { + image = render_target_texture->raw_handle(); + } + else + { + gl::command_context cmd = { gl_state }; + const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read); + + if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) + { + // Confirmed to be the newest data source in that range + image = render_target_texture->raw_handle(); + } + } + + if (image) + { + const auto buffer_width = rsx::apply_resolution_scale(info->width, true); + const auto buffer_height = rsx::apply_resolution_scale(info->height, true); + + if (buffer_width > render_target_texture->width() || + buffer_height > render_target_texture->height()) + { + // TODO: Should emit only once to avoid flooding the log file + // TODO: Take AA scaling into account + LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d", + info->width, info->height, + avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y, + render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels)); + + info->width = render_target_texture->width(); + info->height = render_target_texture->height(); + } + else + { + info->width = buffer_width; + info->height = buffer_height; + } + } + } + else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(info->address, info->format, info->width, info->height)) + { + //Hack - this should be the first location to check for output + //The render might have been done offscreen or in software and a blit used to display + if (const auto tex = surface->get_raw_texture(); tex) image = tex->id(); + } + + if (!image) + { + LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); + + gl::pixel_unpack_settings unpack_settings; + unpack_settings.alignment(1).row_length(info->pitch / 4); + + if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ static_cast(info->width), static_cast(info->height) }) + { + m_flip_tex_color = std::make_unique(GL_TEXTURE_2D, info->width, info->height, 1, 1, GL_RGBA8); + } + + gl::command_context cmd{ gl_state }; + const auto range = utils::address_range::start_length(info->address, info->pitch * info->height); + m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read); + + m_flip_tex_color->copy_from(vm::base(info->address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings); + image = m_flip_tex_color->id(); + } + + return image; +} + +void GLGSRender::flip(const rsx::display_flip_info_t& info) +{ + if (info.skip_frame) + { + m_frame->flip(m_context, true); + rsx::thread::flip(info); + return; + } + + u32 buffer_width = display_buffers[info.buffer].width; + u32 buffer_height = display_buffers[info.buffer].height; + u32 buffer_pitch = display_buffers[info.buffer].pitch; + + u32 av_format; + const auto avconfig = g_fxo->get(); + + if (avconfig->state) + { + av_format = avconfig->get_compatible_gcm_format(); + if (!buffer_pitch) + buffer_pitch = buffer_width * avconfig->get_bpp(); + + buffer_width = std::min(buffer_width, avconfig->resolution_x); + buffer_height = std::min(buffer_height, avconfig->resolution_y); + } + else + { + av_format = CELL_GCM_TEXTURE_A8R8G8B8; + if (!buffer_pitch) + buffer_pitch = buffer_width * 4; + } + + // Disable scissor test (affects blit, clear, etc) + gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); + + // Clear the window background to black + gl_state.clear_color(0, 0, 0, 0); + gl::screen.bind(); + gl::screen.clear(gl::buffers::color); + + // Calculate blit coordinates + coordi aspect_ratio; + sizei csize(m_frame->client_width(), m_frame->client_height()); + sizei new_size = csize; + + if (!g_cfg.video.stretch_to_display_area) + { + const double aq = 1. * buffer_width / buffer_height; + const double rq = 1. * new_size.width / new_size.height; + const double q = aq / rq; + + if (q > 1.0) + { + new_size.height = static_cast(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; + } + else if (q < 1.0) + { + new_size.width = static_cast(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; + } + } + + aspect_ratio.size = new_size; + + if (info.buffer < display_buffers_count && buffer_width && buffer_height) + { + // Find the source image + gl::present_surface_info present_info; + present_info.width = buffer_width; + present_info.height = buffer_height; + present_info.pitch = buffer_pitch; + present_info.format = av_format; + present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL); + + const GLuint image = get_present_source(&present_info, avconfig); + buffer_width = present_info.width; + buffer_height = present_info.height; + + if (m_frame->screenshot_toggle == true) + { + m_frame->screenshot_toggle = false; + + std::vector sshot_frame(buffer_height * buffer_width * 4); + + gl::pixel_pack_settings pack_settings{}; + pack_settings.apply(); + + if (gl::get_driver_caps().ARB_dsa_supported) + glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data()); + else + glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data()); + + if (GLenum err; (err = glGetError()) != GL_NO_ERROR) + LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err); + else + m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height); + } + + areai screen_area = coordi({}, { static_cast(buffer_width), static_cast(buffer_height) }); + + if (g_cfg.video.full_rgb_range_output && rsx::fcmp(avconfig->gamma, 1.f)) + { + // Blit source image to the screen + m_flip_fbo.recreate(); + m_flip_fbo.bind(); + m_flip_fbo.color = image; + m_flip_fbo.read_buffer(m_flip_fbo.color); + m_flip_fbo.draw_buffer(m_flip_fbo.color); + m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear); + } + else + { + const f32 gamma = avconfig->gamma; + const bool limited_range = !g_cfg.video.full_rgb_range_output; + + gl::screen.bind(); + m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range); + } + } + + if (m_overlay_manager) + { + if (m_overlay_manager->has_dirty()) + { + m_overlay_manager->lock(); + + std::vector uids_to_dispose; + uids_to_dispose.reserve(m_overlay_manager->get_dirty().size()); + + for (const auto& view : m_overlay_manager->get_dirty()) + { + m_ui_renderer.remove_temp_resources(view->uid); + uids_to_dispose.push_back(view->uid); + } + + m_overlay_manager->unlock(); + m_overlay_manager->dispose(uids_to_dispose); + } + + if (m_overlay_manager->has_visible()) + { + gl::screen.bind(); + + // Lock to avoid modification during run-update chain + std::lock_guard lock(*m_overlay_manager); + + for (const auto& view : m_overlay_manager->get_views()) + { + m_ui_renderer.run(areau(aspect_ratio), 0, *view.get()); + } + } + } + + if (g_cfg.video.overlay) + { + gl::screen.bind(); + glViewport(0, 0, m_frame->client_width(), m_frame->client_height()); + + m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load())); + m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls)); + m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time)); + m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time)); + m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time)); + m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time)); + + const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); + const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); + const auto num_flushes = m_gl_texture_cache.get_num_flush_requests(); + const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions(); + const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes(); + const auto num_misses = m_gl_texture_cache.get_num_cache_misses(); + const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults(); + const auto cache_miss_ratio = static_cast(ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100)); + m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures)); + m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size)); + m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate)); + } + + m_frame->flip(m_context); + rsx::thread::flip(info); + + // Cleanup + m_gl_texture_cache.on_frame_end(); + m_vertex_cache->purge(); + + auto removed_textures = m_rtts.free_invalidated(); + m_framebuffer_cache.remove_if([&](auto& fbo) + { + if (fbo.unused_check_count() >= 2) return true; // Remove if stale + if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid + + return false; + }); + + if (m_draw_fbo && !m_rtts_dirty) + { + // Always restore the active framebuffer + m_draw_fbo->bind(); + set_viewport(); + set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped)); + } +} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index f8ac77b043..9a3cb3109a 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -873,7 +873,7 @@ void VKGSRender::check_heap_status(u32 flags) { m_profiler.start(); - frame_context_t *target_frame = nullptr; + vk::frame_context_t *target_frame = nullptr; if (!m_queued_frames.empty()) { if (m_current_frame != &m_aux_frame_context) @@ -2224,196 +2224,6 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) } } -void VKGSRender::advance_queued_frames() -{ - // Check all other frames for completion and clear resources - check_present_status(); - - //m_rtts storage is double buffered and should be safe to tag on frame boundary - m_rtts.free_invalidated(); - - //texture cache is also double buffered to prevent use-after-free - m_texture_cache.on_frame_end(); - m_samplers_dirty.store(true); - - vk::remove_unused_framebuffers(); - - m_vertex_cache->purge(); - m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(), - m_vertex_env_ring_info.get_current_put_pos_minus_one(), - m_fragment_env_ring_info.get_current_put_pos_minus_one(), - m_vertex_layout_ring_info.get_current_put_pos_minus_one(), - m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(), - m_fragment_constants_ring_info.get_current_put_pos_minus_one(), - m_transform_constants_ring_info.get_current_put_pos_minus_one(), - m_index_buffer_ring_info.get_current_put_pos_minus_one(), - m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one()); - - m_queued_frames.push_back(m_current_frame); - verify(HERE), m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES; - - m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES; - m_current_frame = &frame_context_storage[m_current_queue_index]; - m_current_frame->flags |= frame_context_state::dirty; - - vk::advance_frame_counter(); -} - -void VKGSRender::present(frame_context_t *ctx) -{ - verify(HERE), ctx->present_image != UINT32_MAX; - - // Partial CS flush - ctx->swap_command_buffer->flush(); - - if (!swapchain_unavailable) - { - switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image)) - { - case VK_SUCCESS: - break; - case VK_SUBOPTIMAL_KHR: - should_reinitialize_swapchain = true; - break; - case VK_ERROR_OUT_OF_DATE_KHR: - swapchain_unavailable = true; - break; - default: - vk::die_with_error(HERE, error); - } - } - - // Presentation image released; reset value - ctx->present_image = UINT32_MAX; -} - -void VKGSRender::queue_swap_request() -{ - verify(HERE), !m_current_frame->swap_command_buffer; - m_current_frame->swap_command_buffer = m_current_command_buffer; - - if (m_swapchain->is_headless()) - { - m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image); - close_and_submit_command_buffer(m_current_command_buffer->submit_fence); - } - else - { - close_and_submit_command_buffer(m_current_command_buffer->submit_fence, - m_current_frame->acquire_signal_semaphore, - m_current_frame->present_wait_semaphore, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT); - } - - // Signal pending state as the command queue is now closed - m_current_frame->swap_command_buffer->pending = true; - - // Set up a present request for this frame as well - present(m_current_frame); - - // Grab next cb in line and make it usable - m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; - m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; - m_current_command_buffer->reset(); - - // Set up new pointers for the next frame - advance_queued_frames(); - open_command_buffer(); -} - -void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources) -{ - verify(HERE), ctx->swap_command_buffer; - - if (ctx->swap_command_buffer->pending) - { - // Perform hard swap here - if (ctx->swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT) != VK_SUCCESS) - { - // Lost surface/device, release swapchain - swapchain_unavailable = true; - } - - free_resources = true; - } - - if (free_resources) - { - if (g_cfg.video.overlay) - { - m_text_writer->reset_descriptors(); - } - - if (m_overlay_manager && m_overlay_manager->has_dirty()) - { - m_overlay_manager->lock(); - - std::vector uids_to_dispose; - uids_to_dispose.reserve(m_overlay_manager->get_dirty().size()); - - for (const auto& view : m_overlay_manager->get_dirty()) - { - m_ui_renderer->remove_temp_resources(view->uid); - uids_to_dispose.push_back(view->uid); - } - - m_overlay_manager->unlock(); - m_overlay_manager->dispose(uids_to_dispose); - } - - vk::reset_global_resources(); - - m_attachment_clear_pass->free_resources(); - m_depth_converter->free_resources(); - m_ui_renderer->free_resources(); - m_video_output_pass->free_resources(); - - ctx->buffer_views_to_clean.clear(); - - if (ctx->last_frame_sync_time > m_last_heap_sync_time) - { - m_last_heap_sync_time = ctx->last_frame_sync_time; - - //Heap cleanup; deallocates memory consumed by the frame if it is still held - m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr; - m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr; - m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr; - m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr; - m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr; - m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr; - m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr; - m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; - m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; - - m_attrib_ring_info.notify(); - m_vertex_env_ring_info.notify(); - m_fragment_env_ring_info.notify(); - m_fragment_constants_ring_info.notify(); - m_transform_constants_ring_info.notify(); - m_vertex_layout_ring_info.notify(); - m_fragment_texture_params_ring_info.notify(); - m_index_buffer_ring_info.notify(); - m_texture_upload_buffer_ring_info.notify(); - } - } - - ctx->swap_command_buffer = nullptr; - - // Remove from queued list - while (!m_queued_frames.empty()) - { - auto frame = m_queued_frames.front(); - m_queued_frames.pop_front(); - - if (frame == ctx) - { - break; - } - } - - vk::advance_completed_frame_counter(); -} - void VKGSRender::do_local_task(rsx::FIFO_state state) { if (m_queue_status & flush_queue_state::deadlock) @@ -3115,483 +2925,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) check_zcull_status(true); } -void VKGSRender::reinitialize_swapchain() -{ - m_swapchain_dims.width = m_frame->client_width(); - m_swapchain_dims.height = m_frame->client_height(); - - // Reject requests to acquire new swapchain if the window is minimized - // The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized - // However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state - if (m_swapchain_dims.width == 0 || m_swapchain_dims.height == 0) - { - swapchain_unavailable = true; - return; - } - - // NOTE: This operation will create a hard sync point - close_and_submit_command_buffer(m_current_command_buffer->submit_fence); - m_current_command_buffer->pending = true; - m_current_command_buffer->reset(); - - for (auto &ctx : frame_context_storage) - { - if (ctx.present_image == UINT32_MAX) - continue; - - // Release present image by presenting it - frame_context_cleanup(&ctx, true); - } - - // Drain all the queues - vkDeviceWaitIdle(*m_device); - - // Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call - if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height)) - { - LOG_WARNING(RSX, "Swapchain initialization failed. Request ignored [%dx%d]", m_swapchain_dims.width, m_swapchain_dims.height); - swapchain_unavailable = true; - open_command_buffer(); - return; - } - - // Prepare new swapchain images for use - open_command_buffer(); - - for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i) - { - const auto target_layout = m_swapchain->get_optimal_present_layout(); - const auto target_image = m_swapchain->get_image(i); - VkClearColorValue clear_color{}; - VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - - vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range); - vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range); - vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range); - } - - //Will have to block until rendering is completed - vk::fence resize_fence(*m_device); - - //Flush the command buffer - close_and_submit_command_buffer(&resize_fence); - vk::wait_for_fence(&resize_fence); - - m_current_command_buffer->reset(); - open_command_buffer(); - - swapchain_unavailable = false; - should_reinitialize_swapchain = false; -} - -void VKGSRender::flip(const rsx::display_flip_info_t& info) -{ - // Check swapchain condition/status - if (!m_swapchain->supports_automatic_wm_reports()) - { - if (m_swapchain_dims.width != m_frame->client_width() || - m_swapchain_dims.height != m_frame->client_height()) - { - swapchain_unavailable = true; - } - } - - if (swapchain_unavailable || should_reinitialize_swapchain) - { - reinitialize_swapchain(); - } - - m_profiler.start(); - - if (m_current_frame == &m_aux_frame_context) - { - m_current_frame = &frame_context_storage[m_current_queue_index]; - if (m_current_frame->swap_command_buffer) - { - // Its possible this flip request is triggered by overlays and the flip queue is in undefined state - frame_context_cleanup(m_current_frame, true); - } - - // Swap aux storage and current frame; aux storage should always be ready for use at all times - m_current_frame->swap_storage(m_aux_frame_context); - m_current_frame->grab_resources(m_aux_frame_context); - } - else if (m_current_frame->swap_command_buffer) - { - if (info.stats.draw_calls > 0) - { - // This can be 'legal' if the window was being resized and no polling happened because of swapchain_unavailable flag - LOG_ERROR(RSX, "Possible data corruption on frame context storage detected"); - } - - // There were no draws and back-to-back flips happened - frame_context_cleanup(m_current_frame, true); - } - - if (info.skip_frame || swapchain_unavailable) - { - if (!info.skip_frame) - { - verify(HERE), swapchain_unavailable; - - // Perform a mini-flip here without invoking present code - m_current_frame->swap_command_buffer = m_current_command_buffer; - flush_command_queue(true); - vk::advance_frame_counter(); - frame_context_cleanup(m_current_frame, true); - } - - m_frame->flip(m_context); - rsx::thread::flip(info); - return; - } - - u32 buffer_width = display_buffers[info.buffer].width; - u32 buffer_height = display_buffers[info.buffer].height; - u32 buffer_pitch = display_buffers[info.buffer].pitch; - - u32 av_format; - const auto avconfig = g_fxo->get(); - - if (avconfig->state) - { - av_format = avconfig->get_compatible_gcm_format(); - if (!buffer_pitch) - buffer_pitch = buffer_width * avconfig->get_bpp(); - - buffer_width = std::min(buffer_width, avconfig->resolution_x); - buffer_height = std::min(buffer_height, avconfig->resolution_y); - } - else - { - av_format = CELL_GCM_TEXTURE_A8R8G8B8; - if (!buffer_pitch) - buffer_pitch = buffer_width * 4; - } - - coordi aspect_ratio; - - sizei csize = m_swapchain_dims; - sizei new_size = csize; - - if (!g_cfg.video.stretch_to_display_area) - { - const double aq = 1. * buffer_width / buffer_height; - const double rq = 1. * new_size.width / new_size.height; - const double q = aq / rq; - - if (q > 1.0) - { - new_size.height = static_cast(new_size.height / q); - aspect_ratio.y = (csize.height - new_size.height) / 2; - } - else if (q < 1.0) - { - new_size.width = static_cast(new_size.width * q); - aspect_ratio.x = (csize.width - new_size.width) / 2; - } - } - - aspect_ratio.size = new_size; - - //Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be - verify(HERE), m_current_frame->present_image == UINT32_MAX; - verify(HERE), m_current_frame->swap_command_buffer == nullptr; - - u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull; - while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image)) - { - switch (status) - { - case VK_TIMEOUT: - case VK_NOT_READY: - { - //In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images. - //This means that any acquired images have to be released - //before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available - //This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image - //Found on AMD Crimson 17.7.2 - - - //Whatever returned from status, this is now a spin - timeout = 0ull; - check_present_status(); - continue; - } - case VK_SUBOPTIMAL_KHR: - should_reinitialize_swapchain = true; - break; - case VK_ERROR_OUT_OF_DATE_KHR: - LOG_WARNING(RSX, "vkAcquireNextImageKHR failed with VK_ERROR_OUT_OF_DATE_KHR. Flip request ignored until surface is recreated."); - swapchain_unavailable = true; - reinitialize_swapchain(); - continue; - default: - vk::die_with_error(HERE, status); - } - } - - //Confirm that the driver did not silently fail - verify(HERE), m_current_frame->present_image != UINT32_MAX; - - //Blit contents to screen.. - vk::image* image_to_flip = nullptr; - - if (info.buffer < display_buffers_count && buffer_width && buffer_height) - { - const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL); - - if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address)) - { - if (render_target_texture->last_use_tag == m_rtts.write_tag) - { - image_to_flip = render_target_texture; - } - else - { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read); - if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) - { - // Confirmed to be the newest data source in that range - image_to_flip = render_target_texture; - } - } - - if (image_to_flip) - { - buffer_width = rsx::apply_resolution_scale(buffer_width, true); - buffer_height = rsx::apply_resolution_scale(buffer_height, true); - - if (buffer_width > render_target_texture->width() || - buffer_height > render_target_texture->height()) - { - // TODO: Should emit only once to avoid flooding the log file - // TODO: Take AA scaling into account - LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d", - display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y, - render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels)); - - buffer_width = render_target_texture->width(); - buffer_height = render_target_texture->height(); - } - } - } - else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address, av_format, buffer_width, buffer_height)) - { - //Hack - this should be the first location to check for output - //The render might have been done offscreen or in software and a blit used to display - image_to_flip = surface->get_raw_texture(); - } - - if (!image_to_flip) - { - // Read from cell - const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height); - const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage; - const auto overlap = m_texture_cache.find_texture_from_range(range, 0, lookup_mask); - - for (const auto & section : overlap) - { - if (!section->is_synchronized()) - { - section->copy_texture(*m_current_command_buffer, true); - } - } - - if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer) - { - // Submit for processing to lower hard fault penalty - flush_command_queue(); - } - - m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read); - image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height); - } - } - - VkImage target_image = m_swapchain->get_image(m_current_frame->present_image); - const auto present_layout = m_swapchain->get_optimal_present_layout(); - - const VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - VkImageLayout target_layout = present_layout; - - VkRenderPass single_target_pass = VK_NULL_HANDLE; - vk::framebuffer_holder* direct_fbo = nullptr; - vk::viewable_image* calibration_src = nullptr; - - if (image_to_flip) - { - if (aspect_ratio.x || aspect_ratio.y) - { - VkClearColorValue clear_black {}; - vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range); - - target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - } - - if (UNLIKELY(!g_cfg.video.full_rgb_range_output || !rsx::fcmp(avconfig->gamma, 1.f))) - { - calibration_src = dynamic_cast(image_to_flip); - verify("Image handle not viewable!" HERE), calibration_src; - } - - if (LIKELY(!calibration_src)) - { - vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout, - { 0, 0, static_cast(buffer_width), static_cast(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false); - } - else - { - vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range); - target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - - const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format()); - single_target_pass = vk::get_renderpass(*m_device, key); - verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE; - - direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image); - direct_fbo->add_ref(); - - image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !g_cfg.video.full_rgb_range_output, single_target_pass); - image_to_flip->pop_layout(*m_current_command_buffer); - - direct_fbo->release(); - } - } - else - { - //No draw call was issued! - //TODO: Upload raw bytes from cpu for rendering - VkClearColorValue clear_black {}; - vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range); - - target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - } - - if (m_frame->screenshot_toggle == true) - { - m_frame->screenshot_toggle = false; - - const size_t sshot_size = buffer_height * buffer_width * 4; - - vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0); - - VkBufferImageCopy copy_info; - copy_info.bufferOffset = 0; - copy_info.bufferRowLength = 0; - copy_info.bufferImageHeight = 0; - copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - copy_info.imageSubresource.baseArrayLayer = 0; - copy_info.imageSubresource.layerCount = 1; - copy_info.imageSubresource.mipLevel = 0; - copy_info.imageOffset.x = 0; - copy_info.imageOffset.y = 0; - copy_info.imageOffset.z = 0; - copy_info.imageExtent.width = buffer_width; - copy_info.imageExtent.height = buffer_height; - copy_info.imageExtent.depth = 1; - - image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info); - image_to_flip->pop_layout(*m_current_command_buffer); - - flush_command_queue(true); - auto src = sshot_vkbuf.map(0, sshot_size); - std::vector sshot_frame(sshot_size); - memcpy(sshot_frame.data(), src, sshot_size); - sshot_vkbuf.unmap(); - - m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height); - } - - const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible()); - if (g_cfg.video.overlay || has_overlay) - { - if (target_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) - { - // Change the image layout whilst setting up a dependency on waiting for the blit op to finish before we start writing - VkImageMemoryBarrier barrier = {}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier.oldLayout = target_layout; - barrier.image = target_image; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.subresourceRange = subresource_range; - vkCmdPipelineBarrier(*m_current_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - - target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } - - if (!direct_fbo) - { - const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format()); - single_target_pass = vk::get_renderpass(*m_device, key); - verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE; - - direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image); - } - - direct_fbo->add_ref(); - - if (has_overlay) - { - // Lock to avoid modification during run-update chain - std::lock_guard lock(*m_overlay_manager); - - for (const auto& view : m_overlay_manager->get_views()) - { - m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get()); - } - } - - if (g_cfg.video.overlay) - { - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load())); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time)); - - const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); - const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); - const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024); - const auto num_flushes = m_texture_cache.get_num_flush_requests(); - const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions(); - const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes(); - const auto num_misses = m_texture_cache.get_num_cache_misses(); - const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults(); - const auto cache_miss_ratio = static_cast(ceil(m_texture_cache.get_cache_miss_ratio() * 100)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate)); - } - - direct_fbo->release(); - } - - if (target_layout != present_layout) - { - vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, present_layout, subresource_range); - } - - queue_swap_request(); - - m_frame_stats.flip_time = m_profiler.duration(); - - m_frame->flip(m_context); - rsx::thread::flip(info); -} - void VKGSRender::renderctl(u32 request_code, void* args) { switch (request_code) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 8f981a83a5..1607fffc3c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -50,74 +50,85 @@ namespace vk using rsx::flags32_t; extern u64 get_system_time(); -enum +namespace vk { - VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1, - VK_HEAP_CHECK_VERTEX_STORAGE = 0x2, - VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4, - VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8, - VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10, - VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20, - VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40, - VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80, - - VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE, - VK_HEAP_CHECK_ALL = 0xFF, -}; - -struct command_buffer_chunk: public vk::command_buffer -{ - vk::fence* submit_fence = nullptr; - VkDevice m_device = VK_NULL_HANDLE; - - std::atomic_bool pending = { false }; - u64 eid_tag = 0; - u64 reset_id = 0; - shared_mutex guard_mutex; - - command_buffer_chunk() = default; - - void init_fence(VkDevice dev) + struct command_buffer_chunk: public vk::command_buffer { - m_device = dev; - submit_fence = new vk::fence(dev); - } + vk::fence* submit_fence = nullptr; + VkDevice m_device = VK_NULL_HANDLE; - void destroy() - { - vk::command_buffer::destroy(); - delete submit_fence; - } + std::atomic_bool pending = { false }; + u64 eid_tag = 0; + u64 reset_id = 0; + shared_mutex guard_mutex; - void tag() - { - eid_tag = vk::get_event_id(); - } + command_buffer_chunk() = default; - void reset() - { - if (pending) - poke(); - - if (pending) - wait(FRAME_PRESENT_TIMEOUT); - - ++reset_id; - CHECK_RESULT(vkResetCommandBuffer(commands, 0)); - } - - bool poke() - { - reader_lock lock(guard_mutex); - - if (!pending) - return true; - - if (!submit_fence->flushed) - return false; - - if (vkGetFenceStatus(m_device, submit_fence->handle) == VK_SUCCESS) + void init_fence(VkDevice dev) { + m_device = dev; + submit_fence = new vk::fence(dev); + } + + void destroy() + { + vk::command_buffer::destroy(); + delete submit_fence; + } + + void tag() + { + eid_tag = vk::get_event_id(); + } + + void reset() + { + if (pending) + poke(); + + if (pending) + wait(FRAME_PRESENT_TIMEOUT); + + ++reset_id; + CHECK_RESULT(vkResetCommandBuffer(commands, 0)); + } + + bool poke() + { + reader_lock lock(guard_mutex); + + if (!pending) + return true; + + if (!submit_fence->flushed) + return false; + + if (vkGetFenceStatus(m_device, submit_fence->handle) == VK_SUCCESS) + { + lock.upgrade(); + + if (pending) + { + vk::reset_fence(submit_fence); + vk::on_event_completed(eid_tag); + + pending = false; + eid_tag = 0; + } + } + + return !pending; + } + + VkResult wait(u64 timeout = 0ull) + { + reader_lock lock(guard_mutex); + + if (!pending) + return VK_SUCCESS; + + const auto ret = vk::wait_for_fence(submit_fence, timeout); + lock.upgrade(); if (pending) @@ -128,210 +139,212 @@ struct command_buffer_chunk: public vk::command_buffer pending = false; eid_tag = 0; } + + return ret; } - return !pending; - } - - VkResult wait(u64 timeout = 0ull) - { - reader_lock lock(guard_mutex); - - if (!pending) - return VK_SUCCESS; - - const auto ret = vk::wait_for_fence(submit_fence, timeout); - - lock.upgrade(); - - if (pending) + void flush() { - vk::reset_fence(submit_fence); - vk::on_event_completed(eid_tag); + reader_lock lock(guard_mutex); - pending = false; - eid_tag = 0; + if (!pending) + return; + + submit_fence->wait_flush(); } + }; - return ret; - } - - void flush() + struct occlusion_data { - reader_lock lock(guard_mutex); + rsx::simple_array indices; + command_buffer_chunk* command_buffer_to_wait = nullptr; + u64 command_buffer_sync_id = 0; - if (!pending) - return; - - submit_fence->wait_flush(); - } -}; - -struct occlusion_data -{ - rsx::simple_array indices; - command_buffer_chunk* command_buffer_to_wait = nullptr; - u64 command_buffer_sync_id = 0; - - bool is_current(command_buffer_chunk* cmd) const - { - return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id); - } - - void set_sync_command_buffer(command_buffer_chunk* cmd) - { - command_buffer_to_wait = cmd; - command_buffer_sync_id = cmd->reset_id; - } - - void sync() - { - if (command_buffer_to_wait->reset_id == command_buffer_sync_id) + bool is_current(command_buffer_chunk* cmd) const { - // Allocation stack is FIFO and very long so no need to actually wait for fence signal - command_buffer_to_wait->flush(); + return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id); } - } -}; -enum frame_context_state : u32 -{ - dirty = 1 -}; - -struct frame_context_t -{ - VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; - VkSemaphore present_wait_semaphore = VK_NULL_HANDLE; - VkDescriptorSet descriptor_set = VK_NULL_HANDLE; - - vk::descriptor_pool descriptor_pool; - u32 used_descriptors = 0; - - flags32_t flags = 0; - - std::vector> buffer_views_to_clean; - - u32 present_image = UINT32_MAX; - command_buffer_chunk* swap_command_buffer = nullptr; - - //Heap pointers - s64 attrib_heap_ptr = 0; - s64 vtx_env_heap_ptr = 0; - s64 frag_env_heap_ptr = 0; - s64 frag_const_heap_ptr = 0; - s64 vtx_const_heap_ptr = 0; - s64 vtx_layout_heap_ptr = 0; - s64 frag_texparam_heap_ptr = 0; - s64 index_heap_ptr = 0; - s64 texture_upload_heap_ptr = 0; - - u64 last_frame_sync_time = 0; - - //Copy shareable information - void grab_resources(frame_context_t &other) - { - present_wait_semaphore = other.present_wait_semaphore; - acquire_signal_semaphore = other.acquire_signal_semaphore; - descriptor_set = other.descriptor_set; - descriptor_pool = other.descriptor_pool; - used_descriptors = other.used_descriptors; - flags = other.flags; - - attrib_heap_ptr = other.attrib_heap_ptr; - vtx_env_heap_ptr = other.vtx_env_heap_ptr; - frag_env_heap_ptr = other.frag_env_heap_ptr; - vtx_layout_heap_ptr = other.vtx_layout_heap_ptr; - frag_texparam_heap_ptr = other.frag_texparam_heap_ptr; - frag_const_heap_ptr = other.frag_const_heap_ptr; - vtx_const_heap_ptr = other.vtx_const_heap_ptr; - index_heap_ptr = other.index_heap_ptr; - texture_upload_heap_ptr = other.texture_upload_heap_ptr; - } - - //Exchange storage (non-copyable) - void swap_storage(frame_context_t &other) - { - std::swap(buffer_views_to_clean, other.buffer_views_to_clean); - } - - void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc,s64 vtxconst_loc, s64 index_loc, s64 texture_loc) - { - attrib_heap_ptr = attrib_loc; - vtx_env_heap_ptr = vtxenv_loc; - frag_env_heap_ptr = fragenv_loc; - vtx_layout_heap_ptr = vtxlayout_loc; - frag_texparam_heap_ptr = fragtex_loc; - frag_const_heap_ptr = fragconst_loc; - vtx_const_heap_ptr = vtxconst_loc; - index_heap_ptr = index_loc; - texture_upload_heap_ptr = texture_loc; - - last_frame_sync_time = get_system_time(); - } - - void reset_heap_ptrs() - { - last_frame_sync_time = 0; - } -}; - -struct flush_request_task -{ - atomic_t pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request - atomic_t num_waiters{ 0 }; //Number of threads waiting for this request to be serviced - bool hard_sync = false; - - flush_request_task() = default; - - void post(bool _hard_sync) - { - hard_sync = (hard_sync || _hard_sync); - pending_state = true; - num_waiters++; - } - - void remove_one() - { - num_waiters--; - } - - void clear_pending_flag() - { - hard_sync = false; - pending_state.store(false); - } - - bool pending() const - { - return pending_state.load(); - } - - void consumer_wait() const - { - while (num_waiters.load() != 0) + void set_sync_command_buffer(command_buffer_chunk* cmd) { - _mm_pause(); + command_buffer_to_wait = cmd; + command_buffer_sync_id = cmd->reset_id; } - } - void producer_wait() const - { - while (pending_state.load()) + void sync() { - std::this_thread::yield(); + if (command_buffer_to_wait->reset_id == command_buffer_sync_id) + { + // Allocation stack is FIFO and very long so no need to actually wait for fence signal + command_buffer_to_wait->flush(); + } } - } -}; + }; -enum flush_queue_state : u32 -{ - ok = 0, - deadlock = 1 -}; + struct frame_context_t + { + VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; + VkSemaphore present_wait_semaphore = VK_NULL_HANDLE; + VkDescriptorSet descriptor_set = VK_NULL_HANDLE; + + vk::descriptor_pool descriptor_pool; + u32 used_descriptors = 0; + + flags32_t flags = 0; + + std::vector> buffer_views_to_clean; + + u32 present_image = UINT32_MAX; + command_buffer_chunk* swap_command_buffer = nullptr; + + //Heap pointers + s64 attrib_heap_ptr = 0; + s64 vtx_env_heap_ptr = 0; + s64 frag_env_heap_ptr = 0; + s64 frag_const_heap_ptr = 0; + s64 vtx_const_heap_ptr = 0; + s64 vtx_layout_heap_ptr = 0; + s64 frag_texparam_heap_ptr = 0; + s64 index_heap_ptr = 0; + s64 texture_upload_heap_ptr = 0; + + u64 last_frame_sync_time = 0; + + //Copy shareable information + void grab_resources(frame_context_t &other) + { + present_wait_semaphore = other.present_wait_semaphore; + acquire_signal_semaphore = other.acquire_signal_semaphore; + descriptor_set = other.descriptor_set; + descriptor_pool = other.descriptor_pool; + used_descriptors = other.used_descriptors; + flags = other.flags; + + attrib_heap_ptr = other.attrib_heap_ptr; + vtx_env_heap_ptr = other.vtx_env_heap_ptr; + frag_env_heap_ptr = other.frag_env_heap_ptr; + vtx_layout_heap_ptr = other.vtx_layout_heap_ptr; + frag_texparam_heap_ptr = other.frag_texparam_heap_ptr; + frag_const_heap_ptr = other.frag_const_heap_ptr; + vtx_const_heap_ptr = other.vtx_const_heap_ptr; + index_heap_ptr = other.index_heap_ptr; + texture_upload_heap_ptr = other.texture_upload_heap_ptr; + } + + //Exchange storage (non-copyable) + void swap_storage(frame_context_t &other) + { + std::swap(buffer_views_to_clean, other.buffer_views_to_clean); + } + + void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc,s64 vtxconst_loc, s64 index_loc, s64 texture_loc) + { + attrib_heap_ptr = attrib_loc; + vtx_env_heap_ptr = vtxenv_loc; + frag_env_heap_ptr = fragenv_loc; + vtx_layout_heap_ptr = vtxlayout_loc; + frag_texparam_heap_ptr = fragtex_loc; + frag_const_heap_ptr = fragconst_loc; + vtx_const_heap_ptr = vtxconst_loc; + index_heap_ptr = index_loc; + texture_upload_heap_ptr = texture_loc; + + last_frame_sync_time = get_system_time(); + } + + void reset_heap_ptrs() + { + last_frame_sync_time = 0; + } + }; + + struct flush_request_task + { + atomic_t pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request + atomic_t num_waiters{ 0 }; //Number of threads waiting for this request to be serviced + bool hard_sync = false; + + flush_request_task() = default; + + void post(bool _hard_sync) + { + hard_sync = (hard_sync || _hard_sync); + pending_state = true; + num_waiters++; + } + + void remove_one() + { + num_waiters--; + } + + void clear_pending_flag() + { + hard_sync = false; + pending_state.store(false); + } + + bool pending() const + { + return pending_state.load(); + } + + void consumer_wait() const + { + while (num_waiters.load() != 0) + { + _mm_pause(); + } + } + + void producer_wait() const + { + while (pending_state.load()) + { + std::this_thread::yield(); + } + } + }; + + struct present_surface_info + { + u32 address; + u32 format; + u32 width; + u32 height; + u32 pitch; + }; +} class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control { +private: + enum + { + VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1, + VK_HEAP_CHECK_VERTEX_STORAGE = 0x2, + VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4, + VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8, + VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10, + VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20, + VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40, + VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80, + + VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE, + VK_HEAP_CHECK_ALL = 0xFF, + }; + + enum frame_context_state : u32 + { + dirty = 1 + }; + + enum flush_queue_state : u32 + { + ok = 0, + deadlock = 1 + }; + private: VKFragmentProgram m_fragment_prog; VKVertexProgram m_vertex_prog; @@ -382,15 +395,15 @@ private: vk::occlusion_query_pool m_occlusion_query_pool; bool m_occlusion_query_active = false; rsx::reports::occlusion_query_info *m_active_query_info = nullptr; - std::vector m_occlusion_map; + std::vector m_occlusion_map; shared_mutex m_secondary_cb_guard; vk::command_pool m_secondary_command_buffer_pool; vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations u32 m_current_cb_index = 0; - std::array m_primary_cb_list; - command_buffer_chunk* m_current_command_buffer = nullptr; + std::array m_primary_cb_list; + vk::command_buffer_chunk* m_current_command_buffer = nullptr; VkDescriptorSetLayout descriptor_layouts; VkPipelineLayout pipeline_layout; @@ -421,13 +434,13 @@ private: VkDescriptorBufferInfo m_fragment_constants_buffer_info; VkDescriptorBufferInfo m_fragment_texture_params_buffer_info; - std::array frame_context_storage; + std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage - frame_context_t m_aux_frame_context; + vk::frame_context_t m_aux_frame_context; u32 m_current_queue_index = 0; - frame_context_t* m_current_frame = nullptr; - std::deque m_queued_frames; + vk::frame_context_t* m_current_frame = nullptr; + std::deque m_queued_frames; VkViewport m_viewport{}; VkRect2D m_scissor{}; @@ -435,7 +448,7 @@ private: std::vector m_draw_buffers; shared_mutex m_flush_queue_mutex; - flush_request_task m_flush_requests; + vk::flush_request_task m_flush_requests; // Offloader thread deadlock recovery rsx::atomic_bitmask_t m_queue_status; @@ -471,11 +484,13 @@ private: void flush_command_queue(bool hard_sync = false); void queue_swap_request(); - void frame_context_cleanup(frame_context_t *ctx, bool free_resources = false); + void frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources = false); void advance_queued_frames(); - void present(frame_context_t *ctx); + void present(vk::frame_context_t *ctx); void reinitialize_swapchain(); + vk::image* get_present_source(vk::present_surface_info* info, const rsx::avconf* avconfig); + void begin_render_pass(); void close_render_pass(); diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp new file mode 100644 index 0000000000..cc6c87bc04 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -0,0 +1,692 @@ +#include "stdafx.h" +#include "VKGSRender.h" + + +void VKGSRender::reinitialize_swapchain() +{ + m_swapchain_dims.width = m_frame->client_width(); + m_swapchain_dims.height = m_frame->client_height(); + + // Reject requests to acquire new swapchain if the window is minimized + // The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized + // However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state + if (m_swapchain_dims.width == 0 || m_swapchain_dims.height == 0) + { + swapchain_unavailable = true; + return; + } + + // NOTE: This operation will create a hard sync point + close_and_submit_command_buffer(m_current_command_buffer->submit_fence); + m_current_command_buffer->pending = true; + m_current_command_buffer->reset(); + + for (auto &ctx : frame_context_storage) + { + if (ctx.present_image == UINT32_MAX) + continue; + + // Release present image by presenting it + frame_context_cleanup(&ctx, true); + } + + // Drain all the queues + vkDeviceWaitIdle(*m_device); + + // Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call + if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height)) + { + LOG_WARNING(RSX, "Swapchain initialization failed. Request ignored [%dx%d]", m_swapchain_dims.width, m_swapchain_dims.height); + swapchain_unavailable = true; + open_command_buffer(); + return; + } + + // Prepare new swapchain images for use + open_command_buffer(); + + for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i) + { + const auto target_layout = m_swapchain->get_optimal_present_layout(); + const auto target_image = m_swapchain->get_image(i); + VkClearColorValue clear_color{}; + VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range); + vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range); + vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range); + } + + //Will have to block until rendering is completed + vk::fence resize_fence(*m_device); + + //Flush the command buffer + close_and_submit_command_buffer(&resize_fence); + vk::wait_for_fence(&resize_fence); + + m_current_command_buffer->reset(); + open_command_buffer(); + + swapchain_unavailable = false; + should_reinitialize_swapchain = false; +} + +void VKGSRender::present(vk::frame_context_t *ctx) +{ + verify(HERE), ctx->present_image != UINT32_MAX; + + // Partial CS flush + ctx->swap_command_buffer->flush(); + + if (!swapchain_unavailable) + { + switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image)) + { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + should_reinitialize_swapchain = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + swapchain_unavailable = true; + break; + default: + vk::die_with_error(HERE, error); + } + } + + // Presentation image released; reset value + ctx->present_image = UINT32_MAX; +} + +void VKGSRender::advance_queued_frames() +{ + // Check all other frames for completion and clear resources + check_present_status(); + + //m_rtts storage is double buffered and should be safe to tag on frame boundary + m_rtts.free_invalidated(); + + //texture cache is also double buffered to prevent use-after-free + m_texture_cache.on_frame_end(); + m_samplers_dirty.store(true); + + vk::remove_unused_framebuffers(); + + m_vertex_cache->purge(); + m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(), + m_vertex_env_ring_info.get_current_put_pos_minus_one(), + m_fragment_env_ring_info.get_current_put_pos_minus_one(), + m_vertex_layout_ring_info.get_current_put_pos_minus_one(), + m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(), + m_fragment_constants_ring_info.get_current_put_pos_minus_one(), + m_transform_constants_ring_info.get_current_put_pos_minus_one(), + m_index_buffer_ring_info.get_current_put_pos_minus_one(), + m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one()); + + m_queued_frames.push_back(m_current_frame); + verify(HERE), m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES; + + m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES; + m_current_frame = &frame_context_storage[m_current_queue_index]; + m_current_frame->flags |= frame_context_state::dirty; + + vk::advance_frame_counter(); +} + +void VKGSRender::queue_swap_request() +{ + verify(HERE), !m_current_frame->swap_command_buffer; + m_current_frame->swap_command_buffer = m_current_command_buffer; + + if (m_swapchain->is_headless()) + { + m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image); + close_and_submit_command_buffer(m_current_command_buffer->submit_fence); + } + else + { + close_and_submit_command_buffer(m_current_command_buffer->submit_fence, + m_current_frame->acquire_signal_semaphore, + m_current_frame->present_wait_semaphore, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT); + } + + // Signal pending state as the command queue is now closed + m_current_frame->swap_command_buffer->pending = true; + + // Set up a present request for this frame as well + present(m_current_frame); + + // Grab next cb in line and make it usable + m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; + m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; + m_current_command_buffer->reset(); + + // Set up new pointers for the next frame + advance_queued_frames(); + open_command_buffer(); +} + +void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources) +{ + verify(HERE), ctx->swap_command_buffer; + + if (ctx->swap_command_buffer->pending) + { + // Perform hard swap here + if (ctx->swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT) != VK_SUCCESS) + { + // Lost surface/device, release swapchain + swapchain_unavailable = true; + } + + free_resources = true; + } + + if (free_resources) + { + if (g_cfg.video.overlay) + { + m_text_writer->reset_descriptors(); + } + + if (m_overlay_manager && m_overlay_manager->has_dirty()) + { + m_overlay_manager->lock(); + + std::vector uids_to_dispose; + uids_to_dispose.reserve(m_overlay_manager->get_dirty().size()); + + for (const auto& view : m_overlay_manager->get_dirty()) + { + m_ui_renderer->remove_temp_resources(view->uid); + uids_to_dispose.push_back(view->uid); + } + + m_overlay_manager->unlock(); + m_overlay_manager->dispose(uids_to_dispose); + } + + vk::reset_global_resources(); + + m_attachment_clear_pass->free_resources(); + m_depth_converter->free_resources(); + m_ui_renderer->free_resources(); + m_video_output_pass->free_resources(); + + ctx->buffer_views_to_clean.clear(); + + if (ctx->last_frame_sync_time > m_last_heap_sync_time) + { + m_last_heap_sync_time = ctx->last_frame_sync_time; + + //Heap cleanup; deallocates memory consumed by the frame if it is still held + m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr; + m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr; + m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr; + m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr; + m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr; + m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr; + m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr; + m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; + m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; + + m_attrib_ring_info.notify(); + m_vertex_env_ring_info.notify(); + m_fragment_env_ring_info.notify(); + m_fragment_constants_ring_info.notify(); + m_transform_constants_ring_info.notify(); + m_vertex_layout_ring_info.notify(); + m_fragment_texture_params_ring_info.notify(); + m_index_buffer_ring_info.notify(); + m_texture_upload_buffer_ring_info.notify(); + } + } + + ctx->swap_command_buffer = nullptr; + + // Remove from queued list + while (!m_queued_frames.empty()) + { + auto frame = m_queued_frames.front(); + m_queued_frames.pop_front(); + + if (frame == ctx) + { + break; + } + } + + vk::advance_completed_frame_counter(); +} + +vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf* avconfig) +{ + vk::image* image_to_flip = nullptr; + + if (auto render_target_texture = m_rtts.get_color_surface_at(info->address)) + { + if (render_target_texture->last_use_tag == m_rtts.write_tag) + { + image_to_flip = render_target_texture; + } + else + { + const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read); + if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) + { + // Confirmed to be the newest data source in that range + image_to_flip = render_target_texture; + } + } + + if (image_to_flip) + { + const auto buffer_width = rsx::apply_resolution_scale(info->width, true); + const auto buffer_height = rsx::apply_resolution_scale(info->height, true); + + if (buffer_width > render_target_texture->width() || + buffer_height > render_target_texture->height()) + { + // TODO: Should emit only once to avoid flooding the log file + // TODO: Take AA scaling into account + LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d", + info->width, info->height, + avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y, + render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels)); + + info->width = render_target_texture->width(); + info->height = render_target_texture->height(); + } + else + { + info->width = buffer_width; + info->height = buffer_height; + } + } + } + else if (auto surface = m_texture_cache.find_texture_from_dimensions(info->address, info->format, info->width, info->height)) + { + //Hack - this should be the first location to check for output + //The render might have been done offscreen or in software and a blit used to display + image_to_flip = surface->get_raw_texture(); + } + + if (!image_to_flip) + { + // Read from cell + const auto range = utils::address_range::start_length(info->address, info->pitch * info->height); + const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage; + const auto overlap = m_texture_cache.find_texture_from_range(range, 0, lookup_mask); + + for (const auto & section : overlap) + { + if (!section->is_synchronized()) + { + section->copy_texture(*m_current_command_buffer, true); + } + } + + if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer) + { + // Submit for processing to lower hard fault penalty + flush_command_queue(); + } + + m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read); + image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, info->address, info->width, info->height); + } + + return image_to_flip; +} + +void VKGSRender::flip(const rsx::display_flip_info_t& info) +{ + // Check swapchain condition/status + if (!m_swapchain->supports_automatic_wm_reports()) + { + if (m_swapchain_dims.width != m_frame->client_width() || + m_swapchain_dims.height != m_frame->client_height()) + { + swapchain_unavailable = true; + } + } + + if (swapchain_unavailable || should_reinitialize_swapchain) + { + reinitialize_swapchain(); + } + + m_profiler.start(); + + if (m_current_frame == &m_aux_frame_context) + { + m_current_frame = &frame_context_storage[m_current_queue_index]; + if (m_current_frame->swap_command_buffer) + { + // Its possible this flip request is triggered by overlays and the flip queue is in undefined state + frame_context_cleanup(m_current_frame, true); + } + + // Swap aux storage and current frame; aux storage should always be ready for use at all times + m_current_frame->swap_storage(m_aux_frame_context); + m_current_frame->grab_resources(m_aux_frame_context); + } + else if (m_current_frame->swap_command_buffer) + { + if (info.stats.draw_calls > 0) + { + // This can be 'legal' if the window was being resized and no polling happened because of swapchain_unavailable flag + LOG_ERROR(RSX, "Possible data corruption on frame context storage detected"); + } + + // There were no draws and back-to-back flips happened + frame_context_cleanup(m_current_frame, true); + } + + if (info.skip_frame || swapchain_unavailable) + { + if (!info.skip_frame) + { + verify(HERE), swapchain_unavailable; + + // Perform a mini-flip here without invoking present code + m_current_frame->swap_command_buffer = m_current_command_buffer; + flush_command_queue(true); + vk::advance_frame_counter(); + frame_context_cleanup(m_current_frame, true); + } + + m_frame->flip(m_context); + rsx::thread::flip(info); + return; + } + + u32 buffer_width = display_buffers[info.buffer].width; + u32 buffer_height = display_buffers[info.buffer].height; + u32 buffer_pitch = display_buffers[info.buffer].pitch; + + u32 av_format; + const auto avconfig = g_fxo->get(); + + if (avconfig->state) + { + av_format = avconfig->get_compatible_gcm_format(); + if (!buffer_pitch) + buffer_pitch = buffer_width * avconfig->get_bpp(); + + buffer_width = std::min(buffer_width, avconfig->resolution_x); + buffer_height = std::min(buffer_height, avconfig->resolution_y); + } + else + { + av_format = CELL_GCM_TEXTURE_A8R8G8B8; + if (!buffer_pitch) + buffer_pitch = buffer_width * 4; + } + + coordi aspect_ratio; + + sizei csize = m_swapchain_dims; + sizei new_size = csize; + + if (!g_cfg.video.stretch_to_display_area) + { + const double aq = 1. * buffer_width / buffer_height; + const double rq = 1. * new_size.width / new_size.height; + const double q = aq / rq; + + if (q > 1.0) + { + new_size.height = static_cast(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; + } + else if (q < 1.0) + { + new_size.width = static_cast(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; + } + } + + aspect_ratio.size = new_size; + + //Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be + verify(HERE), m_current_frame->present_image == UINT32_MAX; + verify(HERE), m_current_frame->swap_command_buffer == nullptr; + + u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull; + while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image)) + { + switch (status) + { + case VK_TIMEOUT: + case VK_NOT_READY: + { + //In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images. + //This means that any acquired images have to be released + //before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available + //This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image + //Found on AMD Crimson 17.7.2 + + + //Whatever returned from status, this is now a spin + timeout = 0ull; + check_present_status(); + continue; + } + case VK_SUBOPTIMAL_KHR: + should_reinitialize_swapchain = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + LOG_WARNING(RSX, "vkAcquireNextImageKHR failed with VK_ERROR_OUT_OF_DATE_KHR. Flip request ignored until surface is recreated."); + swapchain_unavailable = true; + reinitialize_swapchain(); + continue; + default: + vk::die_with_error(HERE, status); + } + } + + //Confirm that the driver did not silently fail + verify(HERE), m_current_frame->present_image != UINT32_MAX; + + //Blit contents to screen.. + vk::image* image_to_flip = nullptr; + + if (info.buffer < display_buffers_count && buffer_width && buffer_height) + { + vk::present_surface_info present_info; + present_info.width = buffer_width; + present_info.height = buffer_height; + present_info.pitch = buffer_pitch; + present_info.format = av_format; + present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL); + + image_to_flip = get_present_source(&present_info, avconfig); + buffer_width = present_info.width; + buffer_height = present_info.height; + } + + VkImage target_image = m_swapchain->get_image(m_current_frame->present_image); + const auto present_layout = m_swapchain->get_optimal_present_layout(); + + const VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + VkImageLayout target_layout = present_layout; + + VkRenderPass single_target_pass = VK_NULL_HANDLE; + vk::framebuffer_holder* direct_fbo = nullptr; + vk::viewable_image* calibration_src = nullptr; + + if (image_to_flip) + { + if (aspect_ratio.x || aspect_ratio.y) + { + VkClearColorValue clear_black {}; + vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range); + + target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + if (UNLIKELY(!g_cfg.video.full_rgb_range_output || !rsx::fcmp(avconfig->gamma, 1.f))) + { + calibration_src = dynamic_cast(image_to_flip); + verify("Image handle not viewable!" HERE), calibration_src; + } + + if (LIKELY(!calibration_src)) + { + vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout, + { 0, 0, static_cast(buffer_width), static_cast(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false); + } + else + { + vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range); + target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format()); + single_target_pass = vk::get_renderpass(*m_device, key); + verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE; + + direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image); + direct_fbo->add_ref(); + + image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !g_cfg.video.full_rgb_range_output, single_target_pass); + image_to_flip->pop_layout(*m_current_command_buffer); + + direct_fbo->release(); + } + } + else + { + //No draw call was issued! + //TODO: Upload raw bytes from cpu for rendering + VkClearColorValue clear_black {}; + vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range); + + target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + if (m_frame->screenshot_toggle == true) + { + m_frame->screenshot_toggle = false; + + const size_t sshot_size = buffer_height * buffer_width * 4; + + vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0); + + VkBufferImageCopy copy_info; + copy_info.bufferOffset = 0; + copy_info.bufferRowLength = 0; + copy_info.bufferImageHeight = 0; + copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_info.imageSubresource.baseArrayLayer = 0; + copy_info.imageSubresource.layerCount = 1; + copy_info.imageSubresource.mipLevel = 0; + copy_info.imageOffset.x = 0; + copy_info.imageOffset.y = 0; + copy_info.imageOffset.z = 0; + copy_info.imageExtent.width = buffer_width; + copy_info.imageExtent.height = buffer_height; + copy_info.imageExtent.depth = 1; + + image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info); + image_to_flip->pop_layout(*m_current_command_buffer); + + flush_command_queue(true); + auto src = sshot_vkbuf.map(0, sshot_size); + std::vector sshot_frame(sshot_size); + memcpy(sshot_frame.data(), src, sshot_size); + sshot_vkbuf.unmap(); + + m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height); + } + + const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible()); + if (g_cfg.video.overlay || has_overlay) + { + if (target_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) + { + // Change the image layout whilst setting up a dependency on waiting for the blit op to finish before we start writing + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier.oldLayout = target_layout; + barrier.image = target_image; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange = subresource_range; + vkCmdPipelineBarrier(*m_current_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); + + target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } + + if (!direct_fbo) + { + const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format()); + single_target_pass = vk::get_renderpass(*m_device, key); + verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE; + + direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image); + } + + direct_fbo->add_ref(); + + if (has_overlay) + { + // Lock to avoid modification during run-update chain + std::lock_guard lock(*m_overlay_manager); + + for (const auto& view : m_overlay_manager->get_views()) + { + m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get()); + } + } + + if (g_cfg.video.overlay) + { + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load())); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time)); + + const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); + const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); + const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024); + const auto num_flushes = m_texture_cache.get_num_flush_requests(); + const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions(); + const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes(); + const auto num_misses = m_texture_cache.get_num_cache_misses(); + const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults(); + const auto cache_miss_ratio = static_cast(ceil(m_texture_cache.get_cache_miss_ratio() * 100)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate)); + } + + direct_fbo->release(); + } + + if (target_layout != present_layout) + { + vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, present_layout, subresource_range); + } + + queue_swap_request(); + + m_frame_stats.flip_time = m_profiler.duration(); + + m_frame->flip(m_context); + rsx::thread::flip(info); +} \ No newline at end of file diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 3ae423cc3d..3964e08af3 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -93,6 +93,7 @@ + @@ -101,4 +102,4 @@ - \ No newline at end of file + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index c46d53a635..a95348e504 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -8,6 +8,7 @@ + @@ -28,4 +29,4 @@ - \ No newline at end of file + diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index ec89ad07fb..a1e344a216 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -52,6 +52,7 @@ + diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index 9d34fab16d..25b20b2211 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -8,6 +8,7 @@ + @@ -18,7 +19,7 @@ - + @@ -37,6 +38,6 @@ - +