rsx: Refactor out complex present code into separate files

- Also restructures the present code so that image lookup happens in a separate, reusable function.
kd-11 2020-01-17 19:24:33 +03:00 committed by kd-11
parent b07b5c9005
commit 7453e46a7c
11 changed files with 1293 additions and 1213 deletions
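
In outline, the change routes flip-time image lookup through a small descriptor struct plus a per-backend helper. A minimal sketch of the new GL call pattern, using only names the diff below introduces (the VK path mirrors it with vk::present_surface_info):

    gl::present_surface_info present_info;
    present_info.width   = buffer_width;    // display buffer dimensions
    present_info.height  = buffer_height;
    present_info.pitch   = buffer_pitch;
    present_info.format  = av_format;       // compatible GCM format from avconf
    present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);

    // Resolves render target -> texture cache -> CPU upload, clamping the
    // requested dimensions to whatever surface is actually found.
    const GLuint image = get_present_source(&present_info, avconfig);
    buffer_width  = present_info.width;
    buffer_height = present_info.height;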

View file

@@ -379,6 +379,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GL/GLFragmentProgram.cpp
RSX/GL/GLGSRender.cpp
RSX/GL/GLHelpers.cpp
RSX/GL/GLPresent.cpp
RSX/GL/GLRenderTargets.cpp
RSX/GL/GLTexture.cpp
RSX/GL/GLVertexBuffers.cpp
@@ -397,6 +398,7 @@ if(TARGET 3rdparty_vulkan)
RSX/VK/VKGSRender.cpp
RSX/VK/VKHelpers.cpp
RSX/VK/VKMemAlloc.cpp
RSX/VK/VKPresent.cpp
RSX/VK/VKProgramPipeline.cpp
RSX/VK/VKRenderPass.cpp
RSX/VK/VKResolveHelper.cpp

View file

@@ -1482,266 +1482,6 @@ void GLGSRender::update_draw_state()
m_frame_stats.setup_time += m_profiler.duration();
}
void GLGSRender::flip(const rsx::display_flip_info_t& info)
{
if (info.skip_frame)
{
m_frame->flip(m_context, true);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
// Disable scissor test (affects blit, clear, etc)
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// Clear the window background to black
gl_state.clear_color(0, 0, 0, 0);
gl::screen.bind();
gl::screen.clear(gl::buffers::color);
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
GLuint image = GL_NONE;
if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image = render_target_texture->raw_handle();
}
else
{
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
if (image)
{
buffer_width = rsx::apply_resolution_scale(buffer_width, true);
buffer_height = rsx::apply_resolution_scale(buffer_height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
}
}
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions<true>(absolute_address, av_format, buffer_width, buffer_height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
if (const auto tex = surface->get_raw_texture(); tex) image = tex->id();
}
if (!image)
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
gl::pixel_unpack_settings unpack_settings;
unpack_settings.alignment(1).row_length(buffer_pitch / 4);
if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ static_cast<int>(buffer_width), static_cast<int>(buffer_height) })
{
m_flip_tex_color = std::make_unique<gl::texture>(GL_TEXTURE_2D, buffer_width, buffer_height, 1, 1, GL_RGBA8);
}
gl::command_context cmd{ gl_state };
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read);
m_flip_tex_color->copy_from(vm::base(absolute_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings);
image = m_flip_tex_color->id();
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
std::vector<u8> sshot_frame(buffer_height * buffer_width * 4);
gl::pixel_pack_settings pack_settings{};
pack_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
else
glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
else
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
areai screen_area = coordi({}, { static_cast<int>(buffer_width), static_cast<int>(buffer_height) });
if (g_cfg.video.full_rgb_range_output && rsx::fcmp(avconfig->gamma, 1.f))
{
// Blit source image to the screen
m_flip_fbo.recreate();
m_flip_fbo.bind();
m_flip_fbo.color = image;
m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.draw_buffer(m_flip_fbo.color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
}
else
{
const f32 gamma = avconfig->gamma;
const bool limited_range = !g_cfg.video.full_rgb_range_output;
gl::screen.bind();
m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range);
}
}
if (m_overlay_manager)
{
if (m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer.remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
if (m_overlay_manager->has_visible())
{
gl::screen.bind();
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer.run(areau(aspect_ratio), 0, *view.get());
}
}
}
if (g_cfg.video.overlay)
{
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time));
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time));
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time));
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time));
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_gl_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
m_frame->flip(m_context);
rsx::thread::flip(info);
// Cleanup
m_gl_texture_cache.on_frame_end();
m_vertex_cache->purge();
auto removed_textures = m_rtts.free_invalidated();
m_framebuffer_cache.remove_if([&](auto& fbo)
{
if (fbo.unused_check_count() >= 2) return true; // Remove if stale
if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid
return false;
});
if (m_draw_fbo && !m_rtts_dirty)
{
// Always restore the active framebuffer
m_draw_fbo->bind();
set_viewport();
set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped));
}
}
bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
@@ -1762,7 +1502,7 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
if (result.num_flushable > 0)
{
work_item &task = post_flush_request(address, result);
auto &task = post_flush_request(address, result);
vm::temporary_unlock();
task.producer_wait();
@@ -1802,9 +1542,9 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
{
std::lock_guard lock(queue_guard);
work_queue.remove_if([](work_item &q) { return q.received; });
work_queue.remove_if([](auto &q) { return q.received; });
for (work_item& q : work_queue)
for (auto& q : work_queue)
{
if (q.processed) continue;
@@ -1843,11 +1583,11 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
}
}
work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
gl::work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
{
std::lock_guard lock(queue_guard);
work_item &result = work_queue.emplace_back();
auto &result = work_queue.emplace_back();
result.address_to_flush = address;
result.section_data = std::move(flush_data);
return result;
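
An aside on the flush handshake exercised above: on_access_violation() posts a work_item and blocks in producer_wait() until do_local_task() on the RSX thread services it; the queue later drops entries whose received flag is set. A self-contained sketch of that handshake, with std::atomic standing in for the diff's volatile flags (names mirror the diff; this is not the literal implementation):

    #include <atomic>
    #include <thread>

    struct work_item_sketch
    {
        std::atomic<bool> processed{false}; // set by the consumer (RSX thread) when the flush is done
        std::atomic<bool> received{false};  // set by the producer once it observes completion

        // Producer side (e.g. the thread that hit the access violation).
        void producer_wait()
        {
            while (!processed)
                std::this_thread::yield(); // spin politely until serviced
            received = true;               // entry may now be removed from the queue
        }

        // Consumer side: perform the flush, then publish completion.
        void mark_processed() { processed = true; }
    };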

View file

@@ -33,27 +33,36 @@ namespace gl
u32 volatile_mapping_offset;
std::optional<std::tuple<GLenum, u32> > index_info;
};
}
struct work_item
{
u32 address_to_flush = 0;
gl::texture_cache::thrashed_set section_data;
volatile bool processed = false;
volatile bool result = false;
volatile bool received = false;
void producer_wait()
struct work_item
{
while (!processed)
{
std::this_thread::yield();
}
u32 address_to_flush = 0;
gl::texture_cache::thrashed_set section_data;
received = true;
}
};
volatile bool processed = false;
volatile bool result = false;
volatile bool received = false;
void producer_wait()
{
while (!processed)
{
std::this_thread::yield();
}
received = true;
}
};
struct present_surface_info
{
u32 address;
u32 format;
u32 width;
u32 height;
u32 pitch;
};
}
class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
@@ -103,7 +112,7 @@ private:
gl::video_out_calibration_pass m_video_output_pass;
shared_mutex queue_guard;
std::list<work_item> work_queue;
std::list<gl::work_item> work_queue;
GLProgramBuffer m_prog_buffer;
draw_context_t m_decompiler_context;
@@ -145,12 +154,14 @@ private:
void update_draw_state();
GLuint get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig);
public:
void read_buffers();
void set_viewport();
void set_scissor(bool clip_viewport);
work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
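
get_present_source(), declared here and implemented in the new GLPresent.cpp below, resolves the display source in three tiers. A schematic restatement, where is_newest_data() and upload_from_cpu() are hypothetical stand-ins for the inline checks in the real function:

    GLuint lookup_present_source(gl::present_surface_info* info)
    {
        // 1. A render target at the display buffer address wins, but only if it is
        //    confirmed to hold the newest data for that range.
        if (auto rtt = m_rtts.get_color_surface_at(info->address))
        {
            if (is_newest_data(rtt)) // hypothetical: the last_use_tag / overlap check below
                return rtt->raw_handle();
        }
        // 2. Otherwise try the texture cache (offscreen or software renders blitted out).
        else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions<true>(
                     info->address, info->format, info->width, info->height))
        {
            if (const auto tex = surface->get_raw_texture())
                return tex->id();
        }
        // 3. Last resort: upload the surface from guest memory.
        return upload_from_cpu(info); // hypothetical wrapper over the m_flip_tex_color path
    }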

View file

@@ -0,0 +1,283 @@
#include "stdafx.h"
#include "GLGSRender.h"
GLuint GLGSRender::get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig)
{
GLuint image = GL_NONE;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image = render_target_texture->raw_handle();
}
else
{
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
if (image)
{
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
{
info->width = buffer_width;
info->height = buffer_height;
}
}
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
if (const auto tex = surface->get_raw_texture(); tex) image = tex->id();
}
if (!image)
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
gl::pixel_unpack_settings unpack_settings;
unpack_settings.alignment(1).row_length(info->pitch / 4);
if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ static_cast<int>(info->width), static_cast<int>(info->height) })
{
m_flip_tex_color = std::make_unique<gl::texture>(GL_TEXTURE_2D, info->width, info->height, 1, 1, GL_RGBA8);
}
gl::command_context cmd{ gl_state };
const auto range = utils::address_range::start_length(info->address, info->pitch * info->height);
m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read);
m_flip_tex_color->copy_from(vm::base(info->address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings);
image = m_flip_tex_color->id();
}
return image;
}
void GLGSRender::flip(const rsx::display_flip_info_t& info)
{
if (info.skip_frame)
{
m_frame->flip(m_context, true);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
// Disable scissor test (affects blit, clear, etc)
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// Clear the window background to black
gl_state.clear_color(0, 0, 0, 0);
gl::screen.bind();
gl::screen.clear(gl::buffers::color);
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
gl::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
const GLuint image = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
std::vector<u8> sshot_frame(buffer_height * buffer_width * 4);
gl::pixel_pack_settings pack_settings{};
pack_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
else
glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
else
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
areai screen_area = coordi({}, { static_cast<int>(buffer_width), static_cast<int>(buffer_height) });
if (g_cfg.video.full_rgb_range_output && rsx::fcmp(avconfig->gamma, 1.f))
{
// Blit source image to the screen
m_flip_fbo.recreate();
m_flip_fbo.bind();
m_flip_fbo.color = image;
m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.draw_buffer(m_flip_fbo.color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
}
else
{
const f32 gamma = avconfig->gamma;
const bool limited_range = !g_cfg.video.full_rgb_range_output;
gl::screen.bind();
m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range);
}
}
if (m_overlay_manager)
{
if (m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer.remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
if (m_overlay_manager->has_visible())
{
gl::screen.bind();
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer.run(areau(aspect_ratio), 0, *view.get());
}
}
}
if (g_cfg.video.overlay)
{
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time));
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time));
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time));
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time));
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_gl_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
m_frame->flip(m_context);
rsx::thread::flip(info);
// Cleanup
m_gl_texture_cache.on_frame_end();
m_vertex_cache->purge();
auto removed_textures = m_rtts.free_invalidated();
m_framebuffer_cache.remove_if([&](auto& fbo)
{
if (fbo.unused_check_count() >= 2) return true; // Remove if stale
if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid
return false;
});
if (m_draw_fbo && !m_rtts_dirty)
{
// Always restore the active framebuffer
m_draw_fbo->bind();
set_viewport();
set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped));
}
}
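
For intuition, the aspect-ratio block in flip() above is plain letterbox/pillarbox math. A self-contained restatement with a worked example (the sizes in the trailing comment are assumed):

    struct size2i { int width, height; };

    size2i fit_aspect(size2i buffer, size2i client, int& off_x, int& off_y)
    {
        const double aq = double(buffer.width) / buffer.height; // source aspect
        const double rq = double(client.width) / client.height; // window aspect
        const double q  = aq / rq;
        size2i out = client;
        off_x = off_y = 0;
        if (q > 1.0)
        {
            out.height = int(client.height / q);          // letterbox
            off_y = (client.height - out.height) / 2;
        }
        else if (q < 1.0)
        {
            out.width = int(client.width * q);            // pillarbox
            off_x = (client.width - out.width) / 2;
        }
        return out;
    }
    // e.g. a 1280x720 buffer in a 1920x1200 window: q ~= 1.111, so the image is
    // drawn at 1920x1080 with a 60px bar top and bottom (off_y = 60).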

View file

@@ -873,7 +873,7 @@ void VKGSRender::check_heap_status(u32 flags)
{
m_profiler.start();
frame_context_t *target_frame = nullptr;
vk::frame_context_t *target_frame = nullptr;
if (!m_queued_frames.empty())
{
if (m_current_frame != &m_aux_frame_context)
@@ -2224,196 +2224,6 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
}
}
void VKGSRender::advance_queued_frames()
{
// Check all other frames for completion and clear resources
check_present_status();
//m_rtts storage is double buffered and should be safe to tag on frame boundary
m_rtts.free_invalidated();
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.on_frame_end();
m_samplers_dirty.store(true);
vk::remove_unused_framebuffers();
m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_vertex_env_ring_info.get_current_put_pos_minus_one(),
m_fragment_env_ring_info.get_current_put_pos_minus_one(),
m_vertex_layout_ring_info.get_current_put_pos_minus_one(),
m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(),
m_fragment_constants_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
m_queued_frames.push_back(m_current_frame);
verify(HERE), m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES;
m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context_storage[m_current_queue_index];
m_current_frame->flags |= frame_context_state::dirty;
vk::advance_frame_counter();
}
void VKGSRender::present(frame_context_t *ctx)
{
verify(HERE), ctx->present_image != UINT32_MAX;
// Partial CS flush
ctx->swap_command_buffer->flush();
if (!swapchain_unavailable)
{
switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image))
{
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
swapchain_unavailable = true;
break;
default:
vk::die_with_error(HERE, error);
}
}
// Presentation image released; reset value
ctx->present_image = UINT32_MAX;
}
void VKGSRender::queue_swap_request()
{
verify(HERE), !m_current_frame->swap_command_buffer;
m_current_frame->swap_command_buffer = m_current_command_buffer;
if (m_swapchain->is_headless())
{
m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image);
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
}
else
{
close_and_submit_command_buffer(m_current_command_buffer->submit_fence,
m_current_frame->acquire_signal_semaphore,
m_current_frame->present_wait_semaphore,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
}
// Signal pending state as the command queue is now closed
m_current_frame->swap_command_buffer->pending = true;
// Set up a present request for this frame as well
present(m_current_frame);
// Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();
// Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}
void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources)
{
verify(HERE), ctx->swap_command_buffer;
if (ctx->swap_command_buffer->pending)
{
// Perform hard swap here
if (ctx->swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT) != VK_SUCCESS)
{
// Lost surface/device, release swapchain
swapchain_unavailable = true;
}
free_resources = true;
}
if (free_resources)
{
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
}
if (m_overlay_manager && m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer->remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
vk::reset_global_resources();
m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
m_ui_renderer->free_resources();
m_video_output_pass->free_resources();
ctx->buffer_views_to_clean.clear();
if (ctx->last_frame_sync_time > m_last_heap_sync_time)
{
m_last_heap_sync_time = ctx->last_frame_sync_time;
//Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr;
m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr;
m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr;
m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr;
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify();
m_vertex_env_ring_info.notify();
m_fragment_env_ring_info.notify();
m_fragment_constants_ring_info.notify();
m_transform_constants_ring_info.notify();
m_vertex_layout_ring_info.notify();
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
}
}
ctx->swap_command_buffer = nullptr;
// Remove from queued list
while (!m_queued_frames.empty())
{
auto frame = m_queued_frames.front();
m_queued_frames.pop_front();
if (frame == ctx)
{
break;
}
}
vk::advance_completed_frame_counter();
}
void VKGSRender::do_local_task(rsx::FIFO_state state)
{
if (m_queue_status & flush_queue_state::deadlock)
@@ -3115,483 +2925,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
check_zcull_status(true);
}
void VKGSRender::reinitialize_swapchain()
{
m_swapchain_dims.width = m_frame->client_width();
m_swapchain_dims.height = m_frame->client_height();
// Reject requests to acquire new swapchain if the window is minimized
// The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized
// However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state
if (m_swapchain_dims.width == 0 || m_swapchain_dims.height == 0)
{
swapchain_unavailable = true;
return;
}
// NOTE: This operation will create a hard sync point
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
m_current_command_buffer->pending = true;
m_current_command_buffer->reset();
for (auto &ctx : frame_context_storage)
{
if (ctx.present_image == UINT32_MAX)
continue;
// Release present image by presenting it
frame_context_cleanup(&ctx, true);
}
// Drain all the queues
vkDeviceWaitIdle(*m_device);
// Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call
if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height))
{
LOG_WARNING(RSX, "Swapchain initialization failed. Request ignored [%dx%d]", m_swapchain_dims.width, m_swapchain_dims.height);
swapchain_unavailable = true;
open_command_buffer();
return;
}
// Prepare new swapchain images for use
open_command_buffer();
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
{
const auto target_layout = m_swapchain->get_optimal_present_layout();
const auto target_image = m_swapchain->get_image(i);
VkClearColorValue clear_color{};
VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range);
}
//Will have to block until rendering is completed
vk::fence resize_fence(*m_device);
//Flush the command buffer
close_and_submit_command_buffer(&resize_fence);
vk::wait_for_fence(&resize_fence);
m_current_command_buffer->reset();
open_command_buffer();
swapchain_unavailable = false;
should_reinitialize_swapchain = false;
}
void VKGSRender::flip(const rsx::display_flip_info_t& info)
{
// Check swapchain condition/status
if (!m_swapchain->supports_automatic_wm_reports())
{
if (m_swapchain_dims.width != m_frame->client_width() ||
m_swapchain_dims.height != m_frame->client_height())
{
swapchain_unavailable = true;
}
}
if (swapchain_unavailable || should_reinitialize_swapchain)
{
reinitialize_swapchain();
}
m_profiler.start();
if (m_current_frame == &m_aux_frame_context)
{
m_current_frame = &frame_context_storage[m_current_queue_index];
if (m_current_frame->swap_command_buffer)
{
// It's possible this flip request is triggered by overlays and the flip queue is in an undefined state
frame_context_cleanup(m_current_frame, true);
}
// Swap aux storage and current frame; aux storage should always be ready for use at all times
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
}
else if (m_current_frame->swap_command_buffer)
{
if (info.stats.draw_calls > 0)
{
// This can be 'legal' if the window was being resized and no polling happened because of swapchain_unavailable flag
LOG_ERROR(RSX, "Possible data corruption on frame context storage detected");
}
// There were no draws and back-to-back flips happened
frame_context_cleanup(m_current_frame, true);
}
if (info.skip_frame || swapchain_unavailable)
{
if (!info.skip_frame)
{
verify(HERE), swapchain_unavailable;
// Perform a mini-flip here without invoking present code
m_current_frame->swap_command_buffer = m_current_command_buffer;
flush_command_queue(true);
vk::advance_frame_counter();
frame_context_cleanup(m_current_frame, true);
}
m_frame->flip(m_context);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
verify(HERE), m_current_frame->present_image == UINT32_MAX;
verify(HERE), m_current_frame->swap_command_buffer == nullptr;
u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull;
while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image))
{
switch (status)
{
case VK_TIMEOUT:
case VK_NOT_READY:
{
//In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
//This means that any acquired images have to be released
//before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available
//This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image
//Found on AMD Crimson 17.7.2
//Whatever status was returned, this is now a spin
timeout = 0ull;
check_present_status();
continue;
}
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
LOG_WARNING(RSX, "vkAcquireNextImageKHR failed with VK_ERROR_OUT_OF_DATE_KHR. Flip request ignored until surface is recreated.");
swapchain_unavailable = true;
reinitialize_swapchain();
continue;
default:
vk::die_with_error(HERE, status);
}
}
//Confirm that the driver did not silently fail
verify(HERE), m_current_frame->present_image != UINT32_MAX;
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
if (image_to_flip)
{
buffer_width = rsx::apply_resolution_scale(buffer_width, true);
buffer_height = rsx::apply_resolution_scale(buffer_height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
}
}
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(absolute_address, av_format, buffer_width, buffer_height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
}
if (!image_to_flip)
{
// Read from cell
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
for (const auto & section : overlap)
{
if (!section->is_synchronized())
{
section->copy_texture(*m_current_command_buffer, true);
}
}
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
}
}
VkImage target_image = m_swapchain->get_image(m_current_frame->present_image);
const auto present_layout = m_swapchain->get_optimal_present_layout();
const VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
VkImageLayout target_layout = present_layout;
VkRenderPass single_target_pass = VK_NULL_HANDLE;
vk::framebuffer_holder* direct_fbo = nullptr;
vk::viewable_image* calibration_src = nullptr;
if (image_to_flip)
{
if (aspect_ratio.x || aspect_ratio.y)
{
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (UNLIKELY(!g_cfg.video.full_rgb_range_output || !rsx::fcmp(avconfig->gamma, 1.f)))
{
calibration_src = dynamic_cast<vk::viewable_image*>(image_to_flip);
verify("Image handle not viewable!" HERE), calibration_src;
}
if (LIKELY(!calibration_src))
{
vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout,
{ 0, 0, static_cast<s32>(buffer_width), static_cast<s32>(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false);
}
else
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Unsupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
direct_fbo->add_ref();
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !g_cfg.video.full_rgb_range_output, single_target_pass);
image_to_flip->pop_layout(*m_current_command_buffer);
direct_fbo->release();
}
}
else
{
//No draw call was issued!
//TODO: Upload raw bytes from cpu for rendering
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible());
if (g_cfg.video.overlay || has_overlay)
{
if (target_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
{
// Change the image layout whilst setting up a dependency on waiting for the blit op to finish before we start writing
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
barrier.oldLayout = target_layout;
barrier.image = target_image;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = subresource_range;
vkCmdPipelineBarrier(*m_current_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
if (!direct_fbo)
{
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Unsupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
}
direct_fbo->add_ref();
if (has_overlay)
{
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get());
}
}
if (g_cfg.video.overlay)
{
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_texture_cache.get_cache_miss_ratio() * 100));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
direct_fbo->release();
}
if (target_layout != present_layout)
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, present_layout, subresource_range);
}
queue_swap_request();
m_frame_stats.flip_time = m_profiler.duration();
m_frame->flip(m_context);
rsx::thread::flip(info);
}
void VKGSRender::renderctl(u32 request_code, void* args)
{
switch (request_code)
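
Worth flagging from the flip() implementation above: the swapchain acquire loop degrades to a zero-timeout spin on VK_TIMEOUT/VK_NOT_READY (some drivers only let N-1 of N images be acquired at once), defers a rebuild on VK_SUBOPTIMAL_KHR, and rebuilds immediately on VK_ERROR_OUT_OF_DATE_KHR. Condensed from the diff, with the surrounding setup trimmed:

    u64 timeout = swap_image_count <= VK_MAX_ASYNC_FRAMES ? 0ull : 100000000ull;
    while (VkResult status = m_swapchain->acquire_next_swapchain_image(
               m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image))
    {
        switch (status)
        {
        case VK_TIMEOUT:
        case VK_NOT_READY:
            timeout = 0ull;            // degrade to a spin
            check_present_status();    // release frames the GPU has finished with
            continue;
        case VK_SUBOPTIMAL_KHR:
            should_reinitialize_swapchain = true; // still usable; rebuild on a later flip
            break;
        case VK_ERROR_OUT_OF_DATE_KHR:
            swapchain_unavailable = true;
            reinitialize_swapchain();  // surface lost; rebuild before retrying
            continue;
        default:
            vk::die_with_error(HERE, status);
        }
    }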

View file

@@ -50,74 +50,85 @@ namespace vk
using rsx::flags32_t;
extern u64 get_system_time();
enum
namespace vk
{
VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1,
VK_HEAP_CHECK_VERTEX_STORAGE = 0x2,
VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4,
VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8,
VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10,
VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20,
VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40,
VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80,
VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE,
VK_HEAP_CHECK_ALL = 0xFF,
};
struct command_buffer_chunk: public vk::command_buffer
{
vk::fence* submit_fence = nullptr;
VkDevice m_device = VK_NULL_HANDLE;
std::atomic_bool pending = { false };
u64 eid_tag = 0;
u64 reset_id = 0;
shared_mutex guard_mutex;
command_buffer_chunk() = default;
void init_fence(VkDevice dev)
struct command_buffer_chunk: public vk::command_buffer
{
m_device = dev;
submit_fence = new vk::fence(dev);
}
vk::fence* submit_fence = nullptr;
VkDevice m_device = VK_NULL_HANDLE;
void destroy()
{
vk::command_buffer::destroy();
delete submit_fence;
}
std::atomic_bool pending = { false };
u64 eid_tag = 0;
u64 reset_id = 0;
shared_mutex guard_mutex;
void tag()
{
eid_tag = vk::get_event_id();
}
command_buffer_chunk() = default;
void reset()
{
if (pending)
poke();
if (pending)
wait(FRAME_PRESENT_TIMEOUT);
++reset_id;
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
}
bool poke()
{
reader_lock lock(guard_mutex);
if (!pending)
return true;
if (!submit_fence->flushed)
return false;
if (vkGetFenceStatus(m_device, submit_fence->handle) == VK_SUCCESS)
void init_fence(VkDevice dev)
{
m_device = dev;
submit_fence = new vk::fence(dev);
}
void destroy()
{
vk::command_buffer::destroy();
delete submit_fence;
}
void tag()
{
eid_tag = vk::get_event_id();
}
void reset()
{
if (pending)
poke();
if (pending)
wait(FRAME_PRESENT_TIMEOUT);
++reset_id;
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
}
bool poke()
{
reader_lock lock(guard_mutex);
if (!pending)
return true;
if (!submit_fence->flushed)
return false;
if (vkGetFenceStatus(m_device, submit_fence->handle) == VK_SUCCESS)
{
lock.upgrade();
if (pending)
{
vk::reset_fence(submit_fence);
vk::on_event_completed(eid_tag);
pending = false;
eid_tag = 0;
}
}
return !pending;
}
VkResult wait(u64 timeout = 0ull)
{
reader_lock lock(guard_mutex);
if (!pending)
return VK_SUCCESS;
const auto ret = vk::wait_for_fence(submit_fence, timeout);
lock.upgrade();
if (pending)
@@ -128,210 +139,212 @@ struct command_buffer_chunk: public vk::command_buffer
pending = false;
eid_tag = 0;
}
return ret;
}
return !pending;
}
VkResult wait(u64 timeout = 0ull)
{
reader_lock lock(guard_mutex);
if (!pending)
return VK_SUCCESS;
const auto ret = vk::wait_for_fence(submit_fence, timeout);
lock.upgrade();
if (pending)
void flush()
{
vk::reset_fence(submit_fence);
vk::on_event_completed(eid_tag);
reader_lock lock(guard_mutex);
pending = false;
eid_tag = 0;
if (!pending)
return;
submit_fence->wait_flush();
}
};
return ret;
}
void flush()
struct occlusion_data
{
reader_lock lock(guard_mutex);
rsx::simple_array<u32> indices;
command_buffer_chunk* command_buffer_to_wait = nullptr;
u64 command_buffer_sync_id = 0;
if (!pending)
return;
submit_fence->wait_flush();
}
};
struct occlusion_data
{
rsx::simple_array<u32> indices;
command_buffer_chunk* command_buffer_to_wait = nullptr;
u64 command_buffer_sync_id = 0;
bool is_current(command_buffer_chunk* cmd) const
{
return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id);
}
void set_sync_command_buffer(command_buffer_chunk* cmd)
{
command_buffer_to_wait = cmd;
command_buffer_sync_id = cmd->reset_id;
}
void sync()
{
if (command_buffer_to_wait->reset_id == command_buffer_sync_id)
bool is_current(command_buffer_chunk* cmd) const
{
// Allocation stack is FIFO and very long so no need to actually wait for fence signal
command_buffer_to_wait->flush();
return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id);
}
}
};
enum frame_context_state : u32
{
dirty = 1
};
struct frame_context_t
{
VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE;
VkSemaphore present_wait_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
flags32_t flags = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 vtx_env_heap_ptr = 0;
s64 frag_env_heap_ptr = 0;
s64 frag_const_heap_ptr = 0;
s64 vtx_const_heap_ptr = 0;
s64 vtx_layout_heap_ptr = 0;
s64 frag_texparam_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_wait_semaphore = other.present_wait_semaphore;
acquire_signal_semaphore = other.acquire_signal_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
flags = other.flags;
attrib_heap_ptr = other.attrib_heap_ptr;
vtx_env_heap_ptr = other.vtx_env_heap_ptr;
frag_env_heap_ptr = other.frag_env_heap_ptr;
vtx_layout_heap_ptr = other.vtx_layout_heap_ptr;
frag_texparam_heap_ptr = other.frag_texparam_heap_ptr;
frag_const_heap_ptr = other.frag_const_heap_ptr;
vtx_const_heap_ptr = other.vtx_const_heap_ptr;
index_heap_ptr = other.index_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
void swap_storage(frame_context_t &other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc,s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
vtx_env_heap_ptr = vtxenv_loc;
frag_env_heap_ptr = fragenv_loc;
vtx_layout_heap_ptr = vtxlayout_loc;
frag_texparam_heap_ptr = fragtex_loc;
frag_const_heap_ptr = fragconst_loc;
vtx_const_heap_ptr = vtxconst_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
struct flush_request_task
{
atomic_t<bool> pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request
atomic_t<int> num_waiters{ 0 }; //Number of threads waiting for this request to be serviced
bool hard_sync = false;
flush_request_task() = default;
void post(bool _hard_sync)
{
hard_sync = (hard_sync || _hard_sync);
pending_state = true;
num_waiters++;
}
void remove_one()
{
num_waiters--;
}
void clear_pending_flag()
{
hard_sync = false;
pending_state.store(false);
}
bool pending() const
{
return pending_state.load();
}
void consumer_wait() const
{
while (num_waiters.load() != 0)
void set_sync_command_buffer(command_buffer_chunk* cmd)
{
_mm_pause();
command_buffer_to_wait = cmd;
command_buffer_sync_id = cmd->reset_id;
}
}
void producer_wait() const
{
while (pending_state.load())
void sync()
{
std::this_thread::yield();
if (command_buffer_to_wait->reset_id == command_buffer_sync_id)
{
// Allocation stack is FIFO and very long so no need to actually wait for fence signal
command_buffer_to_wait->flush();
}
}
}
};
};
enum flush_queue_state : u32
{
ok = 0,
deadlock = 1
};
struct frame_context_t
{
VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE;
VkSemaphore present_wait_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
flags32_t flags = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 vtx_env_heap_ptr = 0;
s64 frag_env_heap_ptr = 0;
s64 frag_const_heap_ptr = 0;
s64 vtx_const_heap_ptr = 0;
s64 vtx_layout_heap_ptr = 0;
s64 frag_texparam_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_wait_semaphore = other.present_wait_semaphore;
acquire_signal_semaphore = other.acquire_signal_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
flags = other.flags;
attrib_heap_ptr = other.attrib_heap_ptr;
vtx_env_heap_ptr = other.vtx_env_heap_ptr;
frag_env_heap_ptr = other.frag_env_heap_ptr;
vtx_layout_heap_ptr = other.vtx_layout_heap_ptr;
frag_texparam_heap_ptr = other.frag_texparam_heap_ptr;
frag_const_heap_ptr = other.frag_const_heap_ptr;
vtx_const_heap_ptr = other.vtx_const_heap_ptr;
index_heap_ptr = other.index_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
void swap_storage(frame_context_t &other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
vtx_env_heap_ptr = vtxenv_loc;
frag_env_heap_ptr = fragenv_loc;
vtx_layout_heap_ptr = vtxlayout_loc;
frag_texparam_heap_ptr = fragtex_loc;
frag_const_heap_ptr = fragconst_loc;
vtx_const_heap_ptr = vtxconst_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
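// Handshake sketch (assumed flow): a requesting thread calls post(hard_sync), then spins in
// producer_wait() until rsx::thread services the flush and calls clear_pending_flag(); each
// waiter then calls remove_one(), and the servicer can consumer_wait() for all waiters to
// drain before the request object is reused.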
struct flush_request_task
{
atomic_t<bool> pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request
atomic_t<int> num_waiters{ 0 }; //Number of threads waiting for this request to be serviced
bool hard_sync = false;
flush_request_task() = default;
void post(bool _hard_sync)
{
hard_sync = (hard_sync || _hard_sync);
pending_state = true;
num_waiters++;
}
void remove_one()
{
num_waiters--;
}
void clear_pending_flag()
{
hard_sync = false;
pending_state.store(false);
}
bool pending() const
{
return pending_state.load();
}
void consumer_wait() const
{
while (num_waiters.load() != 0)
{
_mm_pause();
}
}
void producer_wait() const
{
while (pending_state.load())
{
std::this_thread::yield();
}
}
};
struct present_surface_info
{
u32 address;
u32 format;
u32 width;
u32 height;
u32 pitch;
};
}
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
enum
{
VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1,
VK_HEAP_CHECK_VERTEX_STORAGE = 0x2,
VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4,
VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8,
VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10,
VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20,
VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40,
VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80,
VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE,
VK_HEAP_CHECK_ALL = 0xFF,
};
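// Callers OR together the heaps they are about to allocate from before running an occupancy
// check, e.g. check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE)
// (assumed usage; the helper's signature is not shown in this diff)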
enum frame_context_state : u32
{
dirty = 1
};
enum flush_queue_state : u32
{
ok = 0,
deadlock = 1
};
private:
VKFragmentProgram m_fragment_prog;
VKVertexProgram m_vertex_prog;
@ -382,15 +395,15 @@ private:
vk::occlusion_query_pool m_occlusion_query_pool;
bool m_occlusion_query_active = false;
rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
std::vector<occlusion_data> m_occlusion_map;
std::vector<vk::occlusion_data> m_occlusion_map;
shared_mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool;
vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations
u32 m_current_cb_index = 0;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
command_buffer_chunk* m_current_command_buffer = nullptr;
std::array<vk::command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
vk::command_buffer_chunk* m_current_command_buffer = nullptr;
VkDescriptorSetLayout descriptor_layouts;
VkPipelineLayout pipeline_layout;
@ -421,13 +434,13 @@ private:
VkDescriptorBufferInfo m_fragment_constants_buffer_info;
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info;
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage
frame_context_t m_aux_frame_context;
vk::frame_context_t m_aux_frame_context;
u32 m_current_queue_index = 0;
frame_context_t* m_current_frame = nullptr;
std::deque<frame_context_t*> m_queued_frames;
vk::frame_context_t* m_current_frame = nullptr;
std::deque<vk::frame_context_t*> m_queued_frames;
VkViewport m_viewport{};
VkRect2D m_scissor{};
@ -435,7 +448,7 @@ private:
std::vector<u8> m_draw_buffers;
shared_mutex m_flush_queue_mutex;
flush_request_task m_flush_requests;
vk::flush_request_task m_flush_requests;
// Offloader thread deadlock recovery
rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
@ -471,11 +484,13 @@ private:
void flush_command_queue(bool hard_sync = false);
void queue_swap_request();
void frame_context_cleanup(frame_context_t *ctx, bool free_resources = false);
void frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources = false);
void advance_queued_frames();
void present(frame_context_t *ctx);
void present(vk::frame_context_t *ctx);
void reinitialize_swapchain();
vk::image* get_present_source(vk::present_surface_info* info, const rsx::avconf* avconfig);
void begin_render_pass();
void close_render_pass();

View file

@ -0,0 +1,692 @@
#include "stdafx.h"
#include "VKGSRender.h"
void VKGSRender::reinitialize_swapchain()
{
m_swapchain_dims.width = m_frame->client_width();
m_swapchain_dims.height = m_frame->client_height();
// Reject requests to acquire new swapchain if the window is minimized
// The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized
// However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state
if (m_swapchain_dims.width == 0 || m_swapchain_dims.height == 0)
{
swapchain_unavailable = true;
return;
}
// NOTE: This operation will create a hard sync point
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
m_current_command_buffer->pending = true;
m_current_command_buffer->reset();
for (auto &ctx : frame_context_storage)
{
if (ctx.present_image == UINT32_MAX)
continue;
// Release present image by presenting it
frame_context_cleanup(&ctx, true);
}
// Drain all the queues
vkDeviceWaitIdle(*m_device);
// Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call
if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height))
{
LOG_WARNING(RSX, "Swapchain initialization failed. Request ignored [%dx%d]", m_swapchain_dims.width, m_swapchain_dims.height);
swapchain_unavailable = true;
open_command_buffer();
return;
}
// Prepare new swapchain images for use
open_command_buffer();
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
{
const auto target_layout = m_swapchain->get_optimal_present_layout();
const auto target_image = m_swapchain->get_image(i);
VkClearColorValue clear_color{};
VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range);
}
//Will have to block until rendering is completed
vk::fence resize_fence(*m_device);
//Flush the command buffer
close_and_submit_command_buffer(&resize_fence);
vk::wait_for_fence(&resize_fence);
m_current_command_buffer->reset();
open_command_buffer();
swapchain_unavailable = false;
should_reinitialize_swapchain = false;
}
void VKGSRender::present(vk::frame_context_t *ctx)
{
verify(HERE), ctx->present_image != UINT32_MAX;
// Partial CS flush
ctx->swap_command_buffer->flush();
if (!swapchain_unavailable)
{
switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image))
{
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
swapchain_unavailable = true;
break;
default:
vk::die_with_error(HERE, error);
}
}
// Presentation image released; reset value
ctx->present_image = UINT32_MAX;
}
void VKGSRender::advance_queued_frames()
{
// Check all other frames for completion and clear resources
check_present_status();
//m_rtts storage is double buffered and should be safe to tag on frame boundary
m_rtts.free_invalidated();
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.on_frame_end();
m_samplers_dirty.store(true);
vk::remove_unused_framebuffers();
m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_vertex_env_ring_info.get_current_put_pos_minus_one(),
m_fragment_env_ring_info.get_current_put_pos_minus_one(),
m_vertex_layout_ring_info.get_current_put_pos_minus_one(),
m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(),
m_fragment_constants_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
m_queued_frames.push_back(m_current_frame);
verify(HERE), m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES;
m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context_storage[m_current_queue_index];
m_current_frame->flags |= frame_context_state::dirty;
vk::advance_frame_counter();
}
void VKGSRender::queue_swap_request()
{
verify(HERE), !m_current_frame->swap_command_buffer;
m_current_frame->swap_command_buffer = m_current_command_buffer;
if (m_swapchain->is_headless())
{
m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image);
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
}
else
{
close_and_submit_command_buffer(m_current_command_buffer->submit_fence,
m_current_frame->acquire_signal_semaphore,
m_current_frame->present_wait_semaphore,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
}
// Signal pending state as the command queue is now closed
m_current_frame->swap_command_buffer->pending = true;
// Set up a present request for this frame as well
present(m_current_frame);
// Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();
// Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}
void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources)
{
verify(HERE), ctx->swap_command_buffer;
if (ctx->swap_command_buffer->pending)
{
// Perform hard swap here
if (ctx->swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT) != VK_SUCCESS)
{
// Lost surface/device, release swapchain
swapchain_unavailable = true;
}
free_resources = true;
}
if (free_resources)
{
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
}
if (m_overlay_manager && m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer->remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
vk::reset_global_resources();
m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
m_ui_renderer->free_resources();
m_video_output_pass->free_resources();
ctx->buffer_views_to_clean.clear();
if (ctx->last_frame_sync_time > m_last_heap_sync_time)
{
m_last_heap_sync_time = ctx->last_frame_sync_time;
//Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr;
m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr;
m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr;
m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr;
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify();
m_vertex_env_ring_info.notify();
m_fragment_env_ring_info.notify();
m_fragment_constants_ring_info.notify();
m_transform_constants_ring_info.notify();
m_vertex_layout_ring_info.notify();
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
}
}
ctx->swap_command_buffer = nullptr;
// Remove from queued list
while (!m_queued_frames.empty())
{
auto frame = m_queued_frames.front();
m_queued_frames.pop_front();
if (frame == ctx)
{
break;
}
}
vk::advance_completed_frame_counter();
}
vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf* avconfig)
{
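// Lookup order: 1) a live color render target in the surface store, 2) a texture cache entry
// matching the display buffer dimensions, 3) fall back to uploading the raw display buffer
// memory from cell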
vk::image* image_to_flip = nullptr;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
if (image_to_flip)
{
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
{
info->width = buffer_width;
info->height = buffer_height;
}
}
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software, with a blit used to display it
image_to_flip = surface->get_raw_texture();
}
if (!image_to_flip)
{
// Read from cell
const auto range = utils::address_range::start_length(info->address, info->pitch * info->height);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
for (const auto & section : overlap)
{
if (!section->is_synchronized())
{
section->copy_texture(*m_current_command_buffer, true);
}
}
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, info->address, info->width, info->height);
}
return image_to_flip;
}
void VKGSRender::flip(const rsx::display_flip_info_t& info)
{
// Check swapchain condition/status
if (!m_swapchain->supports_automatic_wm_reports())
{
if (m_swapchain_dims.width != m_frame->client_width() ||
m_swapchain_dims.height != m_frame->client_height())
{
swapchain_unavailable = true;
}
}
if (swapchain_unavailable || should_reinitialize_swapchain)
{
reinitialize_swapchain();
}
m_profiler.start();
if (m_current_frame == &m_aux_frame_context)
{
m_current_frame = &frame_context_storage[m_current_queue_index];
if (m_current_frame->swap_command_buffer)
{
// It's possible this flip request was triggered by overlays and the flip queue is in an undefined state
frame_context_cleanup(m_current_frame, true);
}
// Swap aux storage and current frame; aux storage should always be ready for use at all times
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
}
else if (m_current_frame->swap_command_buffer)
{
if (info.stats.draw_calls > 0)
{
// This can be 'legal' if the window was being resized and no polling happened because of the swapchain_unavailable flag
LOG_ERROR(RSX, "Possible data corruption on frame context storage detected");
}
// There were no draws and back-to-back flips happened
frame_context_cleanup(m_current_frame, true);
}
if (info.skip_frame || swapchain_unavailable)
{
if (!info.skip_frame)
{
verify(HERE), swapchain_unavailable;
// Perform a mini-flip here without invoking present code
m_current_frame->swap_command_buffer = m_current_command_buffer;
flush_command_queue(true);
vk::advance_frame_counter();
frame_context_cleanup(m_current_frame, true);
}
m_frame->flip(m_context);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
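// Fit the source buffer into the client area while preserving aspect ratio: q > 1 means the
// source is wider than the client area, so shrink the height and letterbox (center vertically);
// q < 1 means it is taller, so shrink the width and pillarbox. For example, a 1280x720 buffer
// (aq ~ 1.78) shown in a 1920x1200 client (rq = 1.60) gives q ~ 1.11, a new height of 1080 and
// a y offset of 60.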
coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
verify(HERE), m_current_frame->present_image == UINT32_MAX;
verify(HERE), m_current_frame->swap_command_buffer == nullptr;
u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES ? 0ull : 100000000ull;
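// If the swapchain exposes no more images than we keep in flight, a blocking acquire could
// stall on one of our own queued frames; use a zero timeout and pump check_present_status()
// in the loop below instead. Otherwise, allow the driver up to 100ms per attempt.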
while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image))
{
switch (status)
{
case VK_TIMEOUT:
case VK_NOT_READY:
{
//In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
//This means that any acquired images have to be released before acquireNextImage can return successfully,
//despite the driver reporting 2 swap chain images available.
//This makes fullscreen performance slower than windowed performance, as throughput is lowered by losing one presentable image.
//Found on AMD Crimson 17.7.2
//Whatever status was returned, this loop is now a spin
timeout = 0ull;
check_present_status();
continue;
}
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
LOG_WARNING(RSX, "vkAcquireNextImageKHR failed with VK_ERROR_OUT_OF_DATE_KHR. Flip request ignored until surface is recreated.");
swapchain_unavailable = true;
reinitialize_swapchain();
continue;
default:
vk::die_with_error(HERE, status);
}
}
//Confirm that the driver did not silently fail
verify(HERE), m_current_frame->present_image != UINT32_MAX;
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
vk::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
image_to_flip = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
}
VkImage target_image = m_swapchain->get_image(m_current_frame->present_image);
const auto present_layout = m_swapchain->get_optimal_present_layout();
const VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
VkImageLayout target_layout = present_layout;
VkRenderPass single_target_pass = VK_NULL_HANDLE;
vk::framebuffer_holder* direct_fbo = nullptr;
vk::viewable_image* calibration_src = nullptr;
if (image_to_flip)
{
if (aspect_ratio.x || aspect_ratio.y)
{
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
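// A gamma curve or limited-range conversion cannot be applied by a plain transfer blit; that
// path must go through the video output shader pass below, which samples the source and
// therefore requires a viewable image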
if (UNLIKELY(!g_cfg.video.full_rgb_range_output || !rsx::fcmp(avconfig->gamma, 1.f)))
{
calibration_src = dynamic_cast<vk::viewable_image*>(image_to_flip);
verify("Image handle not viewable!" HERE), calibration_src;
}
if (LIKELY(!calibration_src))
{
vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout,
{ 0, 0, static_cast<s32>(buffer_width), static_cast<s32>(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false);
}
else
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
direct_fbo->add_ref();
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !g_cfg.video.full_rgb_range_output, single_target_pass);
image_to_flip->pop_layout(*m_current_command_buffer);
direct_fbo->release();
}
}
else
{
//No draw call was issued!
//TODO: Upload raw bytes from cpu for rendering
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
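// bufferRowLength/bufferImageHeight of 0 mean the buffer rows are tightly packed, i.e. each
// row occupies exactly imageExtent.width texels (4 bytes each for the 32-bit format used here)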
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible());
if (g_cfg.video.overlay || has_overlay)
{
if (target_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
{
// Change the image layout while adding a dependency that waits for the blit op to finish before we start writing
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
barrier.oldLayout = target_layout;
barrier.image = target_image;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = subresource_range;
vkCmdPipelineBarrier(*m_current_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
if (!direct_fbo)
{
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
}
direct_fbo->add_ref();
if (has_overlay)
{
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get());
}
}
if (g_cfg.video.overlay)
{
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_texture_cache.get_cache_miss_ratio() * 100));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
direct_fbo->release();
}
if (target_layout != present_layout)
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, present_layout, subresource_range);
}
queue_swap_request();
m_frame_stats.flip_time = m_profiler.duration();
m_frame->flip(m_context);
rsx::thread::flip(info);
}

View file

@ -93,6 +93,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTexture.cpp" />
@ -101,4 +102,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -8,6 +8,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
</ItemGroup>
@ -28,4 +29,4 @@
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
<ClInclude Include="Emu\RSX\GL\GLCompute.h" />
</ItemGroup>
</Project>
</Project>

View file

@ -52,6 +52,7 @@
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPresent.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />

View file

@ -8,6 +8,7 @@
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPresent.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />
@ -18,7 +19,7 @@
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
</ItemGroup>
<ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
<ClInclude Include="Emu\RSX\VK\VKCompute.h" />
<ClInclude Include="Emu\RSX\VK\VKDMA.h" />
@ -37,6 +38,6 @@
<ClInclude Include="Emu\RSX\VK\VKTextureCache.h" />
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
</ItemGroup>
</Project>