From 05ffb50037facbae55db104cc94294cb75351b63 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 13 Jul 2017 00:49:50 +0300 Subject: [PATCH] vk/rsx: Bug fixes and improvements - Improvements to framebuffer usage; Avoid creating new resources every frame - Handle null fragment program properly - Collect vertex upload statistics - vk: Pre-initialize 'unused' varying registers in the vertex shader in case it gets matched with a fs that consumes it -- Fixes a crash about fog_c not being declared gl/dx12/vk: Handle null fragment program - cleanup - use yield semantic instead of sleep(0) as yield is more cross-platform -- sleep(0) is a windows specific scheduler hint --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 16 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 3 + rpcs3/Emu/RSX/GL/GLGSRender.cpp | 14 +- rpcs3/Emu/RSX/RSXFragmentProgram.h | 3 + rpcs3/Emu/RSX/RSXThread.cpp | 32 +-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 246 ++++++++++++++++----- rpcs3/Emu/RSX/VK/VKGSRender.h | 13 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 26 ++- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 26 ++- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 2 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 33 ++- 11 files changed, 317 insertions(+), 97 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a61057a6b5..9636994c12 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -324,6 +324,17 @@ void D3D12GSRender::end() { std::chrono::time_point start_duration = steady_clock::now(); + std::chrono::time_point program_load_start = steady_clock::now(); + load_program(); + std::chrono::time_point program_load_end = steady_clock::now(); + m_timers.program_load_duration += std::chrono::duration_cast(program_load_end - program_load_start).count(); + + if (!m_fragment_program.valid) + { + rsx::thread::end(); + return; + } + std::chrono::time_point rtt_duration_start = steady_clock::now(); 
prepare_render_targets(get_current_resource_storage().command_list.Get()); @@ -344,11 +355,6 @@ void D3D12GSRender::end() std::chrono::time_point vertex_index_duration_end = steady_clock::now(); m_timers.vertex_index_duration += std::chrono::duration_cast(vertex_index_duration_end - vertex_index_duration_start).count(); - std::chrono::time_point program_load_start = steady_clock::now(); - load_program(); - std::chrono::time_point program_load_end = steady_clock::now(); - m_timers.program_load_duration += std::chrono::duration_cast(program_load_end - program_load_start).count(); - get_current_resource_storage().command_list->SetGraphicsRootSignature(m_shared_root_signature.Get()); get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers.stencil_func_ref()); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index ca919feceb..8cbe35bbc3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -56,6 +56,9 @@ void D3D12GSRender::load_program() m_vertex_program = get_current_vertex_program(); m_fragment_program = get_current_fragment_program(rtt_lookup_func); + if (!m_fragment_program.valid) + return; + D3D12PipelineProperties prop = {}; prop.Topology = get_primitive_topology_type(rsx::method_registers.current_draw_clause.primitive); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 8953b8075e..58843627c2 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -322,17 +322,15 @@ namespace void GLGSRender::end() { - if (skip_frame || !framebuffer_status_valid) + std::chrono::time_point program_start = steady_clock::now(); + //Load program here since it is dependent on vertex state + + if (skip_frame || !framebuffer_status_valid || !load_program()) { rsx::thread::end(); return; } - std::chrono::time_point program_start = steady_clock::now(); - - //Load program here since it is 
dependent on vertex state - load_program(); - std::chrono::time_point program_stop = steady_clock::now(); m_begin_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); @@ -841,8 +839,10 @@ bool GLGSRender::load_program() return std::make_tuple(true, surface->get_native_pitch()); }; - RSXVertexProgram vertex_program = get_current_vertex_program(); RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); + if (!fragment_program.valid) return false; + + RSXVertexProgram vertex_program = get_current_vertex_program(); u32 unnormalized_rtts = 0; diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index 8cc967bc46..668b0e6b8b 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h +++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -239,6 +239,8 @@ struct RSXFragmentProgram u8 textures_alpha_kill[16]; u32 textures_zfunc[16]; + bool valid; + rsx::texture_dimension_extended get_texture_dimension(u8 id) const { return (rsx::texture_dimension_extended)((texture_dimensions >> (id * 2)) & 0x3); @@ -263,6 +265,7 @@ struct RSXFragmentProgram , ctrl(0) , unnormalized_coords(0) , texture_dimensions(0) + , valid(false) { } }; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 98a37465e4..512c290940 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -974,9 +974,17 @@ namespace rsx RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const { RSXFragmentProgram result = {}; - u32 shader_program = rsx::method_registers.shader_program_address(); - result.offset = shader_program & ~0x3; - result.addr = vm::base(rsx::get_address(result.offset, (shader_program & 0x3) - 1)); + + const u32 shader_program = rsx::method_registers.shader_program_address(); + if (shader_program == 0) + return result; + + const u32 program_location = (shader_program & 0x3) - 1; + const u32 program_offset = (shader_program & 
~0x3); + + result.offset = program_offset; + result.addr = vm::base(rsx::get_address(program_offset, program_location)); + result.valid = true; result.ctrl = rsx::method_registers.shader_control(); result.unnormalized_coords = 0; result.front_back_color_enabled = !rsx::method_registers.two_side_light_en(); @@ -1174,26 +1182,26 @@ namespace rsx if (packet.post_upload_func) packet.post_upload_func(packet.dst_span.data(), packet.type, (u8)packet.vector_width, task.vertex_count); - _mm_sfence(); task.remaining_packets--; current_job += step; + _mm_sfence(); } _mm_mfence(); while (task.remaining_packets > 0 && !Emu.IsStopped()) { + std::this_thread::yield(); _mm_lfence(); - std::this_thread::sleep_for(0us); } - _mm_sfence(); task.ready_threads++; + _mm_sfence(); } else - std::this_thread::sleep_for(0us); - //thread_ctrl::wait(); - //busy_wait(); + { + std::this_thread::yield(); + } } }); } @@ -1201,8 +1209,7 @@ namespace rsx while (m_vertex_streaming_task.ready_threads != 0 && !Emu.IsStopped()) { - _mm_lfence(); - busy_wait(); + _mm_pause(); } m_vertex_streaming_task.vertex_count = vertex_count; @@ -1214,8 +1221,7 @@ namespace rsx { while (m_vertex_streaming_task.remaining_packets > 0 && !Emu.IsStopped()) { - _mm_lfence(); - busy_wait(); + _mm_pause(); } m_vertex_streaming_task.packets.resize(0); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 98606a3761..74efb0d482 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -677,6 +677,7 @@ VKGSRender::~VKGSRender() m_buffer_view_to_clean.clear(); m_sampler_to_clean.clear(); m_framebuffer_to_clean.clear(); + m_draw_fbo.reset(); //Render passes for (auto &render_pass : m_render_passes) @@ -880,11 +881,11 @@ void VKGSRender::begin_render_pass() VkRenderPassBeginInfo rp_begin = {}; rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rp_begin.renderPass = current_render_pass; - rp_begin.framebuffer = m_framebuffer_to_clean.back()->value; + 
rp_begin.framebuffer = m_draw_fbo->value; rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width(); - rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height(); + rp_begin.renderArea.extent.width = m_draw_fbo->width(); + rp_begin.renderArea.extent.height = m_draw_fbo->height(); vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); render_pass_open = true; @@ -941,10 +942,15 @@ void VKGSRender::end() } //Load program here since it is dependent on vertex state - load_program(is_instanced); + if (!load_program(is_instanced)) + { + LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw"); + rsx::thread::end(); + return; + } std::chrono::time_point program_stop = steady_clock::now(); - m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); + //m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); if (is_instanced) { @@ -1123,7 +1129,7 @@ void VKGSRender::end() depth_clear_value.depthStencil.depth = 1.f; depth_clear_value.depthStencil.stencil = 255; - VkClearRect clear_rect = { 0, 0, m_framebuffer_to_clean.back()->width(), m_framebuffer_to_clean.back()->height(), 0, 1 }; + VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 }; VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value }; vkCmdClearAttachments(*m_current_command_buffer, 1, &clear_desc, 1, &clear_rect); @@ -1133,18 +1139,15 @@ void VKGSRender::end() std::optional > index_info = std::get<2>(upload_info); - if (m_attrib_ring_info.mapped) - { - wait_for_vertex_upload_task(); - m_attrib_ring_info.unmap(); - } - std::chrono::time_point vertex_end = steady_clock::now(); m_vertex_upload_time += std::chrono::duration_cast(vertex_end - textures_end).count(); if (!index_info) { - vkCmdDraw(*m_current_command_buffer, std::get<1>(upload_info), 1, 0, 0); + const auto 
vertex_count = std::get<1>(upload_info); + vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0); + + m_last_vertex_count = vertex_count; m_last_draw_indexed = false; } else @@ -1175,6 +1178,22 @@ void VKGSRender::end() copy_render_targets_to_dma_location(); m_draw_calls++; + if (g_cfg.video.overlay) + { + if (m_last_vertex_count < 1024) + m_uploads_small++; + else if (m_last_vertex_count < 2048) + m_uploads_1k++; + else if (m_last_vertex_count < 4096) + m_uploads_2k++; + else if (m_last_vertex_count < 8192) + m_uploads_4k++; + else if (m_last_vertex_count < 16384) + m_uploads_8k++; + else + m_uploads_16k++; + } + rsx::thread::end(); } @@ -1260,8 +1279,8 @@ void VKGSRender::clear_surface(u32 mask) u16 scissor_y = rsx::method_registers.scissor_origin_y(); u16 scissor_h = rsx::method_registers.scissor_height(); - const u32 fb_width = m_framebuffer_to_clean.back()->width(); - const u32 fb_height = m_framebuffer_to_clean.back()->height(); + const u32 fb_width = m_draw_fbo->width(); + const u32 fb_height = m_draw_fbo->height(); //clip region std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true); @@ -1392,6 +1411,12 @@ void VKGSRender::copy_render_targets_to_dma_location() void VKGSRender::flush_command_queue(bool hard_sync) { + if (m_attrib_ring_info.mapped) + { + wait_for_vertex_upload_task(); + m_attrib_ring_info.unmap(); + } + close_render_pass(); close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence); @@ -1480,7 +1505,13 @@ void VKGSRender::process_swap_request() m_buffer_view_to_clean.clear(); m_sampler_to_clean.clear(); - m_framebuffer_to_clean.clear(); + + m_framebuffer_to_clean.remove_if([](std::unique_ptr& fbo) + { + if (fbo->deref_count >= 2) return true; + fbo->deref_count++; + return false; + }); if (g_cfg.video.overlay) { @@ -1545,8 +1576,10 @@ bool VKGSRender::load_program(bool fast_update) return std::make_tuple(true, surface->native_pitch); 
}; - vertex_program = get_current_vertex_program(); fragment_program = get_current_fragment_program(rtt_lookup_func); + if (!fragment_program.valid) return false; + + vertex_program = get_current_vertex_program(); vk::pipeline_props properties = {}; @@ -1864,6 +1897,35 @@ void VKGSRender::prepare_rtts() const u32 surface_pitchs[] = { rsx::method_registers.surface_a_pitch(), rsx::method_registers.surface_b_pitch(), rsx::method_registers.surface_c_pitch(), rsx::method_registers.surface_d_pitch() }; + if (m_draw_fbo) + { + const u32 fb_width = m_draw_fbo->width(); + const u32 fb_height = m_draw_fbo->height(); + + bool really_changed = false; + + if (fb_width == clip_width && fb_height == clip_height) + { + for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (m_surface_info[i].address != surface_addresses[i]) + { + really_changed = true; + break; + } + } + + if (!really_changed) + { + if (zeta_address == m_depth_surface_info.address) + { + //Nothing has changed, we're still using the same framebuffer + return; + } + } + } + } + m_rtts.prepare_render_target(&*m_current_command_buffer, rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), clip_width, clip_height, @@ -1887,20 +1949,16 @@ void VKGSRender::prepare_rtts() //Bind created rtts as current fbo... 
std::vector draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); - std::vector> fbo_images; + + //Search old framebuffers for this same configuration + bool framebuffer_found = false; + + std::vector bound_images; + bound_images.reserve(5); for (u8 index : draw_buffers) { - vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]); - - VkImageSubresourceRange subres = {}; - subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subres.baseArrayLayer = 0; - subres.baseMipLevel = 0; - subres.layerCount = 1; - subres.levelCount = 1; - - fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); + bound_images.push_back(std::get<1>(m_rtts.m_bound_render_targets[index])); m_surface_info[index].address = surface_addresses[index]; m_surface_info[index].pitch = surface_pitchs[index]; @@ -1913,20 +1971,9 @@ void VKGSRender::prepare_rtts() } } - m_draw_buffers_count = static_cast(fbo_images.size()); - - if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) + if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0) { - vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil)); - - VkImageSubresourceRange subres = {}; - subres.aspectMask = (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z24s8) ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; - subres.baseArrayLayer = 0; - subres.baseMipLevel = 0; - subres.layerCount = 1; - subres.levelCount = 1; - - fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); + bound_images.push_back(std::get<1>(m_rtts.m_bound_depth_stencil)); m_depth_surface_info.address = zeta_address; m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch(); @@ -1935,6 +1982,8 @@ void VKGSRender::prepare_rtts() m_depth_surface_info.pitch = 0; } + m_draw_buffers_count = static_cast(bound_images.size()); + if (g_cfg.video.write_color_buffers) { for (u8 index : draw_buffers) @@ -1943,7 +1992,7 @@ void VKGSRender::prepare_rtts() const u32 range = m_surface_info[index].pitch * m_surface_info[index].height; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, - m_surface_info[index].width, m_surface_info[index].height); + m_surface_info[index].width, m_surface_info[index].height); } } @@ -1960,10 +2009,59 @@ void VKGSRender::prepare_rtts() } } - size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size()); - VkRenderPass current_render_pass = m_render_passes[idx]; + for (auto &fbo : m_framebuffer_to_clean) + { + if (fbo->matches(bound_images, clip_width, clip_height)) + { + m_draw_fbo.swap(fbo); + m_draw_fbo->reset_refs(); + framebuffer_found = true; + //LOG_ERROR(RSX, "Matching framebuffer exists, using that instead"); + break; + } + } - m_framebuffer_to_clean.push_back(std::make_unique(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); + if (!framebuffer_found) + { + std::vector> fbo_images; + 
fbo_images.reserve(5); + + for (u8 index : draw_buffers) + { + vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]); + + VkImageSubresourceRange subres = {}; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.baseArrayLayer = 0; + subres.baseMipLevel = 0; + subres.layerCount = 1; + subres.levelCount = 1; + + fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); + } + + if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) + { + vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil)); + + VkImageSubresourceRange subres = {}; + subres.aspectMask = (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; + subres.baseArrayLayer = 0; + subres.baseMipLevel = 0; + subres.layerCount = 1; + subres.levelCount = 1; + + fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); + } + + size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size()); + VkRenderPass current_render_pass = m_render_passes[idx]; + + if (m_draw_fbo) + m_framebuffer_to_clean.push_back(std::move(m_draw_fbo)); + + m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); + } } @@ -1982,6 +2080,13 @@ void VKGSRender::flip(int buffer) m_setup_time = 0; m_vertex_upload_time = 0; m_textures_upload_time = 0; + + m_uploads_small = 0; + m_uploads_1k = 0; + m_uploads_2k = 0; + m_uploads_4k = 0; + m_uploads_8k = 0; + m_uploads_16k = 0; } return; @@ -2061,7 +2166,7 @@ void VKGSRender::flip(int buffer) 
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range); } - std::unique_ptr direct_fbo; + std::unique_ptr direct_fbo; std::vector> swap_image_view; if (g_cfg.video.overlay) { @@ -2083,9 +2188,24 @@ void VKGSRender::flip(int buffer) size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1); VkRenderPass single_target_pass = m_render_passes[idx]; - swap_image_view.push_back(std::make_unique(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres)); - direct_fbo.reset(new vk::framebuffer(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view))); - + for (auto &It = m_framebuffer_to_clean.begin(); It != m_framebuffer_to_clean.end(); It++) + { + auto &fbo = *It; + if (fbo->attachments[0]->info.image == target_image) + { + direct_fbo.swap(fbo); + direct_fbo->reset_refs(); + m_framebuffer_to_clean.erase(It); + break; + } + } + + if (!direct_fbo) + { + swap_image_view.push_back(std::make_unique(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres)); + direct_fbo.reset(new vk::framebuffer_holder(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view))); + } + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls) + ", instanced repeats: " + std::to_string(m_instanced_draws)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + 
std::to_string(m_vertex_upload_time) + "us"); @@ -2093,10 +2213,29 @@ void VKGSRender::flip(int buffer) m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us"); + //Vertex upload statistics + u32 _small, _1k, _2k, _4k, _8k, _16k; + if (m_draw_calls > 0) + { + _small = m_uploads_small * 100 / m_draw_calls; + _1k = m_uploads_1k * 100 / m_draw_calls; + _2k = m_uploads_2k * 100 / m_draw_calls; + _4k = m_uploads_4k * 100 / m_draw_calls; + _8k = m_uploads_8k * 100 / m_draw_calls; + _16k = m_uploads_16k * 100 / m_draw_calls; + } + else + { + _small = _1k = _2k = _4k = _8k = _16k = 0; + } + + std::string message = fmt::format("Vertex sizes: < 1k: %d%%, 1k+: %d%%, 2k+: %d%%, 4k+: %d%%, 8k+: %d%%, 16k+: %d%%", _small, _1k, _2k, _4k, _8k, _16k); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), message); + vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres); + m_framebuffer_to_clean.push_back(std::move(direct_fbo)); } - m_framebuffer_to_clean.push_back(std::move(direct_fbo)); queue_swap_request(); } else @@ -2194,4 +2333,11 @@ void VKGSRender::flip(int buffer) m_setup_time = 0; m_vertex_upload_time = 0; m_textures_upload_time = 0; + + m_uploads_small = 0; + m_uploads_1k = 0; + m_uploads_2k = 0; + m_uploads_4k = 0; + m_uploads_8k = 0; + m_uploads_16k = 0; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 2a068e517e..300eda6b99 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -149,15 +149,26 @@ private: vk::descriptor_pool descriptor_pool; std::vector > 
m_buffer_view_to_clean; - std::vector > m_framebuffer_to_clean; std::vector > m_sampler_to_clean; + std::list > m_framebuffer_to_clean; + std::unique_ptr m_draw_fbo; u32 m_client_width = 0; u32 m_client_height = 0; + // Draw call stats u32 m_draw_calls = 0; u32 m_instanced_draws = 0; + // Vertex buffer usage stats + u32 m_uploads_small = 0; + u32 m_uploads_1k = 0; + u32 m_uploads_2k = 0; + u32 m_uploads_4k = 0; + u32 m_uploads_8k = 0; + u32 m_uploads_16k = 0; + + // Timers s64 m_setup_time = 0; s64 m_vertex_upload_time = 0; s64 m_textures_upload_time = 0; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index d2c02b6fe3..479172856c 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -650,17 +650,17 @@ namespace vk { VkFramebuffer value; VkFramebufferCreateInfo info = {}; - std::vector> attachements; + std::vector> attachments; u32 m_width = 0; u32 m_height = 0; public: framebuffer(VkDevice dev, VkRenderPass pass, u32 width, u32 height, std::vector> &&atts) - : m_device(dev), attachements(std::move(atts)) + : m_device(dev), attachments(std::move(atts)) { - std::vector image_view_array(attachements.size()); + std::vector image_view_array(attachments.size()); size_t i = 0; - for (const auto &att : attachements) + for (const auto &att : attachments) { image_view_array[i++] = att->value; } @@ -694,6 +694,24 @@ namespace vk return m_height; } + bool matches(std::vector fbo_images, u32 width, u32 height) + { + if (m_width != width || m_height != height) + return false; + + if (fbo_images.size() != attachments.size()) + return false; + + for (int n = 0; n < fbo_images.size(); ++n) + { + if (attachments[n]->info.image != fbo_images[n]->value || + attachments[n]->info.format != fbo_images[n]->info.format) + return false; + } + + return true; + } + framebuffer(const framebuffer&) = delete; framebuffer(framebuffer&&) = delete; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 
9ccd88540d..97270ad71a 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -7,12 +7,17 @@ #include "../Common/TextureUtils.h" #include "VKFormats.h" +struct ref_counted +{ + u8 deref_count = 0; + + void reset_refs() { deref_count = 0; } +}; + namespace vk { - struct render_target : public image + struct render_target : public image, public ref_counted { - u8 deref_count = 0; - bool dirty = false; u16 native_pitch = 0; VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; @@ -36,6 +41,17 @@ namespace vk mipmaps, layers, samples, initial_layout, tiling, usage, image_flags) {} }; + + struct framebuffer_holder: public vk::framebuffer, public ref_counted + { + framebuffer_holder(VkDevice dev, + VkRenderPass pass, + u32 width, u32 height, + std::vector> &&atts) + + : framebuffer(dev, pass, width, height, std::move(atts)) + {} + }; } namespace rsx @@ -270,9 +286,9 @@ namespace rsx void free_invalidated() { - invalidated_resources.remove_if([](std::unique_ptr& rtt) + invalidated_resources.remove_if([](std::unique_ptr &rtt) { - if (rtt->deref_count > 1) return true; + if (rtt->deref_count >= 2) return true; rtt->deref_count++; return false; diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 0912cf7e14..2f50fe2cdd 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -477,7 +477,7 @@ namespace { const auto &vbo = vertex_buffers[i]; - if (vbo.which() == 0 && vertex_count >= g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready()) + if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready()) { //vertex array buffer. 
We can thread this thing heavily const auto& v = vbo.get(); diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 51dd982f86..c7b01aab79 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -146,8 +146,9 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std static const vertex_reg_info reg_table[] = { { "gl_Position", false, "dst_reg0", "", false }, - { "back_diff_color", true, "dst_reg1", "", false }, - { "back_spec_color", true, "dst_reg2", "", false }, + //Technically these two are for both back and front + { "back_diff_color", true, "dst_reg1", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTDIFFUSE }, + { "back_spec_color", true, "dst_reg2", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR }, { "front_diff_color", true, "dst_reg3", "", false }, { "front_spec_color", true, "dst_reg4", "", false }, { "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG }, @@ -159,15 +160,15 @@ static const vertex_reg_info reg_table[] = { "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 }, { "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 }, { "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, - { "tc0", true, "dst_reg7", "", false }, - { "tc1", true, "dst_reg8", "", false }, - { "tc2", true, "dst_reg9", "", false }, - { "tc3", true, "dst_reg10", "", false }, - { "tc4", true, "dst_reg11", "", false }, - { "tc5", true, "dst_reg12", "", false }, - { "tc6", true, "dst_reg13", "", false }, - { "tc7", true, "dst_reg14", "", false }, - { "tc8", true, "dst_reg15", "", false }, + { "tc0", 
true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 }, + { "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 }, + { "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 }, + { "tc3", true, "dst_reg10", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX3 }, + { "tc4", true, "dst_reg11", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX4 }, + { "tc5", true, "dst_reg12", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX5 }, + { "tc6", true, "dst_reg13", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX6 }, + { "tc7", true, "dst_reg14", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX7 }, + { "tc8", true, "dst_reg15", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX8 }, { "tc9", true, "dst_reg6", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX9 } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15. }; @@ -195,6 +196,16 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: const vk::varying_register_t &reg = vk::get_varying_register(i.name); OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n"; } + else + { + //Force some outputs to be declared even if unused so we can set default values + //NOTE: Registers that can be skept will not have their check_mask_value set + if (i.need_declare && (rsx_vertex_program.output_mask & i.check_mask_value) > 0) + { + const vk::varying_register_t &reg = vk::get_varying_register(i.name); + OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n"; + } + } } if (insert_back_diffuse && insert_front_diffuse)