From 440a31ef18f7a5f6e86de682d0270dd475144654 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 10 Apr 2018 18:06:29 +0300 Subject: [PATCH] rsx: Optimizations for program management --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 60 ++++++++++++++++----------------- rpcs3/Emu/RSX/RSXThread.cpp | 23 +++++++++---- rpcs3/Emu/RSX/RSXThread.h | 5 +++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 9 +++-- rpcs3/Emu/RSX/rsx_methods.cpp | 23 +++++++++++++ 5 files changed, 81 insertions(+), 39 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 81886998f2..1a925eddf6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1049,37 +1049,37 @@ bool GLGSRender::check_program_state() void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) { - get_current_fragment_program(fs_sampler_state); - verify(HERE), current_fragment_program.valid; - - get_current_vertex_program(); - - auto &fragment_program = current_fragment_program; - auto &vertex_program = current_vertex_program; - - vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side - fragment_program.unnormalized_coords = 0; //unused - void* pipeline_properties = nullptr; - - m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, pipeline_properties); - m_program->use(); - - if (m_prog_buffer.check_cache_missed()) + if (m_fragment_program_dirty || m_vertex_program_dirty) { - m_shaders_cache->store(pipeline_properties, vertex_program, fragment_program); + get_current_fragment_program(fs_sampler_state); + verify(HERE), current_fragment_program.valid; - //Notify the user with HUD notification - if (g_cfg.misc.show_shader_compilation_hint) + get_current_vertex_program(); + + current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side + current_fragment_program.unnormalized_coords = 0; //unused + void* pipeline_properties = nullptr; + + m_program = &m_prog_buffer.getGraphicPipelineState(current_vertex_program, current_fragment_program, pipeline_properties); + m_program->use(); + + if (m_prog_buffer.check_cache_missed()) { - if (!m_custom_ui) + m_shaders_cache->store(pipeline_properties, current_vertex_program, current_fragment_program); + + //Notify the user with HUD notification + if (g_cfg.misc.show_shader_compilation_hint) { - //Create notification but do not draw it at this time. No need to spam flip requests - m_custom_ui = std::make_unique(); - } - else if (auto casted = dynamic_cast(m_custom_ui.get())) - { - //Probe the notification - casted->touch(); + if (!m_custom_ui) + { + //Create notification but do not draw it at this time. No need to spam flip requests + m_custom_ui = std::make_unique(); + } + else if (auto casted = dynamic_cast(m_custom_ui.get())) + { + //Probe the notification + casted->touch(); + } } } } @@ -1089,7 +1089,7 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) u32 vertex_constants_offset; u32 fragment_constants_offset; - const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program); const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float)); if (manually_flush_ring_buffers) @@ -1128,11 +1128,11 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) if (fragment_constants_size) { m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_size) }, - fragment_program, gl::get_driver_caps().vendor_NVIDIA); + current_fragment_program, gl::get_driver_caps().vendor_NVIDIA); } // Fragment state - fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); + fill_fragment_state_buffer(buf+fragment_constants_size, current_fragment_program); m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 9a75b6bd91..fba3dd5fa3 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1329,6 +1329,10 @@ namespace rsx void thread::get_current_vertex_program() { + if (!m_vertex_program_dirty) + return; + + m_vertex_program_dirty = false; const u32 transform_program_start = rsx::method_registers.transform_program_start(); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); current_vertex_program.skip_vertex_input_check = false; @@ -1341,8 +1345,8 @@ namespace rsx memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32)); - auto program_info = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data); - current_vertex_program.data.resize(program_info.ucode_size); + current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data); + current_vertex_program.data.resize(current_vp_metadata.ucode_size); const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); @@ -1540,20 +1544,27 @@ namespace rsx void thread::get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors) { + if (!m_fragment_program_dirty) + return; + + m_fragment_program_dirty = false; auto &result = current_fragment_program = {}; const u32 shader_program = rsx::method_registers.shader_program_address(); if (shader_program == 0) + { + current_fp_metadata = {}; return; + } const u32 program_location = (shader_program & 0x3) - 1; const u32 program_offset = (shader_program & ~0x3); result.addr = vm::base(rsx::get_address(program_offset, program_location)); - const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); + current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); - result.addr = ((u8*)result.addr + program_info.program_start_offset); - result.offset = program_offset + program_info.program_start_offset; + result.addr = ((u8*)result.addr + current_fp_metadata.program_start_offset); + result.offset = program_offset + current_fp_metadata.program_start_offset; result.valid = true; result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); result.unnormalized_coords = 0; @@ -1574,7 +1585,7 @@ namespace rsx result.texture_scale[i][1] = sampler_descriptors[i]->scale_y; result.texture_scale[i][2] = (f32)tex.remap(); //Debug value - if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i))) + if (tex.enabled() && (current_fp_metadata.referenced_textures_mask & (1 << i))) { u32 texture_control = 0; result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1)); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 5b663b3d06..f2030c7a8d 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -331,6 +331,8 @@ namespace rsx bool m_textures_dirty[16]; bool m_vertex_textures_dirty[4]; bool m_framebuffer_state_contested = false; + bool m_fragment_program_dirty = false; + bool m_vertex_program_dirty = false; protected: std::array get_color_surface_addresses() const; @@ -344,6 +346,9 @@ namespace rsx RSXVertexProgram current_vertex_program = {}; RSXFragmentProgram current_fragment_program = {}; + program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {}; + program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {}; + void get_current_vertex_program(); /** diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8064ac7727..b6b9a9bed0 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2209,10 +2209,13 @@ bool VKGSRender::check_program_status() void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) { - get_current_fragment_program(fs_sampler_state); - verify(HERE), current_fragment_program.valid; + if (m_fragment_program_dirty || m_vertex_program_dirty) + { + get_current_fragment_program(fs_sampler_state); + verify(HERE), current_fragment_program.valid; - get_current_vertex_program(); + get_current_vertex_program(); + } auto &vertex_program = current_vertex_program; auto &fragment_program = current_fragment_program; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index ef0264d42c..c097e3043f 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -357,9 +357,21 @@ namespace rsx static void impl(thread* rsx, u32 _reg, u32 arg) { method_registers.commit_4_transform_program_instructions(index); + rsx->m_vertex_program_dirty = true; } }; + void set_transform_program_start(thread* rsx, u32, u32) + { + rsx->m_vertex_program_dirty = true; + } + + void set_vertex_attribute_output_mask(thread* rsx, u32, u32) + { + rsx->m_vertex_program_dirty = true; + rsx->m_fragment_program_dirty = true; + } + void set_begin_end(thread* rsxthr, u32 _reg, u32 arg) { if (arg) @@ -521,6 +533,11 @@ namespace rsx rsx->sync(); } + void invalidate_L2(thread* rsx, u32, u32) + { + rsx->m_fragment_program_dirty = true; + } + void set_surface_dirty_bit(thread* rsx, u32, u32) { rsx->m_rtts_dirty = true; @@ -539,6 +556,7 @@ namespace rsx static void impl(thread* rsx, u32 _reg, u32 arg) { rsx->m_textures_dirty[index] = true; + rsx->m_fragment_program_dirty = true; } }; @@ -565,6 +583,8 @@ namespace rsx const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2); u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062()); vm::write32(address, arg); + + rsx->m_fragment_program_dirty = true; } }; } @@ -1711,6 +1731,9 @@ namespace rsx bind(); bind(); bind(); + bind(); + bind(); + bind(); //NV308A bind_range();