From c7dca1dbefe7c55b11828c6d67569336e526e9a4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 9 Aug 2017 14:30:15 +0300 Subject: [PATCH] rsx/vk: Implement shaders cache and fix broken pipeline_storage comparison and hash - Fix pipeline state storage to uniquely store each pipeline variant - Adds a progress bar to indicate loading is taking place --- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 5 + rpcs3/Emu/RSX/RSXFragmentProgram.h | 8 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 18 +- rpcs3/Emu/RSX/VK/VKGSRender.h | 5 +- rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 58 +++++- rpcs3/Emu/RSX/rsx_cache.h | 240 +++++++++++++++++++++++ 6 files changed, 319 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 0d182b7c11..620255e3d9 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -110,6 +110,7 @@ class program_state_cache protected: size_t m_next_id = 0; + bool m_cache_miss_flag; binary_to_vertex_program m_vertex_shader_cache; binary_to_fragment_program m_fragment_shader_cache; std::unordered_map m_storage; @@ -197,7 +198,10 @@ public: { const auto I = m_storage.find(key); if (I != m_storage.end()) + { + m_cache_miss_flag = false; return I->second; + } } LOG_NOTICE(RSX, "Add program :"); @@ -205,6 +209,7 @@ public: LOG_NOTICE(RSX, "*** fp id = %d", fragment_program.id); m_storage[key] = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward(args)...); + m_cache_miss_flag = true; return m_storage[key]; } diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index 668b0e6b8b..e1a0ddded1 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h +++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -259,13 +259,7 @@ struct RSXFragmentProgram } RSXFragmentProgram() - : size(0) - , addr(0) - , offset(0) - , ctrl(0) - , unnormalized_coords(0) - , texture_dimensions(0) - , valid(false) { + memset(this, 0, sizeof(RSXFragmentProgram)); } }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 17aec9c3af..7fb313183e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -614,11 +614,15 @@ VKGSRender::VKGSRender() : GSRender() m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]); } + m_prog_buffer.reset(new VKProgramBuffer(m_render_passes.data())); + if (g_cfg.video.disable_vertex_cache) m_vertex_cache.reset(new vk::null_vertex_cache()); else m_vertex_cache.reset(new vk::weak_vertex_cache()); + m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "v1")); + open_command_buffer(); for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) @@ -655,7 +659,7 @@ VKGSRender::~VKGSRender() //Shaders vk::finalize_compiler_context(); - m_prog_buffer.clear(); + m_prog_buffer->clear(); //Global resources vk::destroy_global_resources(); @@ -1213,6 +1217,8 @@ void VKGSRender::on_init_thread() GSRender::on_init_thread(); rsx_thread = std::this_thread::get_id(); + + m_shaders_cache->load(*m_device, pipeline_layout); } void VKGSRender::on_exit() @@ -1732,6 +1738,7 @@ bool VKGSRender::check_program_status() (u8)m_draw_buffers_count); properties.render_pass = m_render_passes[idx]; + properties.render_pass_location = (int)idx; properties.num_targets = m_draw_buffers_count; @@ -1739,7 +1746,10 @@ bool VKGSRender::check_program_status() //Load current program from buffer vertex_program.skip_vertex_input_check = true; - m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); + m_program = m_prog_buffer->getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); + + if (m_prog_buffer->check_cache_missed()) + m_shaders_cache->store(properties, vertex_program, fragment_program); vk::leave_uninterruptible(); @@ -1781,7 +1791,7 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) auto &vertex_program = current_vertex_program; auto &fragment_program = current_fragment_program; - const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program); const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float)); const size_t required_mem = 512 + 8192 + fragment_buffer_sz; @@ -1807,7 +1817,7 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) //Fragment constants buf = buf + 8192; if (fragment_constants_sz) - m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); + m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 64775db26f..1dae995b97 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -21,6 +21,8 @@ namespace vk using vertex_cache = rsx::vertex_cache::default_vertex_cache, VkFormat>; using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache; using null_vertex_cache = vertex_cache; + + using shader_cache = rsx::shaders_cache; } //Heap allocation sizes in MB @@ -123,9 +125,10 @@ private: public: //vk::fbo draw_fbo; std::unique_ptr m_vertex_cache; + std::unique_ptr m_shaders_cache; private: - VKProgramBuffer m_prog_buffer; + std::unique_ptr m_prog_buffer; vk::render_device *m_device; vk::swap_chain* m_swap_chain; diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 56ad392a56..685a21268c 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -16,6 +16,7 @@ namespace vk VkRenderPass render_pass; int num_targets; + int render_pass_location; bool operator==(const pipeline_props& other) const { @@ -28,7 +29,13 @@ namespace vk if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) return false; - if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo))) + //Cannot memcmp cs due to pAttachments being a pointer to memory + if (cs.attachmentCount != other.cs.attachmentCount || + cs.flags != other.cs.flags || + cs.logicOp != other.cs.logicOp || + cs.logicOpEnable != other.cs.logicOpEnable || + cs.sType != other.cs.sType || + memcmp(cs.blendConstants, other.cs.blendConstants, 4 * sizeof(f32))) return false; if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) @@ -37,6 +44,9 @@ namespace vk if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) return false; + if (num_targets != other.num_targets) + return false; + return num_targets == other.num_targets; } }; @@ -64,7 +74,12 @@ namespace std seed ^= hash_struct(pipelineProperties.ia); seed ^= hash_struct(pipelineProperties.ds); seed ^= hash_struct(pipelineProperties.rs); - seed ^= hash_struct(pipelineProperties.cs); + + //Do not compare pointers to memory! + auto tmp = pipelineProperties.cs; + tmp.pAttachments = nullptr; + seed ^= hash_struct(tmp); + seed ^= hash_struct(pipelineProperties.att_state[0]); return hash()(seed); } @@ -93,7 +108,8 @@ struct VKTraits } static - pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout) + pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, + const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout) { VkPipelineShaderStageCreateInfo shader_stages[2] = {}; @@ -159,11 +175,47 @@ struct VKTraits class VKProgramBuffer : public program_state_cache { + const VkRenderPass *m_render_pass_data; + public: + VKProgramBuffer(VkRenderPass *renderpass_list) + : m_render_pass_data(renderpass_list) + {} + void clear() { program_state_cache::clear(); m_vertex_shader_cache.clear(); m_fragment_shader_cache.clear(); } + + u64 get_hash(vk::pipeline_props &props) + { + return std::hash()(props); + } + + u64 get_hash(RSXVertexProgram &prog) + { + return program_hash_util::vertex_program_hash()(prog); + } + + u64 get_hash(RSXFragmentProgram &prog) + { + return program_hash_util::fragment_program_hash()(prog); + } + + template + void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, vk::pipeline_props &props, Args&& ...args) + { + //Extract pointers from pipeline props + props.render_pass = m_render_pass_data[props.render_pass_location]; + props.cs.pAttachments = props.att_state; + vp.skip_vertex_input_check = true; + getGraphicPipelineState(vp, fp, props, std::forward(args)...); + } + + bool check_cache_missed() const + { + return m_cache_miss_flag; + } }; diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 0e296f0bac..d49756872f 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -2,6 +2,9 @@ #include "Utilities/VirtualMemory.h" #include "Emu/Memory/vm.h" #include "gcm_enums.h" +#include "Common/ProgramStateCache.h" +#include "Emu/Cell/Modules/cellMsgDialog.h" +#include "Emu/System.h" namespace rsx { @@ -202,6 +205,243 @@ namespace rsx } }; + template + class shaders_cache + { + struct pipeline_data + { + u64 vertex_program_hash; + u64 fragment_program_hash; + u64 pipeline_storage_hash; + + u32 fp_ctrl; + u32 fp_texture_dimensions; + u16 fp_unnormalized_coords; + u16 fp_height; + u16 fp_pixel_layout; + u16 fp_lighting_flags; + u16 fp_shadow_textures; + u16 fp_redirected_textures; + + pipeline_storage_type pipeline_properties; + }; + + std::string version_prefix; + std::string root_path; + std::unordered_map> fragment_program_data; + + backend_storage& m_storage; + + public: + + shaders_cache(backend_storage& storage, std::string version_prefix_str = "v1") + : version_prefix(version_prefix_str) + , m_storage(storage) + { + root_path = Emu.GetCachePath() + "/shaders_cache"; + } + + template + void load(Args&& ...args) + { + std::string directory_path = root_path + "/pipelines"; + + if (!fs::is_dir(directory_path)) + { + fs::create_path(directory_path); + fs::create_path(root_path + "/raw"); + + return; + } + + fs::dir root = fs::dir(directory_path); + fs::dir_entry tmp; + + u32 entry_count = 0; + for (auto It = root.begin(); It != root.end(); ++It, entry_count++); + + if (entry_count <= 2) + return; + + entry_count -= 2; + f32 delta = 100.f / entry_count; + f32 tally = 0.f; + + root.rewind(); + + // Progress dialog + auto dlg = Emu.GetCallbacks().get_msg_dialog(); + dlg->type.se_normal = true; + dlg->type.bg_invisible = true; + dlg->type.progress_bar_count = 1; + dlg->on_close = [](s32 status) + { + Emu.CallAfter([]() + { + Emu.Stop(); + }); + }; + + Emu.CallAfter([=]() + { + dlg->Create("Preloading cached shaders from disk.\nPlease wait..."); + }); + + u32 processed = 0; + while (root.read(tmp)) + { + if (tmp.name == "." || tmp.name == "..") + continue; + + std::vector bytes; + fs::file f(directory_path + "/" + tmp.name); + + processed++; + Emu.CallAfter([=]() + { + dlg->ProgressBarSetMsg(0, fmt::format("Loading pipeline object %u of %u", processed, entry_count)); + }); + + if (f.size() != sizeof(pipeline_data)) + { + LOG_ERROR(RSX, "Cached pipeline object %s is not binary compatible with the current shader cache", tmp.name.c_str()); + continue; + } + + f.read(bytes, f.size()); + auto unpacked = unpack(*(pipeline_data*)bytes.data()); + m_storage.add_pipeline_entry(std::get<1>(unpacked), std::get<2>(unpacked), std::get<0>(unpacked), std::forward(args)...); + + tally += delta; + if (tally > 1.f) + { + u32 value = (u32)tally; + Emu.CallAfter([=]() + { + dlg->ProgressBarInc(0, value); + }); + + tally -= (f32)value; + } + } + } + + void store(pipeline_storage_type &pipeline, RSXVertexProgram &vp, RSXFragmentProgram &fp) + { + pipeline_data data = pack(pipeline, vp, fp); + std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash); + std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash); + + if (!fs::is_file(fp_name)) + { + const auto size = program_hash_util::fragment_program_utils::get_fragment_program_ucode_size(fp.addr); + fs::file(fp_name, fs::rewrite).write(fp.addr, size); + } + + if (!fs::is_file(vp_name)) + { + std::vector output; + output.resize(vp.data.size() + 1); + output[0] = vp.output_mask; + std::copy(vp.data.begin(), vp.data.end(), output.begin() + 1); + + fs::file(vp_name, fs::rewrite).write(output); + } + + u64 state_hash = 0; + state_hash ^= std::hash()(data.fp_ctrl); + state_hash ^= std::hash()(data.fp_texture_dimensions); + state_hash ^= std::hash()(data.fp_unnormalized_coords); + state_hash ^= std::hash()(data.fp_height); + state_hash ^= std::hash()(data.fp_pixel_layout); + state_hash ^= std::hash()(data.fp_lighting_flags); + state_hash ^= std::hash()(data.fp_shadow_textures); + state_hash ^= std::hash()(data.fp_redirected_textures); + + std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash); + std::string pipeline_path = root_path + "/pipelines/" + version_prefix + "-" + pipeline_file_name; + fs::file(pipeline_path, fs::rewrite).write(&data, sizeof(pipeline_data)); + } + + RSXVertexProgram load_vp_raw(u64 program_hash) + { + std::vector data; + std::string filename = fmt::format("%llX.vp", program_hash); + + fs::file f(root_path + "/raw/" + filename); + f.read(data, f.size() / sizeof(u32)); + + RSXVertexProgram vp = {}; + vp.data.resize(data.size() - 1); + + vp.output_mask = data[0]; + vp.skip_vertex_input_check = true; + std::copy(data.begin() + 1, data.end(), vp.data.begin()); + + return vp; + } + + RSXFragmentProgram load_fp_raw(u64 program_hash) + { + std::vector data; + std::string filename = fmt::format("%llX.fp", program_hash); + + fs::file f(root_path + "/raw/" + filename); + f.read(data, f.size()); + + RSXFragmentProgram fp = {}; + fragment_program_data[program_hash] = data; + fp.addr = fragment_program_data[program_hash].data(); + + return fp; + } + + std::tuple unpack(pipeline_data &data) + { + RSXVertexProgram vp = load_vp_raw(data.vertex_program_hash); + RSXFragmentProgram fp = load_fp_raw(data.fragment_program_hash); + pipeline_storage_type pipeline = data.pipeline_properties; + + fp.ctrl = data.fp_ctrl; + fp.texture_dimensions = data.fp_texture_dimensions; + fp.unnormalized_coords = data.fp_unnormalized_coords; + fp.height = data.fp_height; + fp.pixel_center_mode = (rsx::window_pixel_center)(data.fp_pixel_layout & 0x3); + fp.origin_mode = (rsx::window_origin)((data.fp_pixel_layout >> 2) & 0x1); + fp.alpha_func = (rsx::comparison_function)((data.fp_pixel_layout >> 3) & 0xF); + fp.front_back_color_enabled = (data.fp_lighting_flags & 0x1) != 0; + fp.back_color_diffuse_output = ((data.fp_lighting_flags >> 1) & 0x1) != 0; + fp.back_color_specular_output = ((data.fp_lighting_flags >> 2) & 0x1) != 0; + fp.front_color_diffuse_output = ((data.fp_lighting_flags >> 3) & 0x1) != 0; + fp.front_color_specular_output = ((data.fp_lighting_flags >> 4) & 0x1) != 0; + fp.shadow_textures = data.fp_shadow_textures; + fp.redirected_textures = data.fp_redirected_textures; + + return std::make_tuple(pipeline, vp, fp); + } + + pipeline_data pack(pipeline_storage_type &pipeline, RSXVertexProgram &vp, RSXFragmentProgram &fp) + { + pipeline_data data_block; + data_block.pipeline_properties = pipeline; + data_block.vertex_program_hash = m_storage.get_hash(vp); + data_block.fragment_program_hash = m_storage.get_hash(fp); + data_block.pipeline_storage_hash = m_storage.get_hash(pipeline); + + data_block.fp_ctrl = fp.ctrl; + data_block.fp_texture_dimensions = fp.texture_dimensions; + data_block.fp_unnormalized_coords = fp.unnormalized_coords; + data_block.fp_height = fp.height; + data_block.fp_pixel_layout = (u16)fp.pixel_center_mode | (u16)fp.origin_mode << 2 | (u16)fp.alpha_func << 3; + data_block.fp_lighting_flags = (u16)fp.front_back_color_enabled | (u16)fp.back_color_diffuse_output << 1 | + (u16)fp.back_color_specular_output << 2 | (u16)fp.front_color_diffuse_output << 3 | (u16)fp.front_color_specular_output << 4; + data_block.fp_shadow_textures = fp.shadow_textures; + data_block.fp_redirected_textures = fp.redirected_textures; + + return data_block; + } + }; + namespace vertex_cache { // A null vertex cache