// Review preamble (sits before the first `diff --git` header, so it is not part of any hunk).
//
// Purpose of this patch, as far as the hunks below show:
//  * rsx_cache.h: adds a class `rsx::vertex_cache` with three virtual members
//    (find_vertex_range / store_range / purge) whose default bodies do nothing
//    (find returns nullptr), so the base class itself acts as a "null" cache.
//  * VKHelpers.h: adds `vk::vertex_cache::uploaded_range` and
//    `vk::vertex_cache::weak_vertex_cache`, which overrides those three members and
//    stores ranges in an unordered_map indexed by `local_addr`, each entry holding a
//    list that is scanned for a matching format and data length.
//  * VKGSRender.h: removes the old standalone `weak_vertex_cache` struct (flat vector,
//    linear scan over address/format/length), adds the `null_vertex_cache` alias, and
//    changes the `m_vertex_cache` member from a value to a `std::unique_ptr`.
//  * VKGSRender.cpp: the constructor allocates either `null_vertex_cache` or
//    `weak_vertex_cache` depending on `g_cfg.video.disable_vertex_cache`; the purge()
//    call sites switch from `.` to `->`; on_init_thread() additionally calls
//    `thread_ctrl::set_native_priority(1)`.
//  * VKVertexBuffers.cpp: the visitor takes an `rsx::vertex_cache*` instead of a
//    `weak_vertex_cache&`, and cache lookups/stores go through the pointer.
//  * System.h: adds the `disable_vertex_cache` option (default false) and changes the
//    default of `spu_loop_detection` from false to true.
//
// NOTE(review): `rsx::vertex_cache` as added here shows virtual functions but no virtual
//   destructor. The renderer resets one `std::unique_ptr` member with both the base type and
//   the derived `weak_vertex_cache`, so the derived object is presumably destroyed through
//   a base pointer -- undefined behaviour without `virtual ~vertex_cache() = default;`.
//   The unique_ptr's element type is not visible in this copy -- confirm.
// NOTE(review): `find_vertex_range` iterates `vertex_ranges[local_addr]`; operator[] on an
//   unordered_map inserts an empty entry for every address that misses. A `find()` lookup
//   would avoid growing the map on misses between purges.
// NOTE(review): `using namespace vk::vertex_cache;` is added at namespace scope in a header
//   (VKGSRender.h), so it applies to every translation unit that includes it.
// NOTE(review): the `spu_loop_detection` default flip looks unrelated to the vertex cache
//   change -- confirm it is intentional for this patch.
// NOTE(review): this copy of the patch appears to have lost its line breaks and anything
//   that was written between angle brackets (e.g. `std::vector vertex_ranges`,
//   `template class vertex_cache`, `std::unique_ptr m_vertex_cache`), and part of the
//   VKVertexBuffers.cpp section seems to be missing (`std::vectorm_vertex_cache);`).
//   It presumably will not apply as-is; re-export from the original commit before use.
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d013941f6b..0fae530d00 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -626,6 +626,11 @@ VKGSRender::VKGSRender() : GSRender() m_text_writer.reset(new vk::text_writer()); m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]); } + + if (g_cfg.video.disable_vertex_cache) + m_vertex_cache.reset(new null_vertex_cache()); + else + m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache()); } VKGSRender::~VKGSRender() @@ -809,7 +814,7 @@ void VKGSRender::begin() std::chrono::time_point submit_start = steady_clock::now(); flush_command_queue(true); - m_vertex_cache.purge(); + m_vertex_cache->purge(); CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0)); m_last_descriptor_set = VK_NULL_HANDLE; @@ -1264,6 +1269,8 @@ void VKGSRender::on_init_thread() GSRender::on_init_thread(); rsx_thread = std::this_thread::get_id(); + + thread_ctrl::set_native_priority(1); } void VKGSRender::on_exit() @@ -1535,7 +1542,7 @@ void VKGSRender::process_swap_request() m_text_writer->reset_descriptors(); } - m_vertex_cache.purge(); + m_vertex_cache->purge(); m_swap_command_buffer = nullptr; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 6d021135d3..f032cbbce9 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -16,6 +16,9 @@ #pragma comment(lib, "VKstatic.1.lib") +using namespace vk::vertex_cache; +using null_vertex_cache = rsx::vertex_cache; + //Heap allocation sizes in MB #define VK_ATTRIB_RING_BUFFER_SIZE_M 256 #define VK_UBO_RING_BUFFER_SIZE_M 32 @@ -90,49 +93,6 @@ struct command_buffer_chunk: public vk::command_buffer } }; -struct weak_vertex_cache -{ - struct uploaded_range - { - u32 offset_in_heap; - - VkFormat buffer_format; - uintptr_t local_address; - u32 data_length; - }; - -private: - std::vector vertex_ranges; -public: - - uploaded_range* 
find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) - { - for (auto &v : vertex_ranges) - { - if (v.local_address == local_addr && v.buffer_format == fmt && v.data_length == data_length) - return &v; - } - - return nullptr; - } - - void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) - { - uploaded_range v = {}; - v.buffer_format = fmt; - v.data_length = data_length; - v.local_address = local_addr; - v.offset_in_heap = offset_in_heap; - - vertex_ranges.push_back(v); - } - - void purge() - { - vertex_ranges.resize(0); - } -}; - class VKGSRender : public GSRender { private: @@ -157,7 +117,7 @@ private: public: //vk::fbo draw_fbo; - weak_vertex_cache m_vertex_cache; + std::unique_ptr m_vertex_cache; private: VKProgramBuffer m_prog_buffer; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 93d6d2cb85..1ded3ebf69 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -17,6 +17,7 @@ #include "../GCM.h" #include "../Common/TextureUtils.h" #include "../Common/ring_buffer_helper.h" +#include "../rsx_cache.h" #define DESCRIPTOR_MAX_DRAW_CALLS 4096 @@ -1456,6 +1457,54 @@ namespace vk } }; + namespace vertex_cache + { + struct uploaded_range + { + uintptr_t local_address; + VkFormat buffer_format; + u32 offset_in_heap; + u32 data_length; + }; + + // A weak vertex cache with no data checks or memory range locks + // Of limited use since contents are only guaranteed to be valid once per frame + // TODO: Strict vertex cache with range locks + class weak_vertex_cache: public rsx::vertex_cache + { + private: + std::unordered_map> vertex_ranges; + public: + + uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override + { + for (auto &v : vertex_ranges[local_addr]) + { + if (v.buffer_format == fmt && v.data_length == data_length) + return &v; + } + + return nullptr; + } + + void store_range(uintptr_t local_addr, VkFormat fmt, u32 
data_length, u32 offset_in_heap) override + { + uploaded_range v = {}; + v.buffer_format = fmt; + v.data_length = data_length; + v.local_address = local_addr; + v.offset_in_heap = offset_in_heap; + + vertex_ranges[local_addr].push_back(v); + } + + void purge() override + { + vertex_ranges.clear(); + } + }; + } + /** * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. * Then copy all layers into dst_image. diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 26fed96214..2ddcb496e2 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -252,10 +252,10 @@ namespace vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap, vk::glsl::program* prog, VkDescriptorSet desc_set, std::vector>& buffer_view_to_clean, - weak_vertex_cache& vertex_cache) + rsx::vertex_cache* vertex_cache) : vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog), descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean), - vertex_cache(&vertex_cache) + vertex_cache(vertex_cache) { } @@ -341,7 +341,7 @@ namespace vk::glsl::program* m_program; VkDescriptorSet descriptor_sets; std::vector>& m_buffer_view_to_clean; - weak_vertex_cache* vertex_cache; + rsx::vertex_cache* vertex_cache; }; using attribute_storage = std::vectorm_vertex_cache); + m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get()); const auto& vertex_buffers = get_vertex_buffers( rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}}); @@ -500,7 +500,7 @@ namespace const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size); const uintptr_t local_addr = (uintptr_t)v.data.data(); - const auto cached = rsxthr->m_vertex_cache.find_vertex_range(local_addr, format, upload_size); + const auto cached = rsxthr->m_vertex_cache->find_vertex_range(local_addr, format, upload_size); if (cached) { 
m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size)); @@ -520,7 +520,7 @@ namespace upload_jobs.push_back(i); const uintptr_t local_addr = (uintptr_t)v.data.data(); - rsxthr->m_vertex_cache.store_range(local_addr, format, upload_size, (u32)offset); + rsxthr->m_vertex_cache->store_range(local_addr, format, upload_size, (u32)offset); m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size)); m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index c44c43cda8..6c0e9e0b55 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -194,4 +194,13 @@ namespace rsx return std::make_pair(min, max); } }; + + template + class vertex_cache + { + public: + virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; } + virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {} + virtual void purge() {} + }; } diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index c163aad0d6..5ea4180dbe 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -277,7 +277,7 @@ struct cfg_root : cfg::node cfg::_int<32, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC cfg::_int<0, 6> preferred_spu_threads{this, "Preferred SPU Threads", 0}; //Numnber of hardware threads dedicated to heavy simultaneous spu tasks cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free - cfg::_bool spu_loop_detection{this, "SPU loop detection", false}; //Try to detect wait loops and trigger thread yield + cfg::_bool 
spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield cfg::_enum lib_loading{this, "Lib Loader", lib_loading_type::automatic}; cfg::_bool hook_functions{this, "Hook static functions"}; @@ -326,6 +326,7 @@ struct cfg_root : cfg::node cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true}; cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"}; + cfg::_bool disable_vertex_cache{this, "Disable Vertex Cache", false}; cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; //Avoid re-uploading geometry if the same draw command is repeated cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; //Max number of threads to use for parallel vertex processing cfg::_int<32, 65536> mt_vertex_upload_threshold{ this, "Multithreaded Vertex Upload Threshold", 512}; //Minimum vertex count to parallelize