diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 0d22366760..fdffff19ae 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -20,8 +20,8 @@ namespace gl { - using vertex_cache = rsx::vertex_cache::default_vertex_cache, GLenum>; - using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache; + using vertex_cache = rsx::vertex_cache::default_vertex_cache; + using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache; using null_vertex_cache = vertex_cache; using shader_cache = rsx::shaders_cache; diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index b214b4cbfd..f4e4405407 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -195,7 +195,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride); storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset; - if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first)) + if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first)) { ensure(cached->local_address == storage_address); @@ -216,7 +216,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() if (to_store) { //store ref in vertex cache - m_vertex_cache->store_range(storage_address, GL_R8UI, required.first, persistent_mapping.second); + m_vertex_cache->store_range(storage_address, required.first, persistent_mapping.second); } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp index 0306c692e7..10d48f848b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp +++ b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp @@ -31,8 +31,8 @@ namespace vk struct program_cache; struct pipeline_props; - using vertex_cache = rsx::vertex_cache::default_vertex_cache, VkFormat>; - using weak_vertex_cache = 
rsx::vertex_cache::weak_vertex_cache; + using vertex_cache = rsx::vertex_cache::default_vertex_cache; + using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache; using null_vertex_cache = vertex_cache; using shader_cache = rsx::shaders_cache; diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 59f4a47ae5..e690139653 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -251,7 +251,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride); storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset; - if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first)) + if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first)) { ensure(cached->local_address == storage_address); @@ -274,7 +274,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() if (to_store) { //store ref in vertex cache - m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, static_cast(persistent_offset)); + m_vertex_cache->store_range(storage_address, required.first, static_cast(persistent_offset)); } } } diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 51fc4313af..f2cde4ec50 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -3,6 +3,7 @@ #include "Utilities/lockless.h" #include "Utilities/Thread.h" #include "Common/bitfield.hpp" +#include "Common/unordered_map.hpp" #include "Emu/System.h" #include "Emu/cache_utils.hpp" #include "Program/ProgramStateCache.h" @@ -10,7 +11,6 @@ #include "Overlays/Shaders/shader_loading_dialog.h" #include -#include #include "util/sysinfo.hpp" #include "util/fnv_hash.hpp" @@ -447,67 +447,62 @@ namespace rsx namespace vertex_cache { // A null vertex cache - template + template class default_vertex_cache { 
public: virtual ~default_vertex_cache() = default; - virtual storage_type* find_vertex_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; } - virtual void store_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {} + virtual const storage_type* find_vertex_range(u32 /*local_addr*/, u32 /*data_length*/) { return nullptr; } + virtual void store_range(u32 /*local_addr*/, u32 /*data_length*/, u32 /*offset_in_heap*/) {} virtual void purge() {} }; - // A weak vertex cache with no data checks or memory range locks - // Of limited use since contents are only guaranteed to be valid once per frame - // TODO: Strict vertex cache with range locks - template struct uploaded_range { uptr local_address; - upload_format buffer_format; u32 offset_in_heap; u32 data_length; }; - template - class weak_vertex_cache : public default_vertex_cache, upload_format> + // A weak vertex cache with no data checks or memory range locks + // Of limited use since contents are only guaranteed to be valid once per frame + // Supports up to 1GiB block lengths if typed and full 4GiB otherwise. + // Using a 1:1 hash-value with robin-hood is 2x faster than what we had before with std-map-of-arrays. 
+ class weak_vertex_cache : public default_vertex_cache { - using storage_type = uploaded_range; + using storage_type = uploaded_range; private: - std::unordered_map> vertex_ranges; + rsx::unordered_map vertex_ranges; + + FORCE_INLINE u64 hash(u32 local_addr, u32 data_length) const + { + return u64(local_addr) | (u64(data_length) << 32); + } public: - storage_type* find_vertex_range(uptr local_addr, upload_format fmt, u32 data_length) override + const storage_type* find_vertex_range(u32 local_addr, u32 data_length) override { - auto found = vertex_ranges.find(local_addr); + const auto key = hash(local_addr, data_length); + const auto found = vertex_ranges.find(key); if (found == vertex_ranges.end()) { return nullptr; } - for (auto &v : found->second) - { - // NOTE: This has to match exactly. Using sized shortcuts such as >= comparison causes artifacting in some applications (UC1) - if (v.data_length == data_length && v.buffer_format == fmt) - { - return &v; - } - } - - return nullptr; + return std::addressof(found->second); } - void store_range(uptr local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override + void store_range(u32 local_addr, u32 data_length, u32 offset_in_heap) override { storage_type v = {}; - v.buffer_format = fmt; v.data_length = data_length; v.local_address = local_addr; v.offset_in_heap = offset_in_heap; - vertex_ranges[local_addr].push_back(v); + const auto key = hash(local_addr, data_length); + vertex_ranges[key] = v; } void purge() override diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 84c2fcbaf6..7298809bc1 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -556,6 +556,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index edb6fca74f..a3505ea837 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2365,6 +2365,9 @@ Emu\Io + + Emu\GPU\RSX\Common +