rsx: Drop format information from the vertex cache and use a simpler model that is much faster

This commit is contained in:
kd-11 2023-06-09 00:09:39 +03:00 committed by kd-11
parent 2df7e46cb1
commit 4d565eade6
7 changed files with 35 additions and 36 deletions

View file

@ -20,8 +20,8 @@
namespace gl
{
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<GLenum>, GLenum>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<GLenum>;
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache;
using null_vertex_cache = vertex_cache;
using shader_cache = rsx::shaders_cache<void*, GLProgramBuffer>;

View file

@ -195,7 +195,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first))
{
ensure(cached->local_address == storage_address);
@ -216,7 +216,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
if (to_store)
{
//store ref in vertex cache
m_vertex_cache->store_range(storage_address, GL_R8UI, required.first, persistent_mapping.second);
m_vertex_cache->store_range(storage_address, required.first, persistent_mapping.second);
}
}

View file

@ -31,8 +31,8 @@ namespace vk
struct program_cache;
struct pipeline_props;
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache;
using null_vertex_cache = vertex_cache;
using shader_cache = rsx::shaders_cache<vk::pipeline_props, vk::program_cache>;

View file

@ -251,7 +251,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first))
{
ensure(cached->local_address == storage_address);
@ -274,7 +274,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (to_store)
{
//store ref in vertex cache
m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, static_cast<u32>(persistent_offset));
m_vertex_cache->store_range(storage_address, required.first, static_cast<u32>(persistent_offset));
}
}
}

View file

@ -3,6 +3,7 @@
#include "Utilities/lockless.h"
#include "Utilities/Thread.h"
#include "Common/bitfield.hpp"
#include "Common/unordered_map.hpp"
#include "Emu/System.h"
#include "Emu/cache_utils.hpp"
#include "Program/ProgramStateCache.h"
@ -10,7 +11,6 @@
#include "Overlays/Shaders/shader_loading_dialog.h"
#include <chrono>
#include <unordered_map>
#include "util/sysinfo.hpp"
#include "util/fnv_hash.hpp"
@ -447,67 +447,62 @@ namespace rsx
namespace vertex_cache
{
// A null vertex cache
template <typename storage_type, typename upload_format>
template <typename storage_type>
class default_vertex_cache
{
public:
virtual ~default_vertex_cache() = default;
virtual storage_type* find_vertex_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
virtual void store_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
virtual const storage_type* find_vertex_range(u32 /*local_addr*/, u32 /*data_length*/) { return nullptr; }
virtual void store_range(u32 /*local_addr*/, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
virtual void purge() {}
};
// A weak vertex cache with no data checks or memory range locks
// Of limited use since contents are only guaranteed to be valid once per frame
// TODO: Strict vertex cache with range locks
template <typename upload_format>
struct uploaded_range
{
uptr local_address;
upload_format buffer_format;
u32 offset_in_heap;
u32 data_length;
};
template <typename upload_format>
class weak_vertex_cache : public default_vertex_cache<uploaded_range<upload_format>, upload_format>
// A weak vertex cache with no data checks or memory range locks
// Of limited use since contents are only guaranteed to be valid once per frame
// Supports up to 1GiB block lengths if typed and full 4GiB otherwise.
// Using a 1:1 hash-value with robin-hood is 2x faster than what we had before with std-map-of-arrays.
class weak_vertex_cache : public default_vertex_cache<uploaded_range>
{
using storage_type = uploaded_range<upload_format>;
using storage_type = uploaded_range;
private:
std::unordered_map<uptr, std::vector<storage_type>> vertex_ranges;
rsx::unordered_map<uptr, storage_type> vertex_ranges;
// Builds the 64-bit lookup key for a cached range: the address occupies the low 32 bits
// and the length the high 32 bits, so distinct (address, length) pairs never share a key.
FORCE_INLINE u64 hash(u32 local_addr, u32 data_length) const
{
const u64 length_bits = u64(data_length) << 32;
return length_bits | u64(local_addr);
}
public:
storage_type* find_vertex_range(uptr local_addr, upload_format fmt, u32 data_length) override
const storage_type* find_vertex_range(u32 local_addr, u32 data_length) override
{
auto found = vertex_ranges.find(local_addr);
const auto key = hash(local_addr, data_length);
const auto found = vertex_ranges.find(key);
if (found == vertex_ranges.end())
{
return nullptr;
}
for (auto &v : found->second)
{
// NOTE: This has to match exactly. Using sized shortcuts such as >= comparison causes artifacting in some applications (UC1)
if (v.data_length == data_length && v.buffer_format == fmt)
{
return &v;
}
}
return nullptr;
return std::addressof(found->second);
}
void store_range(uptr local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override
void store_range(u32 local_addr, u32 data_length, u32 offset_in_heap) override
{
storage_type v = {};
v.buffer_format = fmt;
v.data_length = data_length;
v.local_address = local_addr;
v.offset_in_heap = offset_in_heap;
vertex_ranges[local_addr].push_back(v);
const auto key = hash(local_addr, data_length);
vertex_ranges[key] = v;
}
void purge() override

View file

@ -556,6 +556,7 @@
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />

View file

@ -2365,6 +2365,9 @@
<ClInclude Include="Emu\Io\emulated_pad_config.h">
<Filter>Emu\Io</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">