mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 03:55:32 +00:00
rsx: Drop format information from the vertex cache and use a simpler model that is much faster
This commit is contained in:
parent
2df7e46cb1
commit
4d565eade6
7 changed files with 35 additions and 36 deletions
|
@ -20,8 +20,8 @@
|
|||
|
||||
namespace gl
|
||||
{
|
||||
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<GLenum>, GLenum>;
|
||||
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<GLenum>;
|
||||
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range>;
|
||||
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache;
|
||||
using null_vertex_cache = vertex_cache;
|
||||
|
||||
using shader_cache = rsx::shaders_cache<void*, GLProgramBuffer>;
|
||||
|
|
|
@ -195,7 +195,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
|||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
|
||||
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first))
|
||||
{
|
||||
ensure(cached->local_address == storage_address);
|
||||
|
||||
|
@ -216,7 +216,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
|||
if (to_store)
|
||||
{
|
||||
//store ref in vertex cache
|
||||
m_vertex_cache->store_range(storage_address, GL_R8UI, required.first, persistent_mapping.second);
|
||||
m_vertex_cache->store_range(storage_address, required.first, persistent_mapping.second);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,8 +31,8 @@ namespace vk
|
|||
struct program_cache;
|
||||
struct pipeline_props;
|
||||
|
||||
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
|
||||
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
|
||||
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range>;
|
||||
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache;
|
||||
using null_vertex_cache = vertex_cache;
|
||||
|
||||
using shader_cache = rsx::shaders_cache<vk::pipeline_props, vk::program_cache>;
|
||||
|
|
|
@ -251,7 +251,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
|||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
|
||||
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first))
|
||||
{
|
||||
ensure(cached->local_address == storage_address);
|
||||
|
||||
|
@ -274,7 +274,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
|||
if (to_store)
|
||||
{
|
||||
//store ref in vertex cache
|
||||
m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, static_cast<u32>(persistent_offset));
|
||||
m_vertex_cache->store_range(storage_address, required.first, static_cast<u32>(persistent_offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "Utilities/lockless.h"
|
||||
#include "Utilities/Thread.h"
|
||||
#include "Common/bitfield.hpp"
|
||||
#include "Common/unordered_map.hpp"
|
||||
#include "Emu/System.h"
|
||||
#include "Emu/cache_utils.hpp"
|
||||
#include "Program/ProgramStateCache.h"
|
||||
|
@ -10,7 +11,6 @@
|
|||
#include "Overlays/Shaders/shader_loading_dialog.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/fnv_hash.hpp"
|
||||
|
@ -447,67 +447,62 @@ namespace rsx
|
|||
namespace vertex_cache
|
||||
{
|
||||
// A null vertex cache
|
||||
template <typename storage_type, typename upload_format>
|
||||
template <typename storage_type>
|
||||
class default_vertex_cache
|
||||
{
|
||||
public:
|
||||
virtual ~default_vertex_cache() = default;
|
||||
virtual storage_type* find_vertex_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
|
||||
virtual void store_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
|
||||
virtual const storage_type* find_vertex_range(u32 /*local_addr*/, u32 /*data_length*/) { return nullptr; }
|
||||
virtual void store_range(u32 /*local_addr*/, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
|
||||
virtual void purge() {}
|
||||
};
|
||||
|
||||
// A weak vertex cache with no data checks or memory range locks
|
||||
// Of limited use since contents are only guaranteed to be valid once per frame
|
||||
// TODO: Strict vertex cache with range locks
|
||||
template <typename upload_format>
|
||||
struct uploaded_range
|
||||
{
|
||||
uptr local_address;
|
||||
upload_format buffer_format;
|
||||
u32 offset_in_heap;
|
||||
u32 data_length;
|
||||
};
|
||||
|
||||
template <typename upload_format>
|
||||
class weak_vertex_cache : public default_vertex_cache<uploaded_range<upload_format>, upload_format>
|
||||
// A weak vertex cache with no data checks or memory range locks
|
||||
// Of limited use since contents are only guaranteed to be valid once per frame
|
||||
// Supports up to 1GiB block lengths if typed and full 4GiB otherwise.
|
||||
// Using a 1:1 hash value with a robin-hood map is 2x faster than what we had before with a std::unordered_map of vectors.
|
||||
class weak_vertex_cache : public default_vertex_cache<uploaded_range>
|
||||
{
|
||||
using storage_type = uploaded_range<upload_format>;
|
||||
using storage_type = uploaded_range;
|
||||
|
||||
private:
|
||||
std::unordered_map<uptr, std::vector<storage_type>> vertex_ranges;
|
||||
rsx::unordered_map<uptr, storage_type> vertex_ranges;
|
||||
|
||||
FORCE_INLINE u64 hash(u32 local_addr, u32 data_length) const
|
||||
{
|
||||
return u64(local_addr) | (u64(data_length) << 32);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
storage_type* find_vertex_range(uptr local_addr, upload_format fmt, u32 data_length) override
|
||||
const storage_type* find_vertex_range(u32 local_addr, u32 data_length) override
|
||||
{
|
||||
auto found = vertex_ranges.find(local_addr);
|
||||
const auto key = hash(local_addr, data_length);
|
||||
const auto found = vertex_ranges.find(key);
|
||||
if (found == vertex_ranges.end())
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
for (auto &v : found->second)
|
||||
{
|
||||
// NOTE: This has to match exactly. Using sized shortcuts such as >= comparison causes artifacting in some applications (UC1)
|
||||
if (v.data_length == data_length && v.buffer_format == fmt)
|
||||
{
|
||||
return &v;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
return std::addressof(found->second);
|
||||
}
|
||||
|
||||
void store_range(uptr local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override
|
||||
void store_range(u32 local_addr, u32 data_length, u32 offset_in_heap) override
|
||||
{
|
||||
storage_type v = {};
|
||||
v.buffer_format = fmt;
|
||||
v.data_length = data_length;
|
||||
v.local_address = local_addr;
|
||||
v.offset_in_heap = offset_in_heap;
|
||||
|
||||
vertex_ranges[local_addr].push_back(v);
|
||||
const auto key = hash(local_addr, data_length);
|
||||
vertex_ranges[key] = v;
|
||||
}
|
||||
|
||||
void purge() override
|
||||
|
|
|
@ -556,6 +556,7 @@
|
|||
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\time.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />
|
||||
|
|
|
@ -2365,6 +2365,9 @@
|
|||
<ClInclude Include="Emu\Io\emulated_pad_config.h">
|
||||
<Filter>Emu\Io</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp">
|
||||
<Filter>Emu\GPU\RSX\Common</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
||||
|
|
Loading…
Add table
Reference in a new issue