mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 03:55:32 +00:00
rsx/vk: Vertex cache rewritten, add option to disable it as well
- Also enable SPU loop detection by default while we're at it
This commit is contained in:
parent
7fa42cfaad
commit
46fa6e47fe
6 changed files with 79 additions and 53 deletions
|
@ -626,6 +626,11 @@ VKGSRender::VKGSRender() : GSRender()
|
|||
m_text_writer.reset(new vk::text_writer());
|
||||
m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]);
|
||||
}
|
||||
|
||||
if (g_cfg.video.disable_vertex_cache)
|
||||
m_vertex_cache.reset(new null_vertex_cache());
|
||||
else
|
||||
m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache());
|
||||
}
|
||||
|
||||
VKGSRender::~VKGSRender()
|
||||
|
@ -809,7 +814,7 @@ void VKGSRender::begin()
|
|||
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
|
||||
|
||||
flush_command_queue(true);
|
||||
m_vertex_cache.purge();
|
||||
m_vertex_cache->purge();
|
||||
|
||||
CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
|
||||
m_last_descriptor_set = VK_NULL_HANDLE;
|
||||
|
@ -1264,6 +1269,8 @@ void VKGSRender::on_init_thread()
|
|||
|
||||
GSRender::on_init_thread();
|
||||
rsx_thread = std::this_thread::get_id();
|
||||
|
||||
thread_ctrl::set_native_priority(1);
|
||||
}
|
||||
|
||||
void VKGSRender::on_exit()
|
||||
|
@ -1535,7 +1542,7 @@ void VKGSRender::process_swap_request()
|
|||
m_text_writer->reset_descriptors();
|
||||
}
|
||||
|
||||
m_vertex_cache.purge();
|
||||
m_vertex_cache->purge();
|
||||
|
||||
m_swap_command_buffer = nullptr;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
|
||||
#pragma comment(lib, "VKstatic.1.lib")
|
||||
|
||||
using namespace vk::vertex_cache;
|
||||
using null_vertex_cache = rsx::vertex_cache<uploaded_range, VkFormat>;
|
||||
|
||||
//Heap allocation sizes in MB
|
||||
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
|
||||
#define VK_UBO_RING_BUFFER_SIZE_M 32
|
||||
|
@ -90,49 +93,6 @@ struct command_buffer_chunk: public vk::command_buffer
|
|||
}
|
||||
};
|
||||
|
||||
struct weak_vertex_cache
|
||||
{
|
||||
struct uploaded_range
|
||||
{
|
||||
u32 offset_in_heap;
|
||||
|
||||
VkFormat buffer_format;
|
||||
uintptr_t local_address;
|
||||
u32 data_length;
|
||||
};
|
||||
|
||||
private:
|
||||
std::vector<uploaded_range> vertex_ranges;
|
||||
public:
|
||||
|
||||
uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length)
|
||||
{
|
||||
for (auto &v : vertex_ranges)
|
||||
{
|
||||
if (v.local_address == local_addr && v.buffer_format == fmt && v.data_length == data_length)
|
||||
return &v;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap)
|
||||
{
|
||||
uploaded_range v = {};
|
||||
v.buffer_format = fmt;
|
||||
v.data_length = data_length;
|
||||
v.local_address = local_addr;
|
||||
v.offset_in_heap = offset_in_heap;
|
||||
|
||||
vertex_ranges.push_back(v);
|
||||
}
|
||||
|
||||
void purge()
|
||||
{
|
||||
vertex_ranges.resize(0);
|
||||
}
|
||||
};
|
||||
|
||||
class VKGSRender : public GSRender
|
||||
{
|
||||
private:
|
||||
|
@ -157,7 +117,7 @@ private:
|
|||
|
||||
public:
|
||||
//vk::fbo draw_fbo;
|
||||
weak_vertex_cache m_vertex_cache;
|
||||
std::unique_ptr<null_vertex_cache> m_vertex_cache;
|
||||
|
||||
private:
|
||||
VKProgramBuffer m_prog_buffer;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "../GCM.h"
|
||||
#include "../Common/TextureUtils.h"
|
||||
#include "../Common/ring_buffer_helper.h"
|
||||
#include "../rsx_cache.h"
|
||||
|
||||
#define DESCRIPTOR_MAX_DRAW_CALLS 4096
|
||||
|
||||
|
@ -1456,6 +1457,54 @@ namespace vk
|
|||
}
|
||||
};
|
||||
|
||||
namespace vertex_cache
|
||||
{
|
||||
struct uploaded_range
|
||||
{
|
||||
uintptr_t local_address;
|
||||
VkFormat buffer_format;
|
||||
u32 offset_in_heap;
|
||||
u32 data_length;
|
||||
};
|
||||
|
||||
// A weak vertex cache with no data checks or memory range locks
|
||||
// Of limited use since contents are only guaranteed to be valid once per frame
|
||||
// TODO: Strict vertex cache with range locks
|
||||
class weak_vertex_cache: public rsx::vertex_cache<uploaded_range, VkFormat>
|
||||
{
|
||||
private:
|
||||
std::unordered_map<uintptr_t, std::vector<uploaded_range>> vertex_ranges;
|
||||
public:
|
||||
|
||||
uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override
|
||||
{
|
||||
for (auto &v : vertex_ranges[local_addr])
|
||||
{
|
||||
if (v.buffer_format == fmt && v.data_length == data_length)
|
||||
return &v;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) override
|
||||
{
|
||||
uploaded_range v = {};
|
||||
v.buffer_format = fmt;
|
||||
v.data_length = data_length;
|
||||
v.local_address = local_addr;
|
||||
v.offset_in_heap = offset_in_heap;
|
||||
|
||||
vertex_ranges[local_addr].push_back(v);
|
||||
}
|
||||
|
||||
void purge() override
|
||||
{
|
||||
vertex_ranges.clear();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
|
||||
* Then copy all layers into dst_image.
|
||||
|
|
|
@ -252,10 +252,10 @@ namespace
|
|||
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
|
||||
vk::glsl::program* prog, VkDescriptorSet desc_set,
|
||||
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
|
||||
weak_vertex_cache& vertex_cache)
|
||||
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache)
|
||||
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
|
||||
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
|
||||
vertex_cache(&vertex_cache)
|
||||
vertex_cache(vertex_cache)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -341,7 +341,7 @@ namespace
|
|||
vk::glsl::program* m_program;
|
||||
VkDescriptorSet descriptor_sets;
|
||||
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
|
||||
weak_vertex_cache* vertex_cache;
|
||||
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache;
|
||||
};
|
||||
|
||||
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
|
||||
|
@ -470,7 +470,7 @@ namespace
|
|||
const u32 vertex_count = vertex_max_index - min_index + 1;
|
||||
|
||||
vertex_buffer_visitor visitor(vertex_count, m_device,
|
||||
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache);
|
||||
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get());
|
||||
|
||||
const auto& vertex_buffers = get_vertex_buffers(
|
||||
rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
|
||||
|
@ -500,7 +500,7 @@ namespace
|
|||
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
|
||||
const uintptr_t local_addr = (uintptr_t)v.data.data();
|
||||
|
||||
const auto cached = rsxthr->m_vertex_cache.find_vertex_range(local_addr, format, upload_size);
|
||||
const auto cached = rsxthr->m_vertex_cache->find_vertex_range(local_addr, format, upload_size);
|
||||
if (cached)
|
||||
{
|
||||
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size));
|
||||
|
@ -520,7 +520,7 @@ namespace
|
|||
upload_jobs.push_back(i);
|
||||
|
||||
const uintptr_t local_addr = (uintptr_t)v.data.data();
|
||||
rsxthr->m_vertex_cache.store_range(local_addr, format, upload_size, (u32)offset);
|
||||
rsxthr->m_vertex_cache->store_range(local_addr, format, upload_size, (u32)offset);
|
||||
|
||||
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
|
||||
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
|
||||
|
|
|
@ -194,4 +194,13 @@ namespace rsx
|
|||
return std::make_pair(min, max);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename storage_type, typename upload_format>
|
||||
class vertex_cache
|
||||
{
|
||||
public:
|
||||
virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
|
||||
virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
|
||||
virtual void purge() {}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -277,7 +277,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_int<32, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC
|
||||
cfg::_int<0, 6> preferred_spu_threads{this, "Preferred SPU Threads", 0}; //Numnber of hardware threads dedicated to heavy simultaneous spu tasks
|
||||
cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free
|
||||
cfg::_bool spu_loop_detection{this, "SPU loop detection", false}; //Try to detect wait loops and trigger thread yield
|
||||
cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield
|
||||
|
||||
cfg::_enum<lib_loading_type> lib_loading{this, "Lib Loader", lib_loading_type::automatic};
|
||||
cfg::_bool hook_functions{this, "Hook static functions"};
|
||||
|
@ -326,6 +326,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true};
|
||||
cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"};
|
||||
|
||||
cfg::_bool disable_vertex_cache{this, "Disable Vertex Cache", false};
|
||||
cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; //Avoid re-uploading geometry if the same draw command is repeated
|
||||
cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; //Max number of threads to use for parallel vertex processing
|
||||
cfg::_int<32, 65536> mt_vertex_upload_threshold{ this, "Multithreaded Vertex Upload Threshold", 512}; //Minimum vertex count to parallelize
|
||||
|
|
Loading…
Add table
Reference in a new issue