mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
rsx/vk/gl: Refactoring - Also adds a vertex cache to openGL as well
This commit is contained in:
parent
fc6891a0e3
commit
4cd5624fa7
10 changed files with 124 additions and 86 deletions
|
@ -26,6 +26,11 @@ GLGSRender::GLGSRender() : GSRender()
|
|||
{
|
||||
//TODO
|
||||
//shaders_cache.load(rsx::old_shaders_cache::shader_language::glsl);
|
||||
|
||||
if (g_cfg.video.disable_vertex_cache)
|
||||
m_vertex_cache.reset(new gl::null_vertex_cache());
|
||||
else
|
||||
m_vertex_cache.reset(new gl::weak_vertex_cache());
|
||||
}
|
||||
|
||||
u32 GLGSRender::enable(u32 condition, u32 cap)
|
||||
|
@ -1091,6 +1096,8 @@ void GLGSRender::flip(int buffer)
|
|||
if (g_cfg.video.invalidate_surface_cache_every_frame)
|
||||
m_rtts.invalidate_surface_cache_data(nullptr);
|
||||
|
||||
m_vertex_cache->purge();
|
||||
|
||||
//If we are skipping the next frame, do not reset perf counters
|
||||
if (skip_frame) return;
|
||||
|
||||
|
|
|
@ -14,6 +14,13 @@
|
|||
|
||||
#pragma comment(lib, "opengl32.lib")
|
||||
|
||||
// Vertex cache type aliases for the OpenGL backend; uploads are keyed by GLenum format.
namespace gl
|
||||
{
|
||||
// Base cache interface: rsx::vertex_cache::default_vertex_cache storing uploaded_range<GLenum> records.
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<GLenum>, GLenum>;
|
||||
// Cache implementation that actually remembers uploads (rsx::vertex_cache::weak_vertex_cache).
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<GLenum>;
|
||||
// The base type doubles as the "null" cache; GLGSRender selects it when g_cfg.video.disable_vertex_cache is set.
using null_vertex_cache = vertex_cache;
|
||||
}
|
||||
|
||||
struct work_item
|
||||
{
|
||||
std::condition_variable cv;
|
||||
|
@ -57,8 +64,7 @@ private:
|
|||
s64 m_vertex_upload_time = 0;
|
||||
s64 m_textures_upload_time = 0;
|
||||
|
||||
//Compare to see if the transform matrix has changed
|
||||
size_t m_transform_buffer_hash = 0;
|
||||
std::unique_ptr<gl::vertex_cache> m_vertex_cache;
|
||||
|
||||
GLint m_min_texbuffer_alignment = 256;
|
||||
GLint m_uniform_buffer_offset_align = 256;
|
||||
|
|
|
@ -198,12 +198,13 @@ namespace
|
|||
|
||||
struct vertex_buffer_visitor
|
||||
{
|
||||
vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset)
|
||||
vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset, gl::vertex_cache* vertex_cache)
|
||||
: vertex_count(vtx_cnt)
|
||||
, m_attrib_ring_info(heap)
|
||||
, m_program(prog)
|
||||
, m_gl_attrib_buffers(attrib_buffer)
|
||||
, m_min_texbuffer_alignment(min_texbuffer_offset)
|
||||
, m_vertex_cache(vertex_cache)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -213,21 +214,30 @@ namespace
|
|||
if (!m_program->uniforms.has_location(s_reg_table[vertex_array.index], &location))
|
||||
return;
|
||||
|
||||
// Fill vertex_array
|
||||
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
|
||||
|
||||
u32 data_size = vertex_count * element_size;
|
||||
u32 gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size);
|
||||
GLenum gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size);
|
||||
auto& texture = m_gl_attrib_buffers[vertex_array.index];
|
||||
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
|
||||
const u32 data_size = vertex_count * element_size;
|
||||
|
||||
const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();
|
||||
u32 buffer_offset = 0;
|
||||
auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
|
||||
gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
|
||||
buffer_offset = mapping.second;
|
||||
gsl::span<gsl::byte> dest_span(dst, data_size);
|
||||
|
||||
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
|
||||
prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
|
||||
if (auto uploaded = m_vertex_cache->find_vertex_range(local_addr, gl_type, data_size))
|
||||
{
|
||||
buffer_offset = uploaded->offset_in_heap;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fill vertex_array
|
||||
auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
|
||||
gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
|
||||
buffer_offset = mapping.second;
|
||||
gsl::span<gsl::byte> dest_span(dst, data_size);
|
||||
|
||||
m_vertex_cache->store_range(local_addr, gl_type, data_size, buffer_offset);
|
||||
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
|
||||
prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
|
||||
}
|
||||
|
||||
texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size);
|
||||
}
|
||||
|
@ -263,6 +273,7 @@ namespace
|
|||
gl::glsl::program* m_program;
|
||||
gl::texture* m_gl_attrib_buffers;
|
||||
GLint m_min_texbuffer_alignment;
|
||||
gl::vertex_cache* m_vertex_cache;
|
||||
};
|
||||
|
||||
struct draw_command_visitor
|
||||
|
@ -272,6 +283,7 @@ namespace
|
|||
|
||||
draw_command_visitor(gl::ring_buffer& index_ring_buffer, gl::ring_buffer& attrib_ring_buffer,
|
||||
gl::texture* gl_attrib_buffers, gl::glsl::program* program, GLint min_texbuffer_alignment,
|
||||
gl::vertex_cache* vertex_cache,
|
||||
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)> gvb)
|
||||
: m_index_ring_buffer(index_ring_buffer)
|
||||
, m_attrib_ring_buffer(attrib_ring_buffer)
|
||||
|
@ -279,6 +291,7 @@ namespace
|
|||
, m_program(program)
|
||||
, m_min_texbuffer_alignment(min_texbuffer_alignment)
|
||||
, get_vertex_buffers(gvb)
|
||||
, m_vertex_cache(vertex_cache)
|
||||
{
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index) {
|
||||
if (rsx::method_registers.vertex_arrays_info[index].size() ||
|
||||
|
@ -368,7 +381,7 @@ namespace
|
|||
gl::ring_buffer& m_index_ring_buffer;
|
||||
gl::ring_buffer& m_attrib_ring_buffer;
|
||||
gl::texture* m_gl_attrib_buffers;
|
||||
|
||||
gl::vertex_cache* m_vertex_cache;
|
||||
gl::glsl::program* m_program;
|
||||
GLint m_min_texbuffer_alignment;
|
||||
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)>
|
||||
|
@ -379,7 +392,7 @@ namespace
|
|||
u32 verts_allocated = max_index - min_index + 1;
|
||||
|
||||
vertex_buffer_visitor visitor(verts_allocated, m_attrib_ring_buffer,
|
||||
m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment);
|
||||
m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment, m_vertex_cache);
|
||||
const auto& vertex_buffers =
|
||||
get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}});
|
||||
for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
|
||||
|
@ -452,11 +465,12 @@ std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_b
|
|||
{
|
||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
||||
auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer,
|
||||
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
|
||||
[this](const auto& state, const auto& list) {
|
||||
return this->get_vertex_buffers(state, list, 0);
|
||||
}),
|
||||
get_draw_command(rsx::method_registers));
|
||||
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
|
||||
m_vertex_cache.get(),
|
||||
[this](const auto& state, const auto& list) {
|
||||
return this->get_vertex_buffers(state, list, 0);
|
||||
}),
|
||||
get_draw_command(rsx::method_registers));
|
||||
|
||||
std::chrono::time_point<steady_clock> now = steady_clock::now();
|
||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
|
|
|
@ -406,6 +406,9 @@ namespace rsx
|
|||
}
|
||||
});
|
||||
|
||||
// Raise priority above other threads
|
||||
thread_ctrl::set_native_priority(1);
|
||||
|
||||
// TODO: exit condition
|
||||
while (!Emu.IsStopped())
|
||||
{
|
||||
|
|
|
@ -628,9 +628,9 @@ VKGSRender::VKGSRender() : GSRender()
|
|||
}
|
||||
|
||||
if (g_cfg.video.disable_vertex_cache)
|
||||
m_vertex_cache.reset(new null_vertex_cache());
|
||||
m_vertex_cache.reset(new vk::null_vertex_cache());
|
||||
else
|
||||
m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache());
|
||||
m_vertex_cache.reset(new vk::weak_vertex_cache());
|
||||
}
|
||||
|
||||
VKGSRender::~VKGSRender()
|
||||
|
@ -1269,8 +1269,6 @@ void VKGSRender::on_init_thread()
|
|||
|
||||
GSRender::on_init_thread();
|
||||
rsx_thread = std::this_thread::get_id();
|
||||
|
||||
thread_ctrl::set_native_priority(1);
|
||||
}
|
||||
|
||||
void VKGSRender::on_exit()
|
||||
|
|
|
@ -16,8 +16,12 @@
|
|||
|
||||
#pragma comment(lib, "VKstatic.1.lib")
|
||||
|
||||
using namespace vk::vertex_cache;
|
||||
using null_vertex_cache = rsx::vertex_cache<uploaded_range, VkFormat>;
|
||||
// Vertex cache type aliases for the Vulkan backend; uploads are keyed by VkFormat.
namespace vk
|
||||
{
|
||||
// Base cache interface: rsx::vertex_cache::default_vertex_cache storing uploaded_range<VkFormat> records.
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
|
||||
// Cache implementation that actually remembers uploads (rsx::vertex_cache::weak_vertex_cache).
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
|
||||
// The base type doubles as the "null" cache; VKGSRender selects it when g_cfg.video.disable_vertex_cache is set.
using null_vertex_cache = vertex_cache;
|
||||
}
|
||||
|
||||
//Heap allocation sizes in MB
|
||||
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
|
||||
|
@ -117,7 +121,7 @@ private:
|
|||
|
||||
public:
|
||||
//vk::fbo draw_fbo;
|
||||
std::unique_ptr<null_vertex_cache> m_vertex_cache;
|
||||
std::unique_ptr<vk::vertex_cache> m_vertex_cache;
|
||||
|
||||
private:
|
||||
VKProgramBuffer m_prog_buffer;
|
||||
|
|
|
@ -1457,54 +1457,6 @@ namespace vk
|
|||
}
|
||||
};
|
||||
|
||||
namespace vertex_cache
|
||||
{
|
||||
struct uploaded_range
|
||||
{
|
||||
uintptr_t local_address;
|
||||
VkFormat buffer_format;
|
||||
u32 offset_in_heap;
|
||||
u32 data_length;
|
||||
};
|
||||
|
||||
// A weak vertex cache with no data checks or memory range locks
|
||||
// Of limited use since contents are only guaranteed to be valid once per frame
|
||||
// TODO: Strict vertex cache with range locks
|
||||
class weak_vertex_cache: public rsx::vertex_cache<uploaded_range, VkFormat>
|
||||
{
|
||||
private:
|
||||
std::unordered_map<uintptr_t, std::vector<uploaded_range>> vertex_ranges;
|
||||
public:
|
||||
|
||||
uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override
|
||||
{
|
||||
for (auto &v : vertex_ranges[local_addr])
|
||||
{
|
||||
if (v.buffer_format == fmt && v.data_length == data_length)
|
||||
return &v;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) override
|
||||
{
|
||||
uploaded_range v = {};
|
||||
v.buffer_format = fmt;
|
||||
v.data_length = data_length;
|
||||
v.local_address = local_addr;
|
||||
v.offset_in_heap = offset_in_heap;
|
||||
|
||||
vertex_ranges[local_addr].push_back(v);
|
||||
}
|
||||
|
||||
void purge() override
|
||||
{
|
||||
vertex_ranges.clear();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
|
||||
* Then copy all layers into dst_image.
|
||||
|
|
|
@ -191,7 +191,7 @@ namespace rsx
|
|||
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
|
||||
}
|
||||
|
||||
static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, vk::render_target *old_surface, bool forced)
|
||||
static void invalidate_rtt_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *rtt, vk::render_target *old_surface, bool forced)
|
||||
{
|
||||
if (forced)
|
||||
{
|
||||
|
|
|
@ -252,7 +252,7 @@ namespace
|
|||
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
|
||||
vk::glsl::program* prog, VkDescriptorSet desc_set,
|
||||
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
|
||||
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache)
|
||||
vk::vertex_cache* vertex_cache)
|
||||
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
|
||||
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
|
||||
vertex_cache(vertex_cache)
|
||||
|
@ -341,7 +341,7 @@ namespace
|
|||
vk::glsl::program* m_program;
|
||||
VkDescriptorSet descriptor_sets;
|
||||
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
|
||||
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache;
|
||||
vk::vertex_cache* vertex_cache;
|
||||
};
|
||||
|
||||
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
|
||||
|
|
|
@ -195,12 +195,66 @@ namespace rsx
|
|||
}
|
||||
};
|
||||
|
||||
template <typename storage_type, typename upload_format>
|
||||
class vertex_cache
|
||||
namespace vertex_cache
|
||||
{
|
||||
public:
|
||||
virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
|
||||
virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
|
||||
virtual void purge() {}
|
||||
};
|
||||
// A null vertex cache
|
||||
template <typename storage_type, typename upload_format>
|
||||
class default_vertex_cache
|
||||
{
|
||||
public:
|
||||
virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
|
||||
virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
|
||||
virtual void purge() {}
|
||||
};
|
||||
|
||||
// A weak vertex cache with no data checks or memory range locks
|
||||
// Of limited use since contents are only guaranteed to be valid once per frame
|
||||
// TODO: Strict vertex cache with range locks
|
||||
// Record describing one vertex upload remembered by the cache.
// upload_format is the backend's format identifier type (GLenum / VkFormat in this file's aliases).
template <typename upload_format>
|
||||
struct uploaded_range
|
||||
{
|
||||
// Source address of the vertex data; also the key the record is stored under.
uintptr_t local_address;
|
||||
// Format the data was uploaded as; must match for a lookup to hit.
upload_format buffer_format;
|
||||
// Offset passed to store_range; the GL path reuses it as the buffer offset on a cache hit.
u32 offset_in_heap;
|
||||
// Length of the upload as supplied by the caller (the GL path passes vertex_count * element_size).
u32 data_length;
|
||||
};
|
||||
|
||||
template <typename upload_format>
|
||||
class weak_vertex_cache : public default_vertex_cache<uploaded_range<upload_format>, upload_format>
|
||||
{
|
||||
using storage_type = uploaded_range<upload_format>;
|
||||
|
||||
private:
|
||||
std::unordered_map<uintptr_t, std::vector<storage_type>> vertex_ranges;
|
||||
|
||||
public:
|
||||
|
||||
storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override
|
||||
{
|
||||
for (auto &v : vertex_ranges[local_addr])
|
||||
{
|
||||
if (v.buffer_format == fmt && v.data_length == data_length)
|
||||
return &v;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void store_range(uintptr_t local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override
|
||||
{
|
||||
storage_type v = {};
|
||||
v.buffer_format = fmt;
|
||||
v.data_length = data_length;
|
||||
v.local_address = local_addr;
|
||||
v.offset_in_heap = offset_in_heap;
|
||||
|
||||
vertex_ranges[local_addr].push_back(v);
|
||||
}
|
||||
|
||||
void purge() override
|
||||
{
|
||||
vertex_ranges.clear();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue