rsx/vk/gl: Refactoring - Also adds a vertex cache to openGL as well

This commit is contained in:
kd-11 2017-07-26 19:32:13 +03:00
parent fc6891a0e3
commit 4cd5624fa7
10 changed files with 124 additions and 86 deletions

View file

@ -26,6 +26,11 @@ GLGSRender::GLGSRender() : GSRender()
{
//TODO
//shaders_cache.load(rsx::old_shaders_cache::shader_language::glsl);
if (g_cfg.video.disable_vertex_cache)
m_vertex_cache.reset(new gl::null_vertex_cache());
else
m_vertex_cache.reset(new gl::weak_vertex_cache());
}
u32 GLGSRender::enable(u32 condition, u32 cap)
@ -1091,6 +1096,8 @@ void GLGSRender::flip(int buffer)
if (g_cfg.video.invalidate_surface_cache_every_frame)
m_rtts.invalidate_surface_cache_data(nullptr);
m_vertex_cache->purge();
//If we are skipping the next frame, do not reset perf counters
if (skip_frame) return;

View file

@ -14,6 +14,13 @@
#pragma comment(lib, "opengl32.lib")
namespace gl
{
// Vertex cache aliases for the OpenGL backend (upload format tag is GLenum).
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<GLenum>, GLenum>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<GLenum>;
// The base default_vertex_cache is a no-op implementation, so it doubles as
// the "null" cache selected when the vertex cache is disabled in config.
using null_vertex_cache = vertex_cache;
}
struct work_item
{
std::condition_variable cv;
@ -57,8 +64,7 @@ private:
s64 m_vertex_upload_time = 0;
s64 m_textures_upload_time = 0;
//Compare to see if transform matrix have changed
size_t m_transform_buffer_hash = 0;
std::unique_ptr<gl::vertex_cache> m_vertex_cache;
GLint m_min_texbuffer_alignment = 256;
GLint m_uniform_buffer_offset_align = 256;

View file

@ -198,12 +198,13 @@ namespace
struct vertex_buffer_visitor
{
vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset)
vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset, gl::vertex_cache* vertex_cache)
: vertex_count(vtx_cnt)
, m_attrib_ring_info(heap)
, m_program(prog)
, m_gl_attrib_buffers(attrib_buffer)
, m_min_texbuffer_alignment(min_texbuffer_offset)
, m_vertex_cache(vertex_cache)
{
}
@ -213,21 +214,30 @@ namespace
if (!m_program->uniforms.has_location(s_reg_table[vertex_array.index], &location))
return;
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
u32 data_size = vertex_count * element_size;
u32 gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size);
GLenum gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size);
auto& texture = m_gl_attrib_buffers[vertex_array.index];
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
const u32 data_size = vertex_count * element_size;
const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();
u32 buffer_offset = 0;
auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
gsl::span<gsl::byte> dest_span(dst, data_size);
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
if (auto uploaded = m_vertex_cache->find_vertex_range(local_addr, gl_type, data_size))
{
buffer_offset = uploaded->offset_in_heap;
}
else
{
// Fill vertex_array
auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
gsl::span<gsl::byte> dest_span(dst, data_size);
m_vertex_cache->store_range(local_addr, gl_type, data_size, buffer_offset);
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
}
texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size);
}
@ -263,6 +273,7 @@ namespace
gl::glsl::program* m_program;
gl::texture* m_gl_attrib_buffers;
GLint m_min_texbuffer_alignment;
gl::vertex_cache* m_vertex_cache;
};
struct draw_command_visitor
@ -272,6 +283,7 @@ namespace
draw_command_visitor(gl::ring_buffer& index_ring_buffer, gl::ring_buffer& attrib_ring_buffer,
gl::texture* gl_attrib_buffers, gl::glsl::program* program, GLint min_texbuffer_alignment,
gl::vertex_cache* vertex_cache,
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)> gvb)
: m_index_ring_buffer(index_ring_buffer)
, m_attrib_ring_buffer(attrib_ring_buffer)
@ -279,6 +291,7 @@ namespace
, m_program(program)
, m_min_texbuffer_alignment(min_texbuffer_alignment)
, get_vertex_buffers(gvb)
, m_vertex_cache(vertex_cache)
{
for (u8 index = 0; index < rsx::limits::vertex_count; ++index) {
if (rsx::method_registers.vertex_arrays_info[index].size() ||
@ -368,7 +381,7 @@ namespace
gl::ring_buffer& m_index_ring_buffer;
gl::ring_buffer& m_attrib_ring_buffer;
gl::texture* m_gl_attrib_buffers;
gl::vertex_cache* m_vertex_cache;
gl::glsl::program* m_program;
GLint m_min_texbuffer_alignment;
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)>
@ -379,7 +392,7 @@ namespace
u32 verts_allocated = max_index - min_index + 1;
vertex_buffer_visitor visitor(verts_allocated, m_attrib_ring_buffer,
m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment);
m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment, m_vertex_cache);
const auto& vertex_buffers =
get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}});
for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
@ -452,11 +465,12 @@ std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_b
{
std::chrono::time_point<steady_clock> then = steady_clock::now();
auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer,
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
[this](const auto& state, const auto& list) {
return this->get_vertex_buffers(state, list, 0);
}),
get_draw_command(rsx::method_registers));
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
m_vertex_cache.get(),
[this](const auto& state, const auto& list) {
return this->get_vertex_buffers(state, list, 0);
}),
get_draw_command(rsx::method_registers));
std::chrono::time_point<steady_clock> now = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();

View file

@ -406,6 +406,9 @@ namespace rsx
}
});
// Raise priority above other threads
thread_ctrl::set_native_priority(1);
// TODO: exit condition
while (!Emu.IsStopped())
{

View file

@ -628,9 +628,9 @@ VKGSRender::VKGSRender() : GSRender()
}
if (g_cfg.video.disable_vertex_cache)
m_vertex_cache.reset(new null_vertex_cache());
m_vertex_cache.reset(new vk::null_vertex_cache());
else
m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache());
m_vertex_cache.reset(new vk::weak_vertex_cache());
}
VKGSRender::~VKGSRender()
@ -1269,8 +1269,6 @@ void VKGSRender::on_init_thread()
GSRender::on_init_thread();
rsx_thread = std::this_thread::get_id();
thread_ctrl::set_native_priority(1);
}
void VKGSRender::on_exit()

View file

@ -16,8 +16,12 @@
#pragma comment(lib, "VKstatic.1.lib")
using namespace vk::vertex_cache;
using null_vertex_cache = rsx::vertex_cache<uploaded_range, VkFormat>;
namespace vk
{
// Vertex cache aliases for the Vulkan backend (upload format tag is VkFormat).
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
// The base default_vertex_cache is a no-op implementation, so it doubles as
// the "null" cache selected when the vertex cache is disabled in config.
using null_vertex_cache = vertex_cache;
}
//Heap allocation sizes in MB
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
@ -117,7 +121,7 @@ private:
public:
//vk::fbo draw_fbo;
std::unique_ptr<null_vertex_cache> m_vertex_cache;
std::unique_ptr<vk::vertex_cache> m_vertex_cache;
private:
VKProgramBuffer m_prog_buffer;

View file

@ -1457,54 +1457,6 @@ namespace vk
}
};
namespace vertex_cache
{
// Bookkeeping entry for one block of vertex data already uploaded to the heap.
struct uploaded_range
{
uintptr_t local_address;   // source address the data was read from
VkFormat buffer_format;    // format the data was uploaded as
u32 offset_in_heap;        // byte offset of the upload within the heap
u32 data_length;           // size of the uploaded data in bytes
};
// A weak vertex cache with no data checks or memory range locks
// Of limited use since contents are only guaranteed to be valid once per frame
// TODO: Strict vertex cache with range locks
class weak_vertex_cache: public rsx::vertex_cache<uploaded_range, VkFormat>
{
private:
// Uploads recorded this frame, bucketed by source address.
std::unordered_map<uintptr_t, std::vector<uploaded_range>> vertex_ranges;
public:
// Returns a previously stored upload matching (address, format, length), or
// nullptr. Contents are not re-verified - hence "weak".
uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override
{
for (auto &v : vertex_ranges[local_addr])
{
if (v.buffer_format == fmt && v.data_length == data_length)
return &v;
}
return nullptr;
}
// Records a completed upload so later lookups this frame can reuse it.
void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) override
{
uploaded_range v = {};
v.buffer_format = fmt;
v.data_length = data_length;
v.local_address = local_addr;
v.offset_in_heap = offset_in_heap;
vertex_ranges[local_addr].push_back(v);
}
// Drops every cached entry; heap offsets are only valid within a frame.
void purge() override
{
vertex_ranges.clear();
}
};
}
/**
* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
* Then copy all layers into dst_image.

View file

@ -191,7 +191,7 @@ namespace rsx
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
}
static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, vk::render_target *old_surface, bool forced)
static void invalidate_rtt_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *rtt, vk::render_target *old_surface, bool forced)
{
if (forced)
{

View file

@ -252,7 +252,7 @@ namespace
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
vk::glsl::program* prog, VkDescriptorSet desc_set,
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache)
vk::vertex_cache* vertex_cache)
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
vertex_cache(vertex_cache)
@ -341,7 +341,7 @@ namespace
vk::glsl::program* m_program;
VkDescriptorSet descriptor_sets;
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache;
vk::vertex_cache* vertex_cache;
};
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,

View file

@ -195,12 +195,66 @@ namespace rsx
}
};
template <typename storage_type, typename upload_format>
class vertex_cache
namespace vertex_cache
{
public:
virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
virtual void purge() {}
};
// Base vertex cache implementation. Every operation is a no-op, so this type
// also serves as the "null" cache used when vertex caching is disabled.
template <typename storage_type, typename upload_format>
class default_vertex_cache
{
public:
	// Virtual destructor is required: concrete caches (weak_vertex_cache) are
	// owned and destroyed through a pointer to this base (m_vertex_cache is a
	// std::unique_ptr to the base type), which is UB without it.
	virtual ~default_vertex_cache() = default;

	// Returns a previously stored upload matching (address, format, length),
	// or nullptr when nothing usable is cached. Base implementation never hits.
	virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }

	// Records a completed upload for reuse. Base implementation discards it.
	virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}

	// Invalidates all cached entries (called once per frame on flip).
	virtual void purge() {}
};
// A weak vertex cache with no data checks or memory range locks
// (implemented by weak_vertex_cache below; uploaded_range is its bookkeeping entry)
// Of limited use since contents are only guaranteed to be valid once per frame
// TODO: Strict vertex cache with range locks
// Bookkeeping entry describing one block of vertex data that has already been
// uploaded to the ring heap. Parameterized on the backend's format tag type
// (GLenum for OpenGL, VkFormat for Vulkan).
template <typename upload_format>
struct uploaded_range
{
uintptr_t local_address;     // source address the data was read from
upload_format buffer_format; // format the data was uploaded as
u32 offset_in_heap;          // byte offset of the upload within the ring heap
u32 data_length;             // size of the uploaded data in bytes
};
// Weak cache: matches uploads by (address, format, length) only, without
// verifying contents, so entries are trustworthy for at most one frame.
template <typename upload_format>
class weak_vertex_cache : public default_vertex_cache<uploaded_range<upload_format>, upload_format>
{
	using storage_type = uploaded_range<upload_format>;

private:
	// Uploads recorded since the last purge(), bucketed by source address.
	std::unordered_map<uintptr_t, std::vector<storage_type>> vertex_ranges;

public:
	// Returns a stored upload matching (address, format, length), or nullptr.
	storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override
	{
		// Use find() rather than operator[]: the latter default-inserts an
		// empty bucket on every cache miss, growing the map with dead entries.
		const auto bucket = vertex_ranges.find(local_addr);
		if (bucket == vertex_ranges.end())
			return nullptr;

		for (auto &v : bucket->second)
		{
			if (v.buffer_format == fmt && v.data_length == data_length)
				return &v;
		}

		return nullptr;
	}

	// Records a completed upload so later lookups this frame can reuse it.
	void store_range(uintptr_t local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override
	{
		storage_type v = {};
		v.buffer_format = fmt;
		v.data_length = data_length;
		v.local_address = local_addr;
		v.offset_in_heap = offset_in_heap;
		vertex_ranges[local_addr].push_back(v);
	}

	// Drops every cached entry; heap offsets are only valid within a frame.
	void purge() override
	{
		vertex_ranges.clear();
	}
};
}
}