From 4cd5624fa7a51c3a97422eb5a89872cec16d31c8 Mon Sep 17 00:00:00 2001
From: kd-11
Date: Wed, 26 Jul 2017 19:32:13 +0300
Subject: [PATCH] rsx/vk/gl: Refactoring

- Also adds a vertex cache to OpenGL
---
 rpcs3/Emu/RSX/GL/GLGSRender.cpp      |  7 +++
 rpcs3/Emu/RSX/GL/GLGSRender.h        | 10 +++-
 rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 52 +++++++++++++--------
 rpcs3/Emu/RSX/RSXThread.cpp          |  3 ++
 rpcs3/Emu/RSX/VK/VKGSRender.cpp      |  6 +--
 rpcs3/Emu/RSX/VK/VKGSRender.h        | 10 ++--
 rpcs3/Emu/RSX/VK/VKHelpers.h         | 48 --------------------
 rpcs3/Emu/RSX/VK/VKRenderTargets.h   |  2 +-
 rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp |  4 +-
 rpcs3/Emu/RSX/rsx_cache.h            | 68 +++++++++++++++++++++++++---
 10 files changed, 124 insertions(+), 86 deletions(-)

diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index e3278e903a..58efe16bd5 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -26,6 +26,11 @@ GLGSRender::GLGSRender() : GSRender()
 {
 	//TODO
 	//shaders_cache.load(rsx::old_shaders_cache::shader_language::glsl);
+
+	if (g_cfg.video.disable_vertex_cache)
+		m_vertex_cache.reset(new gl::null_vertex_cache());
+	else
+		m_vertex_cache.reset(new gl::weak_vertex_cache());
 }
 
 u32 GLGSRender::enable(u32 condition, u32 cap)
@@ -1091,6 +1096,8 @@ void GLGSRender::flip(int buffer)
 	if (g_cfg.video.invalidate_surface_cache_every_frame)
 		m_rtts.invalidate_surface_cache_data(nullptr);
 
+	m_vertex_cache->purge();
+
 	//If we are skipping the next frame, do not reset perf counters
 	if (skip_frame) return;
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h
index 4e8e7f6472..c84aee96d4 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -14,6 +14,13 @@
 
 #pragma comment(lib, "opengl32.lib")
 
+namespace gl
+{
+	using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<GLenum>, GLenum>;
+	using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<GLenum>;
+	using null_vertex_cache = vertex_cache;
+}
+
 struct work_item
 {
 	std::condition_variable cv;
@@ -57,8 +64,7 @@ private:
 	s64 m_vertex_upload_time = 0;
 	s64 m_textures_upload_time = 0;
 
-	//Compare to see if transform matrix have changed
-	size_t m_transform_buffer_hash = 0;
+	std::unique_ptr<gl::vertex_cache> m_vertex_cache;
 
 	GLint m_min_texbuffer_alignment = 256;
 	GLint m_uniform_buffer_offset_align = 256;
diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp
index e24b9b6559..4bd4eb27c2 100644
--- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp
+++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp
@@ -198,12 +198,13 @@ namespace
 
 	struct vertex_buffer_visitor
 	{
-		vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset)
+		vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset, gl::vertex_cache* vertex_cache)
 			: vertex_count(vtx_cnt)
 			, m_attrib_ring_info(heap)
 			, m_program(prog)
 			, m_gl_attrib_buffers(attrib_buffer)
 			, m_min_texbuffer_alignment(min_texbuffer_offset)
+			, m_vertex_cache(vertex_cache)
 		{
 		}
 
@@ -213,21 +214,30 @@ namespace
 		if (!m_program->uniforms.has_location(s_reg_table[vertex_array.index], &location))
 			return;
 
-		// Fill vertex_array
-		u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
-
-		u32 data_size = vertex_count * element_size;
-		u32 gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size);
+		GLenum gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size);
 		auto& texture = m_gl_attrib_buffers[vertex_array.index];
 
+		const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
+		const u32 data_size = vertex_count * element_size;
+		const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();
 		u32 buffer_offset = 0;
 
-		auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
-		gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
-		buffer_offset = mapping.second;
-		gsl::span<gsl::byte> dest_span(dst, data_size);
-		write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
-		prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
+		if (auto uploaded = m_vertex_cache->find_vertex_range(local_addr, gl_type, data_size))
+		{
+			buffer_offset = uploaded->offset_in_heap;
+		}
+		else
+		{
+			// Fill vertex_array
+			auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
+			gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
+			buffer_offset = mapping.second;
+			gsl::span<gsl::byte> dest_span(dst, data_size);
+
+			m_vertex_cache->store_range(local_addr, gl_type, data_size, buffer_offset);
+			write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
+			prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
+		}
 
 		texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size);
 	}
@@ -263,6 +273,7 @@ namespace
 		gl::glsl::program* m_program;
 		gl::texture* m_gl_attrib_buffers;
 		GLint m_min_texbuffer_alignment;
+		gl::vertex_cache* m_vertex_cache;
 	};
 
 	struct draw_command_visitor
@@ -272,6 +283,7 @@
 		draw_command_visitor(gl::ring_buffer& index_ring_buffer, gl::ring_buffer& attrib_ring_buffer,
 			gl::texture* gl_attrib_buffers, gl::glsl::program* program, GLint min_texbuffer_alignment,
+			gl::vertex_cache* vertex_cache,
 			std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)> gvb)
 			: m_index_ring_buffer(index_ring_buffer)
 			, m_attrib_ring_buffer(attrib_ring_buffer)
 			, m_gl_attrib_buffers(gl_attrib_buffers)
 			, m_program(program)
 			, m_min_texbuffer_alignment(min_texbuffer_alignment)
 			, get_vertex_buffers(gvb)
+			, m_vertex_cache(vertex_cache)
 		{
 			for (u8 index = 0; index < rsx::limits::vertex_count; ++index) {
 				if (rsx::method_registers.vertex_arrays_info[index].size() ||
@@ -368,7 +381,7 @@ namespace
 		gl::ring_buffer& m_index_ring_buffer;
 		gl::ring_buffer& m_attrib_ring_buffer;
 		gl::texture* m_gl_attrib_buffers;
-
+		gl::vertex_cache* m_vertex_cache;
 		gl::glsl::program* m_program;
 		GLint m_min_texbuffer_alignment;
 		std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)>
@@ -379,7 +392,7 @@ namespace
 		u32 verts_allocated = max_index - min_index + 1;
 
 		vertex_buffer_visitor visitor(verts_allocated, m_attrib_ring_buffer,
-			m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment);
+			m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment, m_vertex_cache);
 		const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}});
 		for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
@@ -452,11 +465,12 @@ std::tuple>> GLGSRender::set_vertex_b
 {
 	std::chrono::time_point<steady_clock> then = steady_clock::now();
 	auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer,
-		m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
-		[this](const auto& state, const auto& list) {
-			return this->get_vertex_buffers(state, list, 0);
-		}),
-		get_draw_command(rsx::method_registers));
+		m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
+		m_vertex_cache.get(),
+		[this](const auto& state, const auto& list) {
+			return this->get_vertex_buffers(state, list, 0);
+		}),
+		get_draw_command(rsx::method_registers));
 
 	std::chrono::time_point<steady_clock> now = steady_clock::now();
 	m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 15536d5790..4a26cdca99 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -406,6 +406,9 @@ namespace rsx
 			}
 		});
 
+		// Raise priority above other threads
+		thread_ctrl::set_native_priority(1);
+
 		// TODO: exit condition
 		while (!Emu.IsStopped())
 		{
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 0fae530d00..2f7c7fcfba 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -628,9 +628,9 @@ VKGSRender::VKGSRender() : GSRender()
 	}
 
 	if (g_cfg.video.disable_vertex_cache)
-		m_vertex_cache.reset(new null_vertex_cache());
+		m_vertex_cache.reset(new vk::null_vertex_cache());
 	else
-		m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache());
+		m_vertex_cache.reset(new vk::weak_vertex_cache());
 }
 
 VKGSRender::~VKGSRender()
@@ -1269,8 +1269,6 @@ void VKGSRender::on_init_thread()
 	GSRender::on_init_thread();
 
 	rsx_thread = std::this_thread::get_id();
-
-	thread_ctrl::set_native_priority(1);
 }
 
 void VKGSRender::on_exit()
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h
index f032cbbce9..f43e8e1dc5 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@@ -16,8 +16,12 @@
 
 #pragma comment(lib, "VKstatic.1.lib")
 
-using namespace vk::vertex_cache;
-using null_vertex_cache = rsx::vertex_cache<uploaded_range, VkFormat>;
+namespace vk
+{
+	using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
+	using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
+	using null_vertex_cache = vertex_cache;
+}
 
 //Heap allocation sizes in MB
 #define VK_ATTRIB_RING_BUFFER_SIZE_M 256
@@ -117,7 +121,7 @@ private:
 public:
 	//vk::fbo draw_fbo;
-	std::unique_ptr<null_vertex_cache> m_vertex_cache;
+	std::unique_ptr<vk::vertex_cache> m_vertex_cache;
 
 private:
 	VKProgramBuffer m_prog_buffer;
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 1ded3ebf69..60bd53d5ba 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -1457,54 +1457,6 @@ namespace vk
 		}
 	};
 
-	namespace vertex_cache
-	{
-		struct uploaded_range
-		{
-			uintptr_t local_address;
-			VkFormat buffer_format;
-			u32 offset_in_heap;
-			u32 data_length;
-		};
-
-		// A weak vertex cache with no data checks or memory range locks
-		// Of limited use since contents are only guaranteed to be valid once per frame
-		// TODO: Strict vertex cache with range locks
-		class weak_vertex_cache: public rsx::vertex_cache<uploaded_range, VkFormat>
-		{
-		private:
-			std::unordered_map<uintptr_t, std::vector<uploaded_range>> vertex_ranges;
-		public:
-
-			uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override
-			{
-				for (auto &v : vertex_ranges[local_addr])
-				{
-					if (v.buffer_format == fmt && v.data_length == data_length)
-						return &v;
-				}
-
-				return nullptr;
-			}
-
-			void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) override
-			{
-				uploaded_range v = {};
-				v.buffer_format = fmt;
-				v.data_length = data_length;
-				v.local_address = local_addr;
-				v.offset_in_heap = offset_in_heap;
-
-				vertex_ranges[local_addr].push_back(v);
-			}
-
-			void purge() override
-			{
-				vertex_ranges.clear();
-			}
-		};
-	}
-
 	/**
 	* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
 	* Then copy all layers into dst_image.
diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h
index bd367fc97d..7642197b6a 100644
--- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h
+++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h
@@ -191,7 +191,7 @@ namespace rsx
 			change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
 		}
 
-		static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, vk::render_target *old_surface, bool forced)
+		static void invalidate_rtt_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *rtt, vk::render_target *old_surface, bool forced)
 		{
 			if (forced)
 			{
diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
index 2ddcb496e2..035611cc98 100644
--- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
+++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
@@ -252,7 +252,7 @@ namespace
 		vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
 			vk::glsl::program* prog, VkDescriptorSet desc_set,
 			std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
-			rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache)
+			vk::vertex_cache* vertex_cache)
 			: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
 			descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
 			vertex_cache(vertex_cache)
@@ -341,7 +341,7 @@ namespace
 		vk::glsl::program* m_program;
 		VkDescriptorSet descriptor_sets;
 		std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
-		rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache;
+		vk::vertex_cache* vertex_cache;
 	};
 
 	using attribute_storage = std::vector
diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h
--- a/rpcs3/Emu/RSX/rsx_cache.h
+++ b/rpcs3/Emu/RSX/rsx_cache.h
-	template <typename storage_type, typename upload_format>
-	class vertex_cache
+	namespace vertex_cache
 	{
-	public:
-		virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
-		virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
-		virtual void purge() {}
-	};
+		// A null vertex cache
+		template <typename storage_type, typename upload_format>
+		class default_vertex_cache
+		{
+		public:
+			virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }
+			virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
+			virtual void purge() {}
+		};
+
+		// A weak vertex cache with no data checks or memory range locks
+		// Of limited use since contents are only guaranteed to be valid once per frame
+		// TODO: Strict vertex cache with range locks
+		template <typename upload_format>
+		struct uploaded_range
+		{
+			uintptr_t local_address;
+			upload_format buffer_format;
+			u32 offset_in_heap;
+			u32 data_length;
+		};
+
+		template <typename upload_format>
+		class weak_vertex_cache : public default_vertex_cache<uploaded_range<upload_format>, upload_format>
+		{
+			using storage_type = uploaded_range<upload_format>;
+
+		private:
+			std::unordered_map<uintptr_t, std::vector<storage_type>> vertex_ranges;
+
+		public:
+
+			storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override
+			{
+				for (auto &v : vertex_ranges[local_addr])
+				{
+					if (v.buffer_format == fmt && v.data_length == data_length)
+						return &v;
+				}
+
+				return nullptr;
+			}
+
+			void store_range(uintptr_t local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override
+			{
+				storage_type v = {};
+				v.buffer_format = fmt;
+				v.data_length = data_length;
+				v.local_address = local_addr;
+				v.offset_in_heap = offset_in_heap;
+
+				vertex_ranges[local_addr].push_back(v);
+			}
+
+			void purge() override
+			{
+				vertex_ranges.clear();
+			}
+		};
+	}
 }
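-- 
Not part of the patch: a minimal, self-contained sketch of the per-frame caching flow the GL and Vulkan
backends share after this change. The cache logic below mirrors rsx::vertex_cache::weak_vertex_cache from
rsx_cache.h; everything else is an illustrative stand-in (a plain offset counter instead of the attribute
ring buffer, a local vector instead of guest vertex data, an arbitrary format token), not RPCS3 API.

// vertex_cache_sketch.cpp
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

using u32 = std::uint32_t;

template <typename upload_format>
struct uploaded_range
{
	std::uintptr_t local_address;
	upload_format buffer_format;
	u32 offset_in_heap;
	u32 data_length;
};

// Weak cache: no data checks or range locks, so entries are only trusted for the
// frame in which they were uploaded. purge() is expected to run once per flip.
template <typename upload_format>
class weak_vertex_cache
{
	std::unordered_map<std::uintptr_t, std::vector<uploaded_range<upload_format>>> vertex_ranges;

public:
	uploaded_range<upload_format>* find_vertex_range(std::uintptr_t local_addr, upload_format fmt, u32 data_length)
	{
		for (auto& v : vertex_ranges[local_addr])
		{
			if (v.buffer_format == fmt && v.data_length == data_length)
				return &v;
		}
		return nullptr;
	}

	void store_range(std::uintptr_t local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap)
	{
		vertex_ranges[local_addr].push_back({ local_addr, fmt, offset_in_heap, data_length });
	}

	void purge() { vertex_ranges.clear(); }
};

int main()
{
	using format_token = unsigned int;      // stands in for GLenum / VkFormat
	weak_vertex_cache<format_token> cache;

	u32 heap_head = 0;                      // stands in for the attribute ring buffer head
	const std::vector<float> attribs(64);   // stands in for guest vertex data

	const auto addr = reinterpret_cast<std::uintptr_t>(attribs.data());
	const format_token fmt = 0x8235;        // arbitrary token for this sketch
	const u32 size = u32(attribs.size() * sizeof(float));

	// First draw of the frame: miss -> "upload" and remember the heap offset.
	u32 buffer_offset;
	if (auto hit = cache.find_vertex_range(addr, fmt, size))
	{
		buffer_offset = hit->offset_in_heap;
	}
	else
	{
		buffer_offset = heap_head;          // pretend alloc_from_heap() returned this offset
		heap_head += size;                  // and that the vertex data was copied there
		cache.store_range(addr, fmt, size, buffer_offset);
	}
	std::printf("first draw uses heap offset %u\n", unsigned(buffer_offset));

	// Later draw in the same frame with the same range: hit -> reuse the upload.
	if (auto hit = cache.find_vertex_range(addr, fmt, size))
		std::printf("repeat draw reuses heap offset %u\n", unsigned(hit->offset_in_heap));

	cache.purge();                          // what GLGSRender::flip() now does every frame
	return 0;
}

Because the cache keys only on address, format and length and never re-checks the data itself, reuse is
safe only within a single frame; that is why the GL backend now purges it in flip(), and why
disable_vertex_cache swaps in the null default_vertex_cache instead of the weak one.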