diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 87f835941f..e951b5bd7c 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -454,6 +454,8 @@ target_sources(rpcs3_emu PRIVATE RSX/Program/VertexProgramDecompiler.cpp RSX/Capture/rsx_capture.cpp RSX/Capture/rsx_replay.cpp + RSX/GL/glutils/buffer_object.cpp + RSX/GL/glutils/ring_buffer.cpp RSX/GL/GLCommonDecompiler.cpp RSX/GL/GLCompute.cpp RSX/GL/GLDraw.cpp diff --git a/rpcs3/Emu/RSX/GL/GLExecutionState.h b/rpcs3/Emu/RSX/GL/GLExecutionState.h index 8c1b58fa74..1259dd42ca 100644 --- a/rpcs3/Emu/RSX/GL/GLExecutionState.h +++ b/rpcs3/Emu/RSX/GL/GLExecutionState.h @@ -1,235 +1,12 @@ #pragma once -#include "util/logs.hpp" -#include "util/types.hpp" -#include "Utilities/geometry.h" -#include "OpenGL.h" +#include "glutils/capabilities.hpp" +#include "Utilities/geometry.h" #include namespace gl { - class capabilities - { - public: - bool EXT_dsa_supported = false; - bool EXT_depth_bounds_test = false; - bool ARB_dsa_supported = false; - bool ARB_bindless_texture_supported = false; - bool ARB_buffer_storage_supported = false; - bool ARB_texture_buffer_supported = false; - bool ARB_shader_draw_parameters_supported = false; - bool ARB_depth_buffer_float_supported = false; - bool ARB_texture_barrier_supported = false; - bool ARB_shader_stencil_export_supported = false; - bool NV_texture_barrier_supported = false; - bool NV_gpu_shader5_supported = false; - bool AMD_gpu_shader_half_float_supported = false; - bool ARB_compute_shader_supported = false; - bool NV_depth_buffer_float_supported = false; - bool initialized = false; - bool vendor_INTEL = false; // has broken GLSL compiler - bool vendor_AMD = false; // has broken ARB_multidraw - bool vendor_NVIDIA = false; // has NaN poisoning issues - bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers - - bool check(const std::string& ext_name, const char* test) - { - if (ext_name == test) - { - rsx_log.notice("Extension %s is supported", ext_name); - return true; - } - - return false; - } - - void initialize() - { - int find_count = 15; - int ext_count = 0; - glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count); - - if (!ext_count) - { - rsx_log.error("Coult not initialize GL driver capabilities. Is OpenGL initialized?"); - return; - } - - std::string vendor_string = reinterpret_cast(glGetString(GL_VENDOR)); - std::string version_string = reinterpret_cast(glGetString(GL_VERSION)); - std::string renderer_string = reinterpret_cast(glGetString(GL_RENDERER)); - - for (int i = 0; i < ext_count; i++) - { - if (!find_count) break; - - const std::string ext_name = reinterpret_cast(glGetStringi(GL_EXTENSIONS, i)); - - if (check(ext_name, "GL_ARB_shader_draw_parameters")) - { - ARB_shader_draw_parameters_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_EXT_direct_state_access")) - { - EXT_dsa_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_direct_state_access")) - { - ARB_dsa_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_bindless_texture")) - { - ARB_bindless_texture_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_buffer_storage")) - { - ARB_buffer_storage_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_texture_buffer_object")) - { - ARB_texture_buffer_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_depth_buffer_float")) - { - ARB_depth_buffer_float_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_texture_barrier")) - { - ARB_texture_barrier_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_NV_texture_barrier")) - { - NV_texture_barrier_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_NV_gpu_shader5")) - { - NV_gpu_shader5_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_AMD_gpu_shader_half_float")) - { - AMD_gpu_shader_half_float_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_compute_shader")) - { - ARB_compute_shader_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_EXT_depth_bounds_test")) - { - EXT_depth_bounds_test = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_NV_depth_buffer_float")) - { - NV_depth_buffer_float_supported = true; - find_count--; - continue; - } - - if (check(ext_name, "GL_ARB_shader_stencil_export")) - { - ARB_shader_stencil_export_supported = true; - find_count--; - continue; - } - } - - // Check GL_VERSION and GL_RENDERER for the presence of Mesa - if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax) - { - vendor_MESA = true; - } - - // Workaround for intel drivers which have terrible capability reporting - if (!vendor_string.empty()) - { - std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower); - } - else - { - rsx_log.error("Failed to get vendor string from driver. Are we missing a context?"); - vendor_string = "intel"; // lowest acceptable value - } - - if (!vendor_MESA && vendor_string.find("intel") != umax) - { - int version_major = 0; - int version_minor = 0; - - glGetIntegerv(GL_MAJOR_VERSION, &version_major); - glGetIntegerv(GL_MINOR_VERSION, &version_minor); - - vendor_INTEL = true; - - // Texture buffers moved into core at GL 3.3 - if (version_major > 3 || (version_major == 3 && version_minor >= 3)) - ARB_texture_buffer_supported = true; - - // Check for expected library entry-points for some required functions - if (!ARB_buffer_storage_supported && glNamedBufferStorage && glMapNamedBufferRange) - ARB_buffer_storage_supported = true; - - if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange) - ARB_dsa_supported = true; - - if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT) - EXT_dsa_supported = true; - } - else if (!vendor_MESA && vendor_string.find("nvidia") != umax) - { - vendor_NVIDIA = true; - } -#ifdef _WIN32 - else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax) - { - vendor_AMD = true; - } -#endif - - initialized = true; - } - }; - - const capabilities& get_driver_caps(); - struct driver_state { const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 10cd2bb396..98c4dd217e 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -12,6 +12,8 @@ #include #include +#include "glutils/ring_buffer.h" + #ifdef _WIN32 #pragma comment(lib, "opengl32.lib") #endif diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 72267813e2..e986fd74d2 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -19,6 +19,10 @@ #include "util/logs.hpp" #include "util/asm.hpp" +#include "glutils/common.h" +// TODO: Include on use +#include "glutils/buffer_object.h" + #define GL_FRAGMENT_TEXTURES_START 0 #define GL_VERTEX_TEXTURES_START (GL_FRAGMENT_TEXTURES_START + 16) #define GL_STENCIL_MIRRORS_START (GL_VERTEX_TEXTURES_START + 4) @@ -48,32 +52,6 @@ using namespace ::rsx::format_class_; namespace gl { - //Function call wrapped in ARB_DSA vs EXT_DSA compat check -#define DSA_CALL(func, object_name, target, ...)\ - if (::gl::get_driver_caps().ARB_dsa_supported)\ - gl##func(object_name, __VA_ARGS__);\ - else\ - gl##func##EXT(object_name, target, __VA_ARGS__); - -#define DSA_CALL2(func, ...)\ - if (::gl::get_driver_caps().ARB_dsa_supported)\ - gl##func(__VA_ARGS__);\ - else\ - gl##func##EXT(__VA_ARGS__); - -#define DSA_CALL2_RET(func, ...)\ - (::gl::get_driver_caps().ARB_dsa_supported) ?\ - gl##func(__VA_ARGS__) :\ - gl##func##EXT(__VA_ARGS__) - -#define DSA_CALL3(funcARB, funcDSA, ...)\ - if (::gl::get_driver_caps().ARB_dsa_supported)\ - gl##funcARB(__VA_ARGS__);\ - else\ - gl##funcDSA##EXT(__VA_ARGS__); - - class fence; - void enable_debugging(); bool is_primitive_native(rsx::primitive_type in); GLenum draw_mode(rsx::primitive_type in); @@ -97,129 +75,6 @@ namespace gl } }; - class fence - { - GLsync m_value = nullptr; - mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; - mutable bool signaled = false; - - public: - - fence() = default; - ~fence() = default; - - void create() - { - m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - flags = GL_SYNC_FLUSH_COMMANDS_BIT; - } - - void destroy() - { - glDeleteSync(m_value); - m_value = nullptr; - } - - void reset() - { - if (m_value != nullptr) - destroy(); - - create(); - } - - bool is_empty() const - { - return (m_value == nullptr); - } - - bool check_signaled() const - { - ensure(m_value); - - if (signaled) - return true; - - if (flags) - { - GLenum err = glClientWaitSync(m_value, flags, 0); - flags = 0; - - if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED)) - return false; - } - else - { - GLint status = GL_UNSIGNALED; - GLint tmp; - - glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); - - if (status != GL_SIGNALED) - return false; - } - - signaled = true; - return true; - } - - bool wait_for_signal() - { - ensure(m_value); - - if (signaled == GL_FALSE) - { - GLenum err = GL_WAIT_FAILED; - bool done = false; - - while (!done) - { - if (flags) - { - err = glClientWaitSync(m_value, flags, 0); - flags = 0; - - switch (err) - { - default: - rsx_log.error("gl::fence sync returned unknown error 0x%X", err); - [[fallthrough]]; - case GL_ALREADY_SIGNALED: - case GL_CONDITION_SATISFIED: - done = true; - break; - case GL_TIMEOUT_EXPIRED: - continue; - } - } - else - { - GLint status = GL_UNSIGNALED; - GLint tmp; - - glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); - - if (status == GL_SIGNALED) - break; - } - } - - signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); - } - - glDeleteSync(m_value); - m_value = nullptr; - - return signaled; - } - - void server_wait_sync() const - { - ensure(m_value != nullptr); - glWaitSync(m_value, 0, GL_TIMEOUT_IGNORED); - } - }; - template class save_binding_state_base { @@ -538,631 +393,6 @@ namespace gl } }; - class buffer - { - public: - enum class target - { - pixel_pack = GL_PIXEL_PACK_BUFFER, - pixel_unpack = GL_PIXEL_UNPACK_BUFFER, - array = GL_ARRAY_BUFFER, - element_array = GL_ELEMENT_ARRAY_BUFFER, - uniform = GL_UNIFORM_BUFFER, - texture = GL_TEXTURE_BUFFER, - ssbo = GL_SHADER_STORAGE_BUFFER - }; - - enum class access - { - read = GL_MAP_READ_BIT, - write = GL_MAP_WRITE_BIT, - read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT - }; - - enum class memory_type - { - undefined = 0, - local = 1, - host_visible = 2 - }; - - class save_binding_state - { - GLint m_last_binding; - GLenum m_target; - - public: - save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_) - { - new_state.bind(target_); - } - - save_binding_state(target target_) - { - GLenum pname{}; - switch (target_) - { - case target::pixel_pack: pname = GL_PIXEL_PACK_BUFFER_BINDING; break; - case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break; - case target::array: pname = GL_ARRAY_BUFFER_BINDING; break; - case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break; - case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break; - case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break; - case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break; - default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast(target_)); - } - - glGetIntegerv(pname, &m_last_binding); - m_target = static_cast(target_); - } - - ~save_binding_state() - { - glBindBuffer(m_target, m_last_binding); - } - }; - - protected: - GLuint m_id = GL_NONE; - GLsizeiptr m_size = 0; - target m_target = target::array; - memory_type m_memory_type = memory_type::undefined; - - void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage) - { - if (const auto& caps = get_driver_caps(); - caps.ARB_buffer_storage_supported) - { - GLenum flags = 0; - if (type == memory_type::host_visible) - { - switch (usage) - { - case GL_STREAM_DRAW: - case GL_STATIC_DRAW: - case GL_DYNAMIC_DRAW: - flags |= GL_MAP_WRITE_BIT; - break; - case GL_STREAM_READ: - case GL_STATIC_READ: - case GL_DYNAMIC_READ: - flags |= GL_MAP_READ_BIT; - break; - default: - fmt::throw_exception("Unsupported buffer usage 0x%x", usage); - } - } - else - { - // Local memory hints - if (usage == GL_DYNAMIC_COPY) - { - flags |= GL_DYNAMIC_STORAGE_BIT; - } - } - - if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD) - { - // This flag stops NVIDIA from allocating read-only memory in VRAM. - // NOTE: On AMD, allocating client-side memory via CLIENT_STORAGE_BIT or - // making use of GL_AMD_pinned_memory brings everything down to a crawl. - // Afaict there is no reason for this; disabling pixel pack/unpack operations does not alleviate the problem. - // The driver seems to eventually figure out the optimal storage location by itself. - flags |= GL_CLIENT_STORAGE_BIT; - } - - DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags); - m_size = size; - } - else - { - data(size, data_, usage); - } - - m_memory_type = type; - } - - public: - buffer() = default; - buffer(const buffer&) = delete; - - buffer(GLuint id) - { - set_id(id); - } - - ~buffer() - { - if (created()) - remove(); - } - - void recreate() - { - if (created()) - { - remove(); - } - - create(); - } - - void recreate(GLsizeiptr size, const void* data = nullptr) - { - if (created()) - { - remove(); - } - - create(size, data); - } - - void create() - { - glGenBuffers(1, &m_id); - save_binding_state save(current_target(), *this); - } - - void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW) - { - create(); - allocate(size, data_, type, usage); - } - - void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW) - { - m_target = target_; - - create(); - allocate(size, data_, type, usage); - } - - void bind(target target_) const - { - glBindBuffer(static_cast(target_), m_id); - } - - void bind() const - { - bind(current_target()); - } - - target current_target() const - { - return m_target; - } - - void remove() - { - if (m_id != GL_NONE) - { - glDeleteBuffers(1, &m_id); - m_id = GL_NONE; - m_size = 0; - } - } - - GLsizeiptr size() const - { - return m_size; - } - - uint id() const - { - return m_id; - } - - void set_id(uint id) - { - m_id = id; - } - - bool created() const - { - return m_id != GL_NONE; - } - - explicit operator bool() const - { - return created(); - } - - void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) - { - ensure(m_memory_type != memory_type::local); - - DSA_CALL2(NamedBufferData, m_id, size, data_, usage); - m_size = size; - } - - void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data) - { - ensure(m_memory_type == memory_type::local); - DSA_CALL2(NamedBufferSubData, m_id, offset, length, data); - } - - GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_) - { - ensure(m_memory_type == memory_type::host_visible); - - GLenum access_bits = static_cast(access_); - if (access_bits == GL_MAP_WRITE_BIT) access_bits |= GL_MAP_UNSYNCHRONIZED_BIT; - - auto raw_data = DSA_CALL2_RET(MapNamedBufferRange, id(), offset, length, access_bits); - return reinterpret_cast(raw_data); - } - - void unmap() - { - ensure(m_memory_type == memory_type::host_visible); - DSA_CALL2(UnmapNamedBuffer, id()); - } - - void bind_range(u32 index, u32 offset, u32 size) const - { - glBindBufferRange(static_cast(current_target()), index, id(), offset, size); - } - - void bind_range(target target_, u32 index, u32 offset, u32 size) const - { - glBindBufferRange(static_cast(target_), index, id(), offset, size); - } - - void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size) - { - if (get_driver_caps().ARB_dsa_supported) - { - glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size); - } - else - { - glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size); - } - } - }; - - class ring_buffer : public buffer - { - protected: - - u32 m_data_loc = 0; - void *m_memory_mapping = nullptr; - - fence m_fence; - - public: - - virtual void bind() - { - buffer::bind(); - } - - virtual void recreate(GLsizeiptr size, const void* data = nullptr) - { - if (m_id) - { - m_fence.wait_for_signal(); - remove(); - } - - buffer::create(); - save_binding_state save(current_target(), *this); - - GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; - if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT; - - DSA_CALL2(NamedBufferStorage, m_id, size, data, buffer_storage_flags); - m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); - - ensure(m_memory_mapping != nullptr); - m_data_loc = 0; - m_size = ::narrow(size); - m_memory_type = memory_type::host_visible; - } - - void create(target target_, GLsizeiptr size, const void* data_ = nullptr) - { - m_target = target_; - recreate(size, data_); - } - - virtual std::pair alloc_from_heap(u32 alloc_size, u16 alignment) - { - u32 offset = m_data_loc; - if (m_data_loc) offset = utils::align(offset, alignment); - - if ((offset + alloc_size) > m_size) - { - if (!m_fence.is_empty()) - { - m_fence.wait_for_signal(); - } - else - { - rsx_log.error("OOM Error: Ring buffer was likely being used without notify() being called"); - glFinish(); - } - - m_data_loc = 0; - offset = 0; - } - - //Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently - m_data_loc = utils::align(offset + alloc_size, 256); - return std::make_pair(static_cast(m_memory_mapping) + offset, offset); - } - - virtual void remove() - { - if (m_memory_mapping) - { - buffer::unmap(); - - m_memory_mapping = nullptr; - m_data_loc = 0; - m_size = 0; - } - - - if (m_id != GL_NONE) - { - glDeleteBuffers(1, &m_id); - m_id = GL_NONE; - } - } - - virtual void reserve_storage_on_heap(u32 /*alloc_size*/) {} - - virtual void unmap() {} - - virtual void flush() {} - - //Notification of a draw command - virtual void notify() - { - //Insert fence about 25% into the buffer - if (m_fence.is_empty() && (m_data_loc > (m_size >> 2))) - m_fence.reset(); - } - }; - - class legacy_ring_buffer : public ring_buffer - { - u32 m_mapped_bytes = 0; - u32 m_mapping_offset = 0; - u32 m_alignment_offset = 0; - - public: - - void recreate(GLsizeiptr size, const void* data = nullptr) override - { - if (m_id) - remove(); - - buffer::create(); - buffer::data(size, data, GL_DYNAMIC_DRAW); - - m_memory_type = memory_type::host_visible; - m_memory_mapping = nullptr; - m_data_loc = 0; - m_size = ::narrow(size); - } - - void create(target target_, GLsizeiptr size, const void* data_ = nullptr) - { - m_target = target_; - recreate(size, data_); - } - - void reserve_storage_on_heap(u32 alloc_size) override - { - ensure(m_memory_mapping == nullptr); - - u32 offset = m_data_loc; - if (m_data_loc) offset = utils::align(offset, 256); - - const u32 block_size = utils::align(alloc_size + 16, 256); //Overallocate just in case we need to realign base - - if ((offset + block_size) > m_size) - { - buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW); - m_data_loc = 0; - } - - m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, m_data_loc, block_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); - m_mapped_bytes = block_size; - m_mapping_offset = m_data_loc; - m_alignment_offset = 0; - - //When using debugging tools, the mapped base might not be aligned as expected - const u64 mapped_address_base = reinterpret_cast(m_memory_mapping); - if (mapped_address_base & 0xF) - { - //Unaligned result was returned. We have to modify the base address a bit - //We lose some memory here, but the 16 byte overallocation above makes up for it - const u64 new_base = (mapped_address_base & ~0xF) + 16; - const u64 diff_bytes = new_base - mapped_address_base; - - m_memory_mapping = reinterpret_cast(new_base); - m_mapped_bytes -= ::narrow(diff_bytes); - m_alignment_offset = ::narrow(diff_bytes); - } - - ensure(m_mapped_bytes >= alloc_size); - } - - std::pair alloc_from_heap(u32 alloc_size, u16 alignment) override - { - u32 offset = m_data_loc; - if (m_data_loc) offset = utils::align(offset, alignment); - - u32 padding = (offset - m_data_loc); - u32 real_size = utils::align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit - - if (real_size > m_mapped_bytes) - { - //Missed allocation. We take a performance hit on doing this. - //Overallocate slightly for the next allocation if requested size is too small - unmap(); - reserve_storage_on_heap(std::max(real_size, 4096U)); - - offset = m_data_loc; - if (m_data_loc) offset = utils::align(offset, alignment); - - padding = (offset - m_data_loc); - real_size = utils::align(padding + alloc_size, alignment); - } - - m_data_loc = offset + real_size; - m_mapped_bytes -= real_size; - - u32 local_offset = (offset - m_mapping_offset); - return std::make_pair(static_cast(m_memory_mapping) + local_offset, offset + m_alignment_offset); - } - - void remove() override - { - ring_buffer::remove(); - m_mapped_bytes = 0; - } - - void unmap() override - { - buffer::unmap(); - - m_memory_mapping = nullptr; - m_mapped_bytes = 0; - m_mapping_offset = 0; - } - - void notify() override {} - }; - - // A non-persistent ring buffer - // Internally maps and unmaps data. Uses persistent storage just like the regular persistent variant - // Works around drivers that have issues using mapped data for specific sources (e.g AMD proprietary driver with index buffers) - class transient_ring_buffer : public ring_buffer - { - bool dirty = false; - - void* map_internal(u32 offset, u32 length) - { - flush(); - - dirty = true; - return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); - } - - public: - - void bind() override - { - flush(); - buffer::bind(); - } - - void recreate(GLsizeiptr size, const void* data = nullptr) override - { - if (m_id) - { - m_fence.wait_for_signal(); - remove(); - } - - buffer::create(); - save_binding_state save(current_target(), *this); - DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT); - - m_data_loc = 0; - m_size = ::narrow(size); - m_memory_type = memory_type::host_visible; - } - - std::pair alloc_from_heap(u32 alloc_size, u16 alignment) override - { - ensure(m_memory_mapping == nullptr); - const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment); - return { map_internal(allocation.second, alloc_size), allocation.second }; - } - - void flush() override - { - if (dirty) - { - buffer::unmap(); - dirty = false; - } - } - - void unmap() override - { - flush(); - } - }; - - class buffer_view - { - buffer* m_buffer = nullptr; - u32 m_offset = 0; - u32 m_range = 0; - GLenum m_format = GL_R8UI; - - public: - buffer_view(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI) - : m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format) - {} - - buffer_view() = default; - - void update(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI) - { - ensure(_buffer->size() >= (offset + range)); - m_buffer = _buffer; - m_offset = offset; - m_range = range; - m_format = format; - } - - u32 offset() const - { - return m_offset; - } - - u32 range() const - { - return m_range; - } - - u32 format() const - { - return m_format; - } - - buffer* value() const - { - return m_buffer; - } - - bool in_range(u32 address, u32 size, u32& new_offset) const - { - if (address < m_offset) - return false; - - const u32 _offset = address - m_offset; - if (m_range < _offset) - return false; - - const auto remaining = m_range - _offset; - if (size <= remaining) - { - new_offset = _offset; - return true; - } - - return false; - } - }; - class vao { template @@ -1353,6 +583,8 @@ namespace gl class texture { + friend class texture_view; + public: enum class type { @@ -1480,7 +712,6 @@ namespace gl rsx::format_class m_format_class = RSX_FORMAT_CLASS_UNDEFINED; - public: class save_binding_state { GLenum target = GL_NONE; @@ -1519,6 +750,7 @@ namespace gl } }; + public: texture(const texture&) = delete; texture(texture&& texture_) = delete; diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 3c412c0d3a..9a46daf2c7 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -575,10 +575,10 @@ namespace gl program_handle.uniforms["stereo_image_count"] = (source[1] == GL_NONE? 1 : 2); saved_sampler_state saved(31, m_sampler); - glBindTexture(GL_TEXTURE_2D, source[0]); + cmd->bind_texture(31, GL_TEXTURE_2D, source[0]); saved_sampler_state saved2(30, m_sampler); - glBindTexture(GL_TEXTURE_2D, source[1]); + cmd->bind_texture(30, GL_TEXTURE_2D, source[1]); overlay_pass::run(cmd, viewport, GL_NONE, false, false); } diff --git a/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp b/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp new file mode 100644 index 0000000000..83d1afec86 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp @@ -0,0 +1,197 @@ +#include "stdafx.h" +#include "buffer_object.h" + +namespace gl +{ + void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage) + { + if (const auto& caps = get_driver_caps(); + caps.ARB_buffer_storage_supported) + { + GLenum flags = 0; + if (type == memory_type::host_visible) + { + switch (usage) + { + case GL_STREAM_DRAW: + case GL_STATIC_DRAW: + case GL_DYNAMIC_DRAW: + flags |= GL_MAP_WRITE_BIT; + break; + case GL_STREAM_READ: + case GL_STATIC_READ: + case GL_DYNAMIC_READ: + flags |= GL_MAP_READ_BIT; + break; + default: + fmt::throw_exception("Unsupported buffer usage 0x%x", usage); + } + } + else + { + // Local memory hints + if (usage == GL_DYNAMIC_COPY) + { + flags |= GL_DYNAMIC_STORAGE_BIT; + } + } + + if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD) + { + // This flag stops NVIDIA from allocating read-only memory in VRAM. + // NOTE: On AMD, allocating client-side memory via CLIENT_STORAGE_BIT or + // making use of GL_AMD_pinned_memory brings everything down to a crawl. + // Afaict there is no reason for this; disabling pixel pack/unpack operations does not alleviate the problem. + // The driver seems to eventually figure out the optimal storage location by itself. + flags |= GL_CLIENT_STORAGE_BIT; + } + + DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags); + m_size = size; + } + else + { + data(size, data_, usage); + } + + m_memory_type = type; + } + + buffer::~buffer() + { + if (created()) + remove(); + } + + void buffer::recreate() + { + if (created()) + { + remove(); + } + + create(); + } + + void buffer::recreate(GLsizeiptr size, const void* data) + { + if (created()) + { + remove(); + } + + create(size, data); + } + + void buffer::create() + { + glGenBuffers(1, &m_id); + save_binding_state save(current_target(), *this); + } + + void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLenum usage) + { + create(); + allocate(size, data_, type, usage); + } + + void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLenum usage) + { + m_target = target_; + + create(); + allocate(size, data_, type, usage); + } + + void buffer::remove() + { + if (m_id != GL_NONE) + { + glDeleteBuffers(1, &m_id); + m_id = GL_NONE; + m_size = 0; + } + } + + void buffer::data(GLsizeiptr size, const void* data_, GLenum usage) + { + ensure(m_memory_type != memory_type::local); + + DSA_CALL2(NamedBufferData, m_id, size, data_, usage); + m_size = size; + } + + void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data) + { + ensure(m_memory_type == memory_type::local); + DSA_CALL2(NamedBufferSubData, m_id, offset, length, data); + } + + GLubyte* buffer::map(GLsizeiptr offset, GLsizeiptr length, access access_) + { + ensure(m_memory_type == memory_type::host_visible); + + GLenum access_bits = static_cast(access_); + if (access_bits == GL_MAP_WRITE_BIT) access_bits |= GL_MAP_UNSYNCHRONIZED_BIT; + + auto raw_data = DSA_CALL2_RET(MapNamedBufferRange, id(), offset, length, access_bits); + return reinterpret_cast(raw_data); + } + + void buffer::unmap() + { + ensure(m_memory_type == memory_type::host_visible); + DSA_CALL2(UnmapNamedBuffer, id()); + } + + void buffer::bind_range(u32 index, u32 offset, u32 size) const + { + glBindBufferRange(static_cast(current_target()), index, id(), offset, size); + } + + void buffer::bind_range(target target_, u32 index, u32 offset, u32 size) const + { + glBindBufferRange(static_cast(target_), index, id(), offset, size); + } + + void buffer::copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size) + { + if (get_driver_caps().ARB_dsa_supported) + { + glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size); + } + else + { + glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size); + } + } + + // Buffer view + void buffer_view::update(buffer* _buffer, u32 offset, u32 range, GLenum format) + { + ensure(_buffer->size() >= (offset + range)); + m_buffer = _buffer; + m_offset = offset; + m_range = range; + m_format = format; + } + + bool buffer_view::in_range(u32 address, u32 size, u32& new_offset) const + { + if (address < m_offset) + return false; + + const u32 _offset = address - m_offset; + if (m_range < _offset) + return false; + + const auto remaining = m_range - _offset; + if (size <= remaining) + { + new_offset = _offset; + return true; + } + + return false; + } +} diff --git a/rpcs3/Emu/RSX/GL/glutils/buffer_object.h b/rpcs3/Emu/RSX/GL/glutils/buffer_object.h new file mode 100644 index 0000000000..65bf6106e4 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/glutils/buffer_object.h @@ -0,0 +1,142 @@ +#pragma once + +#include "common.h" + +namespace gl +{ + class buffer + { + public: + enum class target + { + pixel_pack = GL_PIXEL_PACK_BUFFER, + pixel_unpack = GL_PIXEL_UNPACK_BUFFER, + array = GL_ARRAY_BUFFER, + element_array = GL_ELEMENT_ARRAY_BUFFER, + uniform = GL_UNIFORM_BUFFER, + texture = GL_TEXTURE_BUFFER, + ssbo = GL_SHADER_STORAGE_BUFFER + }; + + enum class access + { + read = GL_MAP_READ_BIT, + write = GL_MAP_WRITE_BIT, + read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT + }; + + enum class memory_type + { + undefined = 0, + local = 1, + host_visible = 2 + }; + + class save_binding_state + { + GLint m_last_binding; + GLenum m_target; + + public: + save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_) + { + new_state.bind(target_); + } + + save_binding_state(target target_) + { + GLenum pname{}; + switch (target_) + { + case target::pixel_pack: pname = GL_PIXEL_PACK_BUFFER_BINDING; break; + case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break; + case target::array: pname = GL_ARRAY_BUFFER_BINDING; break; + case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break; + case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break; + case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break; + case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break; + default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast(target_)); + } + + glGetIntegerv(pname, &m_last_binding); + m_target = static_cast(target_); + } + + ~save_binding_state() + { + glBindBuffer(m_target, m_last_binding); + } + }; + + protected: + GLuint m_id = GL_NONE; + GLsizeiptr m_size = 0; + target m_target = target::array; + memory_type m_memory_type = memory_type::undefined; + + void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage); + + public: + buffer() = default; + buffer(const buffer&) = delete; + ~buffer(); + + void recreate(); + void recreate(GLsizeiptr size, const void* data = nullptr); + + void create(); + void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW); + void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW); + + void remove(); + + void bind(target target_) const { glBindBuffer(static_cast(target_), m_id); } + void bind() const { bind(current_target()); } + + void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW); + void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data); + + GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_); + void unmap(); + + void bind_range(u32 index, u32 offset, u32 size) const; + void bind_range(target target_, u32 index, u32 offset, u32 size) const; + + void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size); + + target current_target() const { return m_target; } + GLsizeiptr size() const { return m_size; } + uint id() const { return m_id; } + void set_id(uint id) { m_id = id; } + bool created() const { return m_id != GL_NONE; } + + explicit operator bool() const { return created(); } + }; + + class buffer_view + { + buffer* m_buffer = nullptr; + u32 m_offset = 0; + u32 m_range = 0; + GLenum m_format = GL_R8UI; + + public: + buffer_view(buffer* _buffer, u32 offset, u32 range, GLenum format = GL_R8UI) + : m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format) + {} + + buffer_view() = default; + + void update(buffer* _buffer, u32 offset, u32 range, GLenum format = GL_R8UI); + + u32 offset() const { return m_offset; } + + u32 range() const { return m_range; } + + u32 format() const { return m_format; } + + buffer* value() const { return m_buffer; } + + bool in_range(u32 address, u32 size, u32& new_offset) const; + }; +} diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.hpp b/rpcs3/Emu/RSX/GL/glutils/capabilities.hpp new file mode 100644 index 0000000000..39de9ebf18 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.hpp @@ -0,0 +1,230 @@ +#pragma once + +#include "../OpenGL.h" +#include +#include +#include + +namespace gl +{ + class capabilities + { + public: + bool EXT_dsa_supported = false; + bool EXT_depth_bounds_test = false; + bool ARB_dsa_supported = false; + bool ARB_bindless_texture_supported = false; + bool ARB_buffer_storage_supported = false; + bool ARB_texture_buffer_supported = false; + bool ARB_shader_draw_parameters_supported = false; + bool ARB_depth_buffer_float_supported = false; + bool ARB_texture_barrier_supported = false; + bool ARB_shader_stencil_export_supported = false; + bool NV_texture_barrier_supported = false; + bool NV_gpu_shader5_supported = false; + bool AMD_gpu_shader_half_float_supported = false; + bool ARB_compute_shader_supported = false; + bool NV_depth_buffer_float_supported = false; + bool initialized = false; + bool vendor_INTEL = false; // has broken GLSL compiler + bool vendor_AMD = false; // has broken ARB_multidraw + bool vendor_NVIDIA = false; // has NaN poisoning issues + bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers + + bool check(const std::string& ext_name, const char* test) + { + if (ext_name == test) + { + rsx_log.notice("Extension %s is supported", ext_name); + return true; + } + + return false; + } + + void initialize() + { + int find_count = 15; + int ext_count = 0; + glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count); + + if (!ext_count) + { + rsx_log.error("Coult not initialize GL driver capabilities. Is OpenGL initialized?"); + return; + } + + std::string vendor_string = reinterpret_cast(glGetString(GL_VENDOR)); + std::string version_string = reinterpret_cast(glGetString(GL_VERSION)); + std::string renderer_string = reinterpret_cast(glGetString(GL_RENDERER)); + + for (int i = 0; i < ext_count; i++) + { + if (!find_count) break; + + const std::string ext_name = reinterpret_cast(glGetStringi(GL_EXTENSIONS, i)); + + if (check(ext_name, "GL_ARB_shader_draw_parameters")) + { + ARB_shader_draw_parameters_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_EXT_direct_state_access")) + { + EXT_dsa_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_direct_state_access")) + { + ARB_dsa_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_bindless_texture")) + { + ARB_bindless_texture_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_buffer_storage")) + { + ARB_buffer_storage_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_texture_buffer_object")) + { + ARB_texture_buffer_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_depth_buffer_float")) + { + ARB_depth_buffer_float_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_texture_barrier")) + { + ARB_texture_barrier_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_NV_texture_barrier")) + { + NV_texture_barrier_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_NV_gpu_shader5")) + { + NV_gpu_shader5_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_AMD_gpu_shader_half_float")) + { + AMD_gpu_shader_half_float_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_compute_shader")) + { + ARB_compute_shader_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_EXT_depth_bounds_test")) + { + EXT_depth_bounds_test = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_NV_depth_buffer_float")) + { + NV_depth_buffer_float_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_ARB_shader_stencil_export")) + { + ARB_shader_stencil_export_supported = true; + find_count--; + continue; + } + } + + // Check GL_VERSION and GL_RENDERER for the presence of Mesa + if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax) + { + vendor_MESA = true; + } + + // Workaround for intel drivers which have terrible capability reporting + if (!vendor_string.empty()) + { + std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower); + } + else + { + rsx_log.error("Failed to get vendor string from driver. Are we missing a context?"); + vendor_string = "intel"; // lowest acceptable value + } + + if (!vendor_MESA && vendor_string.find("intel") != umax) + { + int version_major = 0; + int version_minor = 0; + + glGetIntegerv(GL_MAJOR_VERSION, &version_major); + glGetIntegerv(GL_MINOR_VERSION, &version_minor); + + vendor_INTEL = true; + + // Texture buffers moved into core at GL 3.3 + if (version_major > 3 || (version_major == 3 && version_minor >= 3)) + ARB_texture_buffer_supported = true; + + // Check for expected library entry-points for some required functions + if (!ARB_buffer_storage_supported && glNamedBufferStorage && glMapNamedBufferRange) + ARB_buffer_storage_supported = true; + + if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange) + ARB_dsa_supported = true; + + if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT) + EXT_dsa_supported = true; + } + else if (!vendor_MESA && vendor_string.find("nvidia") != umax) + { + vendor_NVIDIA = true; + } +#ifdef _WIN32 + else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax) + { + vendor_AMD = true; + } +#endif + + initialized = true; + } + }; + + const capabilities& get_driver_caps(); +} diff --git a/rpcs3/Emu/RSX/GL/glutils/common.h b/rpcs3/Emu/RSX/GL/glutils/common.h new file mode 100644 index 0000000000..df78e1246c --- /dev/null +++ b/rpcs3/Emu/RSX/GL/glutils/common.h @@ -0,0 +1,154 @@ +#pragma once + +#include "capabilities.hpp" + +//Function call wrapped in ARB_DSA vs EXT_DSA compat check +#define DSA_CALL(func, object_name, target, ...)\ + if (::gl::get_driver_caps().ARB_dsa_supported)\ + gl##func(object_name, __VA_ARGS__);\ + else\ + gl##func##EXT(object_name, target, __VA_ARGS__); + +#define DSA_CALL2(func, ...)\ + if (::gl::get_driver_caps().ARB_dsa_supported)\ + gl##func(__VA_ARGS__);\ + else\ + gl##func##EXT(__VA_ARGS__); + +#define DSA_CALL2_RET(func, ...)\ + (::gl::get_driver_caps().ARB_dsa_supported) ?\ + gl##func(__VA_ARGS__) :\ + gl##func##EXT(__VA_ARGS__) + +#define DSA_CALL3(funcARB, funcDSA, ...)\ + if (::gl::get_driver_caps().ARB_dsa_supported)\ + gl##funcARB(__VA_ARGS__);\ + else\ + gl##funcDSA##EXT(__VA_ARGS__); + +namespace gl +{ + // TODO: Move to sync.h + class fence + { + GLsync m_value = nullptr; + mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT; + mutable bool signaled = false; + + public: + + fence() = default; + ~fence() = default; + + void create() + { + m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + flags = GL_SYNC_FLUSH_COMMANDS_BIT; + } + + void destroy() + { + glDeleteSync(m_value); + m_value = nullptr; + } + + void reset() + { + if (m_value != nullptr) + destroy(); + + create(); + } + + bool is_empty() const + { + return (m_value == nullptr); + } + + bool check_signaled() const + { + ensure(m_value); + + if (signaled) + return true; + + if (flags) + { + GLenum err = glClientWaitSync(m_value, flags, 0); + flags = 0; + + if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED)) + return false; + } + else + { + GLint status = GL_UNSIGNALED; + GLint tmp; + + glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); + + if (status != GL_SIGNALED) + return false; + } + + signaled = true; + return true; + } + + bool wait_for_signal() + { + ensure(m_value); + + if (signaled == GL_FALSE) + { + GLenum err = GL_WAIT_FAILED; + bool done = false; + + while (!done) + { + if (flags) + { + err = glClientWaitSync(m_value, flags, 0); + flags = 0; + + switch (err) + { + default: + rsx_log.error("gl::fence sync returned unknown error 0x%X", err); + [[fallthrough]]; + case GL_ALREADY_SIGNALED: + case GL_CONDITION_SATISFIED: + done = true; + break; + case GL_TIMEOUT_EXPIRED: + continue; + } + } + else + { + GLint status = GL_UNSIGNALED; + GLint tmp; + + glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); + + if (status == GL_SIGNALED) + break; + } + } + + signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED); + } + + glDeleteSync(m_value); + m_value = nullptr; + + return signaled; + } + + void server_wait_sync() const + { + ensure(m_value != nullptr); + glWaitSync(m_value, 0, GL_TIMEOUT_IGNORED); + } + }; +} diff --git a/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp new file mode 100644 index 0000000000..1c416d05ac --- /dev/null +++ b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp @@ -0,0 +1,241 @@ +#include "stdafx.h" +#include "ring_buffer.h" + +namespace gl +{ + void ring_buffer::recreate(GLsizeiptr size, const void* data) + { + if (m_id) + { + m_fence.wait_for_signal(); + remove(); + } + + buffer::create(); + save_binding_state save(current_target(), *this); + + GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT; + + DSA_CALL2(NamedBufferStorage, m_id, size, data, buffer_storage_flags); + m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + + ensure(m_memory_mapping != nullptr); + m_data_loc = 0; + m_size = ::narrow(size); + m_memory_type = memory_type::host_visible; + } + + void ring_buffer::create(target target_, GLsizeiptr size, const void* data_) + { + m_target = target_; + recreate(size, data_); + } + + std::pair ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment) + { + u32 offset = m_data_loc; + if (m_data_loc) offset = utils::align(offset, alignment); + + if ((offset + alloc_size) > m_size) + { + if (!m_fence.is_empty()) + { + m_fence.wait_for_signal(); + } + else + { + rsx_log.error("OOM Error: Ring buffer was likely being used without notify() being called"); + glFinish(); + } + + m_data_loc = 0; + offset = 0; + } + + //Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently + m_data_loc = utils::align(offset + alloc_size, 256); + return std::make_pair(static_cast(m_memory_mapping) + offset, offset); + } + + void ring_buffer::remove() + { + if (m_memory_mapping) + { + buffer::unmap(); + + m_memory_mapping = nullptr; + m_data_loc = 0; + m_size = 0; + } + + + if (m_id != GL_NONE) + { + glDeleteBuffers(1, &m_id); + m_id = GL_NONE; + } + } + + void ring_buffer::notify() + { + //Insert fence about 25% into the buffer + if (m_fence.is_empty() && (m_data_loc > (m_size >> 2))) + m_fence.reset(); + } + + // Legacy ring buffer - used when ARB_buffer_storage is not available, OR when capturing with renderdoc + void legacy_ring_buffer::recreate(GLsizeiptr size, const void* data) + { + if (m_id) + remove(); + + buffer::create(); + buffer::data(size, data, GL_DYNAMIC_DRAW); + + m_memory_type = memory_type::host_visible; + m_memory_mapping = nullptr; + m_data_loc = 0; + m_size = ::narrow(size); + } + + void legacy_ring_buffer::create(target target_, GLsizeiptr size, const void* data_) + { + m_target = target_; + recreate(size, data_); + } + + void legacy_ring_buffer::reserve_storage_on_heap(u32 alloc_size) + { + ensure(m_memory_mapping == nullptr); + + u32 offset = m_data_loc; + if (m_data_loc) offset = utils::align(offset, 256); + + const u32 block_size = utils::align(alloc_size + 16, 256); //Overallocate just in case we need to realign base + + if ((offset + block_size) > m_size) + { + buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW); + m_data_loc = 0; + } + + m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, m_data_loc, block_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + m_mapped_bytes = block_size; + m_mapping_offset = m_data_loc; + m_alignment_offset = 0; + + //When using debugging tools, the mapped base might not be aligned as expected + const u64 mapped_address_base = reinterpret_cast(m_memory_mapping); + if (mapped_address_base & 0xF) + { + //Unaligned result was returned. We have to modify the base address a bit + //We lose some memory here, but the 16 byte overallocation above makes up for it + const u64 new_base = (mapped_address_base & ~0xF) + 16; + const u64 diff_bytes = new_base - mapped_address_base; + + m_memory_mapping = reinterpret_cast(new_base); + m_mapped_bytes -= ::narrow(diff_bytes); + m_alignment_offset = ::narrow(diff_bytes); + } + + ensure(m_mapped_bytes >= alloc_size); + } + + std::pair legacy_ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment) + { + u32 offset = m_data_loc; + if (m_data_loc) offset = utils::align(offset, alignment); + + u32 padding = (offset - m_data_loc); + u32 real_size = utils::align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit + + if (real_size > m_mapped_bytes) + { + //Missed allocation. We take a performance hit on doing this. + //Overallocate slightly for the next allocation if requested size is too small + unmap(); + reserve_storage_on_heap(std::max(real_size, 4096U)); + + offset = m_data_loc; + if (m_data_loc) offset = utils::align(offset, alignment); + + padding = (offset - m_data_loc); + real_size = utils::align(padding + alloc_size, alignment); + } + + m_data_loc = offset + real_size; + m_mapped_bytes -= real_size; + + u32 local_offset = (offset - m_mapping_offset); + return std::make_pair(static_cast(m_memory_mapping) + local_offset, offset + m_alignment_offset); + } + + void legacy_ring_buffer::remove() + { + ring_buffer::remove(); + m_mapped_bytes = 0; + } + + void legacy_ring_buffer::unmap() + { + buffer::unmap(); + + m_memory_mapping = nullptr; + m_mapped_bytes = 0; + m_mapping_offset = 0; + } + + // AMD persistent mapping workaround for driver-assisted flushing + void* transient_ring_buffer::map_internal(u32 offset, u32 length) + { + flush(); + + dirty = true; + return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + } + + void transient_ring_buffer::bind() + { + flush(); + buffer::bind(); + } + + void transient_ring_buffer::recreate(GLsizeiptr size, const void* data) + { + if (m_id) + { + m_fence.wait_for_signal(); + remove(); + } + + buffer::create(); + save_binding_state save(current_target(), *this); + DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT); + + m_data_loc = 0; + m_size = ::narrow(size); + m_memory_type = memory_type::host_visible; + } + + std::pair transient_ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment) + { + ensure(m_memory_mapping == nullptr); + const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment); + return { map_internal(allocation.second, alloc_size), allocation.second }; + } + + void transient_ring_buffer::flush() + { + if (dirty) + { + buffer::unmap(); + dirty = false; + } + } + + void transient_ring_buffer::unmap() + { + flush(); + } +} diff --git a/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h new file mode 100644 index 0000000000..5016dffe70 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h @@ -0,0 +1,81 @@ +#pragma once + +#include "buffer_object.h" + +namespace gl +{ + class ring_buffer : public buffer + { + protected: + + u32 m_data_loc = 0; + void* m_memory_mapping = nullptr; + + fence m_fence; + + public: + + virtual void bind() { buffer::bind(); } + + virtual void recreate(GLsizeiptr size, const void* data = nullptr); + + void create(target target_, GLsizeiptr size, const void* data_ = nullptr); + + virtual std::pair alloc_from_heap(u32 alloc_size, u16 alignment); + + virtual void remove(); + + virtual void reserve_storage_on_heap(u32 /*alloc_size*/) {} + + virtual void unmap() {} + + virtual void flush() {} + + virtual void notify(); + }; + + class legacy_ring_buffer : public ring_buffer + { + u32 m_mapped_bytes = 0; + u32 m_mapping_offset = 0; + u32 m_alignment_offset = 0; + + public: + + void recreate(GLsizeiptr size, const void* data = nullptr) override; + + void create(target target_, GLsizeiptr size, const void* data_ = nullptr); + + void reserve_storage_on_heap(u32 alloc_size) override; + + std::pair alloc_from_heap(u32 alloc_size, u16 alignment) override; + + void remove() override; + + void unmap() override; + + void notify() override {} + }; + + // A non-persistent ring buffer + // Internally maps and unmaps data. Uses persistent storage just like the regular persistent variant + // Works around drivers that have issues using mapped data for specific sources (e.g AMD proprietary driver with index buffers) + class transient_ring_buffer : public ring_buffer + { + bool dirty = false; + + void* map_internal(u32 offset, u32 length); + + public: + + void bind() override; + + void recreate(GLsizeiptr size, const void* data = nullptr) override; + + std::pair alloc_from_heap(u32 alloc_size, u16 alignment) override; + + void flush() override; + + void unmap() override; + }; +} \ No newline at end of file diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 1d604d4642..d2c43837dd 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -61,6 +61,10 @@ + + + + @@ -77,6 +81,8 @@ + + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 00b2469faa..07be953468 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -17,6 +17,12 @@ + + glutils + + + glutils + @@ -36,5 +42,22 @@ + + glutils + + + glutils + + + glutils + + + glutils + + + + + {ed9ef6b7-efbb-4a8e-88a4-583b6655c141} + \ No newline at end of file