gl: Refactor buffer object code

kd-11 authored on 2022-06-02 20:43:46 +03:00, committed by kd-11
parent ff9c939720
commit 3fd846687e
13 changed files with 1089 additions and 1002 deletions

View file

@@ -454,6 +454,8 @@ target_sources(rpcs3_emu PRIVATE
RSX/Program/VertexProgramDecompiler.cpp
RSX/Capture/rsx_capture.cpp
RSX/Capture/rsx_replay.cpp
RSX/GL/glutils/buffer_object.cpp
RSX/GL/glutils/ring_buffer.cpp
RSX/GL/GLCommonDecompiler.cpp
RSX/GL/GLCompute.cpp
RSX/GL/GLDraw.cpp

View file

@@ -1,235 +1,12 @@
#pragma once
#include "util/logs.hpp"
#include "util/types.hpp"
#include "Utilities/geometry.h"
#include "OpenGL.h"
#include "glutils/capabilities.hpp"
#include "Utilities/geometry.h"
#include <unordered_map>
namespace gl
{
class capabilities
{
public:
bool EXT_dsa_supported = false;
bool EXT_depth_bounds_test = false;
bool ARB_dsa_supported = false;
bool ARB_bindless_texture_supported = false;
bool ARB_buffer_storage_supported = false;
bool ARB_texture_buffer_supported = false;
bool ARB_shader_draw_parameters_supported = false;
bool ARB_depth_buffer_float_supported = false;
bool ARB_texture_barrier_supported = false;
bool ARB_shader_stencil_export_supported = false;
bool NV_texture_barrier_supported = false;
bool NV_gpu_shader5_supported = false;
bool AMD_gpu_shader_half_float_supported = false;
bool ARB_compute_shader_supported = false;
bool NV_depth_buffer_float_supported = false;
bool initialized = false;
bool vendor_INTEL = false; // has broken GLSL compiler
bool vendor_AMD = false; // has broken ARB_multidraw
bool vendor_NVIDIA = false; // has NaN poisoning issues
bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers
bool check(const std::string& ext_name, const char* test)
{
if (ext_name == test)
{
rsx_log.notice("Extension %s is supported", ext_name);
return true;
}
return false;
}
void initialize()
{
int find_count = 15;
int ext_count = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
if (!ext_count)
{
rsx_log.error("Coult not initialize GL driver capabilities. Is OpenGL initialized?");
return;
}
std::string vendor_string = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
std::string version_string = reinterpret_cast<const char*>(glGetString(GL_VERSION));
std::string renderer_string = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
for (int i = 0; i < ext_count; i++)
{
if (!find_count) break;
const std::string ext_name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i));
if (check(ext_name, "GL_ARB_shader_draw_parameters"))
{
ARB_shader_draw_parameters_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_EXT_direct_state_access"))
{
EXT_dsa_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_direct_state_access"))
{
ARB_dsa_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_bindless_texture"))
{
ARB_bindless_texture_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_buffer_storage"))
{
ARB_buffer_storage_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_texture_buffer_object"))
{
ARB_texture_buffer_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_depth_buffer_float"))
{
ARB_depth_buffer_float_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_texture_barrier"))
{
ARB_texture_barrier_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_NV_texture_barrier"))
{
NV_texture_barrier_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_NV_gpu_shader5"))
{
NV_gpu_shader5_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_AMD_gpu_shader_half_float"))
{
AMD_gpu_shader_half_float_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_compute_shader"))
{
ARB_compute_shader_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_EXT_depth_bounds_test"))
{
EXT_depth_bounds_test = true;
find_count--;
continue;
}
if (check(ext_name, "GL_NV_depth_buffer_float"))
{
NV_depth_buffer_float_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_shader_stencil_export"))
{
ARB_shader_stencil_export_supported = true;
find_count--;
continue;
}
}
// Check GL_VERSION and GL_RENDERER for the presence of Mesa
if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
{
vendor_MESA = true;
}
// Workaround for intel drivers which have terrible capability reporting
if (!vendor_string.empty())
{
std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower);
}
else
{
rsx_log.error("Failed to get vendor string from driver. Are we missing a context?");
vendor_string = "intel"; // lowest acceptable value
}
if (!vendor_MESA && vendor_string.find("intel") != umax)
{
int version_major = 0;
int version_minor = 0;
glGetIntegerv(GL_MAJOR_VERSION, &version_major);
glGetIntegerv(GL_MINOR_VERSION, &version_minor);
vendor_INTEL = true;
// Texture buffers moved into core at GL 3.3
if (version_major > 3 || (version_major == 3 && version_minor >= 3))
ARB_texture_buffer_supported = true;
// Check for expected library entry-points for some required functions
if (!ARB_buffer_storage_supported && glNamedBufferStorage && glMapNamedBufferRange)
ARB_buffer_storage_supported = true;
if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange)
ARB_dsa_supported = true;
if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
EXT_dsa_supported = true;
}
else if (!vendor_MESA && vendor_string.find("nvidia") != umax)
{
vendor_NVIDIA = true;
}
#ifdef _WIN32
else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax)
{
vendor_AMD = true;
}
#endif
initialized = true;
}
};
const capabilities& get_driver_caps();
struct driver_state
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;

View file

@@ -12,6 +12,8 @@
#include <optional>
#include <unordered_map>
#include "glutils/ring_buffer.h"
#ifdef _WIN32
#pragma comment(lib, "opengl32.lib")
#endif

View file

@@ -19,6 +19,10 @@
#include "util/logs.hpp"
#include "util/asm.hpp"
#include "glutils/common.h"
// TODO: Include on use
#include "glutils/buffer_object.h"
#define GL_FRAGMENT_TEXTURES_START 0
#define GL_VERTEX_TEXTURES_START (GL_FRAGMENT_TEXTURES_START + 16)
#define GL_STENCIL_MIRRORS_START (GL_VERTEX_TEXTURES_START + 4)
@@ -48,32 +52,6 @@ using namespace ::rsx::format_class_;
namespace gl
{
//Function call wrapped in ARB_DSA vs EXT_DSA compat check
#define DSA_CALL(func, object_name, target, ...)\
if (::gl::get_driver_caps().ARB_dsa_supported)\
gl##func(object_name, __VA_ARGS__);\
else\
gl##func##EXT(object_name, target, __VA_ARGS__);
#define DSA_CALL2(func, ...)\
if (::gl::get_driver_caps().ARB_dsa_supported)\
gl##func(__VA_ARGS__);\
else\
gl##func##EXT(__VA_ARGS__);
#define DSA_CALL2_RET(func, ...)\
(::gl::get_driver_caps().ARB_dsa_supported) ?\
gl##func(__VA_ARGS__) :\
gl##func##EXT(__VA_ARGS__)
#define DSA_CALL3(funcARB, funcDSA, ...)\
if (::gl::get_driver_caps().ARB_dsa_supported)\
gl##funcARB(__VA_ARGS__);\
else\
gl##funcDSA##EXT(__VA_ARGS__);
class fence;
void enable_debugging();
bool is_primitive_native(rsx::primitive_type in);
GLenum draw_mode(rsx::primitive_type in);
@@ -97,129 +75,6 @@ namespace gl
}
};
class fence
{
GLsync m_value = nullptr;
mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
mutable bool signaled = false;
public:
fence() = default;
~fence() = default;
void create()
{
m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
flags = GL_SYNC_FLUSH_COMMANDS_BIT;
}
void destroy()
{
glDeleteSync(m_value);
m_value = nullptr;
}
void reset()
{
if (m_value != nullptr)
destroy();
create();
}
bool is_empty() const
{
return (m_value == nullptr);
}
bool check_signaled() const
{
ensure(m_value);
if (signaled)
return true;
if (flags)
{
GLenum err = glClientWaitSync(m_value, flags, 0);
flags = 0;
if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED))
return false;
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status != GL_SIGNALED)
return false;
}
signaled = true;
return true;
}
bool wait_for_signal()
{
ensure(m_value);
if (!signaled)
{
GLenum err = GL_WAIT_FAILED;
bool done = false;
while (!done)
{
if (flags)
{
err = glClientWaitSync(m_value, flags, 0);
flags = 0;
switch (err)
{
default:
rsx_log.error("gl::fence sync returned unknown error 0x%X", err);
[[fallthrough]];
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status == GL_SIGNALED)
{
// Record the polled success as a completed wait so the result below is correct
err = GL_ALREADY_SIGNALED;
done = true;
}
}
}
signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
glDeleteSync(m_value);
m_value = nullptr;
return signaled;
}
void server_wait_sync() const
{
ensure(m_value != nullptr);
glWaitSync(m_value, 0, GL_TIMEOUT_IGNORED);
}
};
template<typename Type, uint BindId, uint GetStateId>
class save_binding_state_base
{
@@ -538,631 +393,6 @@ namespace gl
}
};
class buffer
{
public:
enum class target
{
pixel_pack = GL_PIXEL_PACK_BUFFER,
pixel_unpack = GL_PIXEL_UNPACK_BUFFER,
array = GL_ARRAY_BUFFER,
element_array = GL_ELEMENT_ARRAY_BUFFER,
uniform = GL_UNIFORM_BUFFER,
texture = GL_TEXTURE_BUFFER,
ssbo = GL_SHADER_STORAGE_BUFFER
};
enum class access
{
read = GL_MAP_READ_BIT,
write = GL_MAP_WRITE_BIT,
read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
};
enum class memory_type
{
undefined = 0,
local = 1,
host_visible = 2
};
class save_binding_state
{
GLint m_last_binding;
GLenum m_target;
public:
save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_)
{
new_state.bind(target_);
}
save_binding_state(target target_)
{
GLenum pname{};
switch (target_)
{
case target::pixel_pack: pname = GL_PIXEL_PACK_BUFFER_BINDING; break;
case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break;
case target::array: pname = GL_ARRAY_BUFFER_BINDING; break;
case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break;
case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break;
case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break;
case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break;
default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast<int>(target_));
}
glGetIntegerv(pname, &m_last_binding);
m_target = static_cast<GLenum>(target_);
}
~save_binding_state()
{
glBindBuffer(m_target, m_last_binding);
}
};
protected:
GLuint m_id = GL_NONE;
GLsizeiptr m_size = 0;
target m_target = target::array;
memory_type m_memory_type = memory_type::undefined;
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
{
if (const auto& caps = get_driver_caps();
caps.ARB_buffer_storage_supported)
{
GLenum flags = 0;
if (type == memory_type::host_visible)
{
switch (usage)
{
case GL_STREAM_DRAW:
case GL_STATIC_DRAW:
case GL_DYNAMIC_DRAW:
flags |= GL_MAP_WRITE_BIT;
break;
case GL_STREAM_READ:
case GL_STATIC_READ:
case GL_DYNAMIC_READ:
flags |= GL_MAP_READ_BIT;
break;
default:
fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
}
}
else
{
// Local memory hints
if (usage == GL_DYNAMIC_COPY)
{
flags |= GL_DYNAMIC_STORAGE_BIT;
}
}
if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
{
// This flag stops NVIDIA from allocating read-only memory in VRAM.
// NOTE: On AMD, allocating client-side memory via CLIENT_STORAGE_BIT or
// making use of GL_AMD_pinned_memory brings everything down to a crawl.
// Afaict there is no reason for this; disabling pixel pack/unpack operations does not alleviate the problem.
// The driver seems to eventually figure out the optimal storage location by itself.
flags |= GL_CLIENT_STORAGE_BIT;
}
DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags);
m_size = size;
}
else
{
data(size, data_, usage);
}
m_memory_type = type;
}
public:
buffer() = default;
buffer(const buffer&) = delete;
buffer(GLuint id)
{
set_id(id);
}
~buffer()
{
if (created())
remove();
}
void recreate()
{
if (created())
{
remove();
}
create();
}
void recreate(GLsizeiptr size, const void* data = nullptr)
{
if (created())
{
remove();
}
create(size, data);
}
void create()
{
glGenBuffers(1, &m_id);
save_binding_state save(current_target(), *this);
}
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
{
create();
allocate(size, data_, type, usage);
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
{
m_target = target_;
create();
allocate(size, data_, type, usage);
}
void bind(target target_) const
{
glBindBuffer(static_cast<GLenum>(target_), m_id);
}
void bind() const
{
bind(current_target());
}
target current_target() const
{
return m_target;
}
void remove()
{
if (m_id != GL_NONE)
{
glDeleteBuffers(1, &m_id);
m_id = GL_NONE;
m_size = 0;
}
}
GLsizeiptr size() const
{
return m_size;
}
uint id() const
{
return m_id;
}
void set_id(uint id)
{
m_id = id;
}
bool created() const
{
return m_id != GL_NONE;
}
explicit operator bool() const
{
return created();
}
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
{
ensure(m_memory_type != memory_type::local);
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
m_size = size;
}
void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
{
ensure(m_memory_type == memory_type::local);
DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);
}
GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_)
{
ensure(m_memory_type == memory_type::host_visible);
GLenum access_bits = static_cast<GLenum>(access_);
if (access_bits == GL_MAP_WRITE_BIT) access_bits |= GL_MAP_UNSYNCHRONIZED_BIT;
auto raw_data = DSA_CALL2_RET(MapNamedBufferRange, id(), offset, length, access_bits);
return reinterpret_cast<GLubyte*>(raw_data);
}
void unmap()
{
ensure(m_memory_type == memory_type::host_visible);
DSA_CALL2(UnmapNamedBuffer, id());
}
void bind_range(u32 index, u32 offset, u32 size) const
{
glBindBufferRange(static_cast<GLenum>(current_target()), index, id(), offset, size);
}
void bind_range(target target_, u32 index, u32 offset, u32 size) const
{
glBindBufferRange(static_cast<GLenum>(target_), index, id(), offset, size);
}
void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size)
{
if (get_driver_caps().ARB_dsa_supported)
{
glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size);
}
else
{
glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size);
}
}
};
class ring_buffer : public buffer
{
protected:
u32 m_data_loc = 0;
void *m_memory_mapping = nullptr;
fence m_fence;
public:
virtual void bind()
{
buffer::bind();
}
virtual void recreate(GLsizeiptr size, const void* data = nullptr)
{
if (m_id)
{
m_fence.wait_for_signal();
remove();
}
buffer::create();
save_binding_state save(current_target(), *this);
GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT;
DSA_CALL2(NamedBufferStorage, m_id, size, data, buffer_storage_flags);
m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
ensure(m_memory_mapping != nullptr);
m_data_loc = 0;
m_size = ::narrow<u32>(size);
m_memory_type = memory_type::host_visible;
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
{
m_target = target_;
recreate(size, data_);
}
virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment)
{
u32 offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, alignment);
if ((offset + alloc_size) > m_size)
{
if (!m_fence.is_empty())
{
m_fence.wait_for_signal();
}
else
{
rsx_log.error("OOM Error: Ring buffer was likely being used without notify() being called");
glFinish();
}
m_data_loc = 0;
offset = 0;
}
//Align data loc to 256; allows some "guard" region so we don't trample our own data inadvertently
m_data_loc = utils::align(offset + alloc_size, 256);
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
}
virtual void remove()
{
if (m_memory_mapping)
{
buffer::unmap();
m_memory_mapping = nullptr;
m_data_loc = 0;
m_size = 0;
}
if (m_id != GL_NONE)
{
glDeleteBuffers(1, &m_id);
m_id = GL_NONE;
}
}
virtual void reserve_storage_on_heap(u32 /*alloc_size*/) {}
virtual void unmap() {}
virtual void flush() {}
//Notification of a draw command
virtual void notify()
{
//Insert fence about 25% into the buffer
if (m_fence.is_empty() && (m_data_loc > (m_size >> 2)))
m_fence.reset();
}
};
class legacy_ring_buffer : public ring_buffer
{
u32 m_mapped_bytes = 0;
u32 m_mapping_offset = 0;
u32 m_alignment_offset = 0;
public:
void recreate(GLsizeiptr size, const void* data = nullptr) override
{
if (m_id)
remove();
buffer::create();
buffer::data(size, data, GL_DYNAMIC_DRAW);
m_memory_type = memory_type::host_visible;
m_memory_mapping = nullptr;
m_data_loc = 0;
m_size = ::narrow<u32>(size);
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
{
m_target = target_;
recreate(size, data_);
}
void reserve_storage_on_heap(u32 alloc_size) override
{
ensure(m_memory_mapping == nullptr);
u32 offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, 256);
const u32 block_size = utils::align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
if ((offset + block_size) > m_size)
{
buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW);
m_data_loc = 0;
}
m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, m_data_loc, block_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
m_mapped_bytes = block_size;
m_mapping_offset = m_data_loc;
m_alignment_offset = 0;
//When using debugging tools, the mapped base might not be aligned as expected
const u64 mapped_address_base = reinterpret_cast<u64>(m_memory_mapping);
if (mapped_address_base & 0xF)
{
//Unaligned result was returned. We have to modify the base address a bit
//We lose some memory here, but the 16 byte overallocation above makes up for it
const u64 new_base = (mapped_address_base & ~0xF) + 16;
const u64 diff_bytes = new_base - mapped_address_base;
m_memory_mapping = reinterpret_cast<void*>(new_base);
m_mapped_bytes -= ::narrow<u32>(diff_bytes);
m_alignment_offset = ::narrow<u32>(diff_bytes);
}
ensure(m_mapped_bytes >= alloc_size);
}
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
{
u32 offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, alignment);
u32 padding = (offset - m_data_loc);
u32 real_size = utils::align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit
if (real_size > m_mapped_bytes)
{
//Missed allocation. We take a performance hit on doing this.
//Overallocate slightly for the next allocation if requested size is too small
unmap();
reserve_storage_on_heap(std::max(real_size, 4096U));
offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, alignment);
padding = (offset - m_data_loc);
real_size = utils::align(padding + alloc_size, alignment);
}
m_data_loc = offset + real_size;
m_mapped_bytes -= real_size;
u32 local_offset = (offset - m_mapping_offset);
return std::make_pair(static_cast<char*>(m_memory_mapping) + local_offset, offset + m_alignment_offset);
}
void remove() override
{
ring_buffer::remove();
m_mapped_bytes = 0;
}
void unmap() override
{
buffer::unmap();
m_memory_mapping = nullptr;
m_mapped_bytes = 0;
m_mapping_offset = 0;
}
void notify() override {}
};
// A non-persistent ring buffer
// Internally maps and unmaps data. Backed by immutable buffer storage (glBufferStorage), just like the regular persistent variant
// Works around drivers that have issues using mapped data for specific sources (e.g. AMD proprietary driver with index buffers)
class transient_ring_buffer : public ring_buffer
{
bool dirty = false;
void* map_internal(u32 offset, u32 length)
{
flush();
dirty = true;
return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
}
public:
void bind() override
{
flush();
buffer::bind();
}
void recreate(GLsizeiptr size, const void* data = nullptr) override
{
if (m_id)
{
m_fence.wait_for_signal();
remove();
}
buffer::create();
save_binding_state save(current_target(), *this);
DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT);
m_data_loc = 0;
m_size = ::narrow<u32>(size);
m_memory_type = memory_type::host_visible;
}
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
{
ensure(m_memory_mapping == nullptr);
const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment);
return { map_internal(allocation.second, alloc_size), allocation.second };
}
void flush() override
{
if (dirty)
{
buffer::unmap();
dirty = false;
}
}
void unmap() override
{
flush();
}
};
class buffer_view
{
buffer* m_buffer = nullptr;
u32 m_offset = 0;
u32 m_range = 0;
GLenum m_format = GL_R8UI;
public:
buffer_view(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
: m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format)
{}
buffer_view() = default;
void update(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
{
ensure(_buffer->size() >= (offset + range));
m_buffer = _buffer;
m_offset = offset;
m_range = range;
m_format = format;
}
u32 offset() const
{
return m_offset;
}
u32 range() const
{
return m_range;
}
u32 format() const
{
return m_format;
}
buffer* value() const
{
return m_buffer;
}
bool in_range(u32 address, u32 size, u32& new_offset) const
{
if (address < m_offset)
return false;
const u32 _offset = address - m_offset;
if (m_range < _offset)
return false;
const auto remaining = m_range - _offset;
if (size <= remaining)
{
new_offset = _offset;
return true;
}
return false;
}
};
class vao
{
template<buffer::target BindId, uint GetStateId>
@@ -1353,6 +583,8 @@ namespace gl
class texture
{
friend class texture_view;
public:
enum class type
{
@@ -1480,7 +712,6 @@ namespace gl
rsx::format_class m_format_class = RSX_FORMAT_CLASS_UNDEFINED;
public:
class save_binding_state
{
GLenum target = GL_NONE;
@@ -1519,6 +750,7 @@ namespace gl
}
};
public:
texture(const texture&) = delete;
texture(texture&& texture_) = delete;

View file

@@ -575,10 +575,10 @@ namespace gl
program_handle.uniforms["stereo_image_count"] = (source[1] == GL_NONE? 1 : 2);
saved_sampler_state saved(31, m_sampler);
glBindTexture(GL_TEXTURE_2D, source[0]);
cmd->bind_texture(31, GL_TEXTURE_2D, source[0]);
saved_sampler_state saved2(30, m_sampler);
glBindTexture(GL_TEXTURE_2D, source[1]);
cmd->bind_texture(30, GL_TEXTURE_2D, source[1]);
overlay_pass::run(cmd, viewport, GL_NONE, false, false);
}

View file

@@ -0,0 +1,197 @@
#include "stdafx.h"
#include "buffer_object.h"
namespace gl
{
void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
{
if (const auto& caps = get_driver_caps();
caps.ARB_buffer_storage_supported)
{
GLenum flags = 0;
if (type == memory_type::host_visible)
{
switch (usage)
{
case GL_STREAM_DRAW:
case GL_STATIC_DRAW:
case GL_DYNAMIC_DRAW:
flags |= GL_MAP_WRITE_BIT;
break;
case GL_STREAM_READ:
case GL_STATIC_READ:
case GL_DYNAMIC_READ:
flags |= GL_MAP_READ_BIT;
break;
default:
fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
}
}
else
{
// Local memory hints
if (usage == GL_DYNAMIC_COPY)
{
flags |= GL_DYNAMIC_STORAGE_BIT;
}
}
if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
{
// This flag stops NVIDIA from allocating read-only memory in VRAM.
// NOTE: On AMD, allocating client-side memory via CLIENT_STORAGE_BIT or
// making use of GL_AMD_pinned_memory brings everything down to a crawl.
// Afaict there is no reason for this; disabling pixel pack/unpack operations does not alleviate the problem.
// The driver seems to eventually figure out the optimal storage location by itself.
flags |= GL_CLIENT_STORAGE_BIT;
}
DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags);
m_size = size;
}
else
{
data(size, data_, usage);
}
m_memory_type = type;
}
buffer::~buffer()
{
if (created())
remove();
}
void buffer::recreate()
{
if (created())
{
remove();
}
create();
}
void buffer::recreate(GLsizeiptr size, const void* data)
{
if (created())
{
remove();
}
create(size, data);
}
void buffer::create()
{
glGenBuffers(1, &m_id);
save_binding_state save(current_target(), *this);
}
void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
{
create();
allocate(size, data_, type, usage);
}
void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
{
m_target = target_;
create();
allocate(size, data_, type, usage);
}
void buffer::remove()
{
if (m_id != GL_NONE)
{
glDeleteBuffers(1, &m_id);
m_id = GL_NONE;
m_size = 0;
}
}
void buffer::data(GLsizeiptr size, const void* data_, GLenum usage)
{
ensure(m_memory_type != memory_type::local);
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
m_size = size;
}
void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
{
ensure(m_memory_type == memory_type::local);
DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);
}
GLubyte* buffer::map(GLsizeiptr offset, GLsizeiptr length, access access_)
{
ensure(m_memory_type == memory_type::host_visible);
GLenum access_bits = static_cast<GLenum>(access_);
if (access_bits == GL_MAP_WRITE_BIT) access_bits |= GL_MAP_UNSYNCHRONIZED_BIT;
auto raw_data = DSA_CALL2_RET(MapNamedBufferRange, id(), offset, length, access_bits);
return reinterpret_cast<GLubyte*>(raw_data);
}
void buffer::unmap()
{
ensure(m_memory_type == memory_type::host_visible);
DSA_CALL2(UnmapNamedBuffer, id());
}
void buffer::bind_range(u32 index, u32 offset, u32 size) const
{
glBindBufferRange(static_cast<GLenum>(current_target()), index, id(), offset, size);
}
void buffer::bind_range(target target_, u32 index, u32 offset, u32 size) const
{
glBindBufferRange(static_cast<GLenum>(target_), index, id(), offset, size);
}
void buffer::copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size)
{
if (get_driver_caps().ARB_dsa_supported)
{
glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size);
}
else
{
glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size);
}
}
// Buffer view
void buffer_view::update(buffer* _buffer, u32 offset, u32 range, GLenum format)
{
ensure(_buffer->size() >= (offset + range));
m_buffer = _buffer;
m_offset = offset;
m_range = range;
m_format = format;
}
bool buffer_view::in_range(u32 address, u32 size, u32& new_offset) const
{
if (address < m_offset)
return false;
const u32 _offset = address - m_offset;
if (m_range < _offset)
return false;
const auto remaining = m_range - _offset;
if (size <= remaining)
{
new_offset = _offset;
return true;
}
return false;
}
}
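buffer_view::in_range translates an absolute address into an offset inside the view's window; a worked example with illustrative values (some_buffer is a hypothetical, already-created gl::buffer):

// View spanning [0x1000, 0x1400) of the underlying buffer.
gl::buffer_view view(&some_buffer, /* offset */ 0x1000, /* range */ 0x400);

u32 new_offset = 0;
view.in_range(0x1200, 0x100, new_offset); // true,  new_offset == 0x200
view.in_range(0x0800, 0x100, new_offset); // false: address precedes the view
view.in_range(0x13C0, 0x100, new_offset); // false: 0x3C0 + 0x100 exceeds the range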

View file

@@ -0,0 +1,142 @@
#pragma once
#include "common.h"
namespace gl
{
class buffer
{
public:
enum class target
{
pixel_pack = GL_PIXEL_PACK_BUFFER,
pixel_unpack = GL_PIXEL_UNPACK_BUFFER,
array = GL_ARRAY_BUFFER,
element_array = GL_ELEMENT_ARRAY_BUFFER,
uniform = GL_UNIFORM_BUFFER,
texture = GL_TEXTURE_BUFFER,
ssbo = GL_SHADER_STORAGE_BUFFER
};
enum class access
{
read = GL_MAP_READ_BIT,
write = GL_MAP_WRITE_BIT,
read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
};
enum class memory_type
{
undefined = 0,
local = 1,
host_visible = 2
};
class save_binding_state
{
GLint m_last_binding;
GLenum m_target;
public:
save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_)
{
new_state.bind(target_);
}
save_binding_state(target target_)
{
GLenum pname{};
switch (target_)
{
case target::pixel_pack: pname = GL_PIXEL_PACK_BUFFER_BINDING; break;
case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break;
case target::array: pname = GL_ARRAY_BUFFER_BINDING; break;
case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break;
case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break;
case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break;
case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break;
default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast<int>(target_));
}
glGetIntegerv(pname, &m_last_binding);
m_target = static_cast<GLenum>(target_);
}
~save_binding_state()
{
glBindBuffer(m_target, m_last_binding);
}
};
protected:
GLuint m_id = GL_NONE;
GLsizeiptr m_size = 0;
target m_target = target::array;
memory_type m_memory_type = memory_type::undefined;
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage);
public:
buffer() = default;
buffer(const buffer&) = delete;
~buffer();
void recreate();
void recreate(GLsizeiptr size, const void* data = nullptr);
void create();
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
void remove();
void bind(target target_) const { glBindBuffer(static_cast<GLenum>(target_), m_id); }
void bind() const { bind(current_target()); }
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW);
void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data);
GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_);
void unmap();
void bind_range(u32 index, u32 offset, u32 size) const;
void bind_range(target target_, u32 index, u32 offset, u32 size) const;
void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size);
target current_target() const { return m_target; }
GLsizeiptr size() const { return m_size; }
uint id() const { return m_id; }
void set_id(uint id) { m_id = id; }
bool created() const { return m_id != GL_NONE; }
explicit operator bool() const { return created(); }
};
class buffer_view
{
buffer* m_buffer = nullptr;
u32 m_offset = 0;
u32 m_range = 0;
GLenum m_format = GL_R8UI;
public:
buffer_view(buffer* _buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
: m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format)
{}
buffer_view() = default;
void update(buffer* _buffer, u32 offset, u32 range, GLenum format = GL_R8UI);
u32 offset() const { return m_offset; }
u32 range() const { return m_range; }
u32 format() const { return m_format; }
buffer* value() const { return m_buffer; }
bool in_range(u32 address, u32 size, u32& new_offset) const;
};
}
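A short usage sketch for the extracted gl::buffer (hypothetical caller; size and usage values are illustrative):

// Host-visible buffer mapped for writing, then bound as a vertex buffer.
gl::buffer vbo;
vbo.create(gl::buffer::target::array, 1024, nullptr,
    gl::buffer::memory_type::host_visible, GL_STREAM_DRAW);

GLubyte* dst = vbo.map(0, 1024, gl::buffer::access::write);
// ... fill dst with vertex data ...
vbo.unmap();
vbo.bind(); // rebinds to the target recorded at creation (GL_ARRAY_BUFFER)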

View file

@@ -0,0 +1,230 @@
#pragma once
#include "../OpenGL.h"
#include <util/types.hpp>
#include <util/asm.hpp>
#include <util/logs.hpp>
namespace gl
{
class capabilities
{
public:
bool EXT_dsa_supported = false;
bool EXT_depth_bounds_test = false;
bool ARB_dsa_supported = false;
bool ARB_bindless_texture_supported = false;
bool ARB_buffer_storage_supported = false;
bool ARB_texture_buffer_supported = false;
bool ARB_shader_draw_parameters_supported = false;
bool ARB_depth_buffer_float_supported = false;
bool ARB_texture_barrier_supported = false;
bool ARB_shader_stencil_export_supported = false;
bool NV_texture_barrier_supported = false;
bool NV_gpu_shader5_supported = false;
bool AMD_gpu_shader_half_float_supported = false;
bool ARB_compute_shader_supported = false;
bool NV_depth_buffer_float_supported = false;
bool initialized = false;
bool vendor_INTEL = false; // has broken GLSL compiler
bool vendor_AMD = false; // has broken ARB_multidraw
bool vendor_NVIDIA = false; // has NaN poisoning issues
bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers
bool check(const std::string& ext_name, const char* test)
{
if (ext_name == test)
{
rsx_log.notice("Extension %s is supported", ext_name);
return true;
}
return false;
}
void initialize()
{
int find_count = 15;
int ext_count = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
if (!ext_count)
{
rsx_log.error("Coult not initialize GL driver capabilities. Is OpenGL initialized?");
return;
}
std::string vendor_string = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
std::string version_string = reinterpret_cast<const char*>(glGetString(GL_VERSION));
std::string renderer_string = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
for (int i = 0; i < ext_count; i++)
{
if (!find_count) break;
const std::string ext_name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i));
if (check(ext_name, "GL_ARB_shader_draw_parameters"))
{
ARB_shader_draw_parameters_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_EXT_direct_state_access"))
{
EXT_dsa_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_direct_state_access"))
{
ARB_dsa_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_bindless_texture"))
{
ARB_bindless_texture_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_buffer_storage"))
{
ARB_buffer_storage_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_texture_buffer_object"))
{
ARB_texture_buffer_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_depth_buffer_float"))
{
ARB_depth_buffer_float_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_texture_barrier"))
{
ARB_texture_barrier_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_NV_texture_barrier"))
{
NV_texture_barrier_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_NV_gpu_shader5"))
{
NV_gpu_shader5_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_AMD_gpu_shader_half_float"))
{
AMD_gpu_shader_half_float_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_compute_shader"))
{
ARB_compute_shader_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_EXT_depth_bounds_test"))
{
EXT_depth_bounds_test = true;
find_count--;
continue;
}
if (check(ext_name, "GL_NV_depth_buffer_float"))
{
NV_depth_buffer_float_supported = true;
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_shader_stencil_export"))
{
ARB_shader_stencil_export_supported = true;
find_count--;
continue;
}
}
// Check GL_VERSION and GL_RENDERER for the presence of Mesa
if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
{
vendor_MESA = true;
}
// Workaround for intel drivers which have terrible capability reporting
if (!vendor_string.empty())
{
std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower);
}
else
{
rsx_log.error("Failed to get vendor string from driver. Are we missing a context?");
vendor_string = "intel"; // lowest acceptable value
}
if (!vendor_MESA && vendor_string.find("intel") != umax)
{
int version_major = 0;
int version_minor = 0;
glGetIntegerv(GL_MAJOR_VERSION, &version_major);
glGetIntegerv(GL_MINOR_VERSION, &version_minor);
vendor_INTEL = true;
// Texture buffers moved into core at GL 3.3
if (version_major > 3 || (version_major == 3 && version_minor >= 3))
ARB_texture_buffer_supported = true;
// Check for expected library entry-points for some required functions
if (!ARB_buffer_storage_supported && glNamedBufferStorage && glMapNamedBufferRange)
ARB_buffer_storage_supported = true;
if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange)
ARB_dsa_supported = true;
if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
EXT_dsa_supported = true;
}
else if (!vendor_MESA && vendor_string.find("nvidia") != umax)
{
vendor_NVIDIA = true;
}
#ifdef _WIN32
else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax)
{
vendor_AMD = true;
}
#endif
initialized = true;
}
};
const capabilities& get_driver_caps();
}
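The flags gathered above are consumed elsewhere through get_driver_caps(); a minimal sketch of a hypothetical caller (not part of this commit):

// Pick a streaming strategy from the reported capabilities.
// Assumes a current GL context so initialize() has already populated the flags.
const gl::capabilities& caps = gl::get_driver_caps();
if (caps.ARB_buffer_storage_supported)
{
    // Persistent-mapped path (gl::ring_buffer)
}
else
{
    // glBufferData + map/unmap fallback (gl::legacy_ring_buffer)
}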

View file

@@ -0,0 +1,154 @@
#pragma once
#include "capabilities.hpp"
//Function call wrapped in ARB_DSA vs EXT_DSA compat check
#define DSA_CALL(func, object_name, target, ...)\
if (::gl::get_driver_caps().ARB_dsa_supported)\
gl##func(object_name, __VA_ARGS__);\
else\
gl##func##EXT(object_name, target, __VA_ARGS__);
#define DSA_CALL2(func, ...)\
if (::gl::get_driver_caps().ARB_dsa_supported)\
gl##func(__VA_ARGS__);\
else\
gl##func##EXT(__VA_ARGS__);
#define DSA_CALL2_RET(func, ...)\
(::gl::get_driver_caps().ARB_dsa_supported) ?\
gl##func(__VA_ARGS__) :\
gl##func##EXT(__VA_ARGS__)
#define DSA_CALL3(funcARB, funcDSA, ...)\
if (::gl::get_driver_caps().ARB_dsa_supported)\
gl##funcARB(__VA_ARGS__);\
else\
gl##funcDSA##EXT(__VA_ARGS__);
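For reference, these macros select between the core ARB and EXT entry points at runtime; for example, DSA_CALL2(NamedBufferData, id, size, data, usage) expands to roughly the following sketch:

// Expansion sketch of DSA_CALL2(NamedBufferData, id, size, data, usage):
if (::gl::get_driver_caps().ARB_dsa_supported)
    glNamedBufferData(id, size, data, usage);    // ARB_direct_state_access / GL 4.5 core
else
    glNamedBufferDataEXT(id, size, data, usage); // EXT_direct_state_access fallback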
namespace gl
{
// TODO: Move to sync.h
class fence
{
GLsync m_value = nullptr;
mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
mutable bool signaled = false;
public:
fence() = default;
~fence() = default;
void create()
{
m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
flags = GL_SYNC_FLUSH_COMMANDS_BIT;
}
void destroy()
{
glDeleteSync(m_value);
m_value = nullptr;
}
void reset()
{
if (m_value != nullptr)
destroy();
create();
}
bool is_empty() const
{
return (m_value == nullptr);
}
bool check_signaled() const
{
ensure(m_value);
if (signaled)
return true;
if (flags)
{
GLenum err = glClientWaitSync(m_value, flags, 0);
flags = 0;
if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED))
return false;
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status != GL_SIGNALED)
return false;
}
signaled = true;
return true;
}
bool wait_for_signal()
{
ensure(m_value);
if (!signaled)
{
GLenum err = GL_WAIT_FAILED;
bool done = false;
while (!done)
{
if (flags)
{
err = glClientWaitSync(m_value, flags, 0);
flags = 0;
switch (err)
{
default:
rsx_log.error("gl::fence sync returned unknown error 0x%X", err);
[[fallthrough]];
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status == GL_SIGNALED)
{
// Record the polled success as a completed wait so the result below is correct
err = GL_ALREADY_SIGNALED;
done = true;
}
}
}
signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
glDeleteSync(m_value);
m_value = nullptr;
return signaled;
}
void server_wait_sync() const
{
ensure(m_value != nullptr);
glWaitSync(m_value, 0, GL_TIMEOUT_IGNORED);
}
};
}
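A minimal usage sketch for gl::fence (hypothetical calling code, assuming a current GL context):

// Make the CPU wait for previously submitted GPU work.
gl::fence sync;
sync.create();          // inserts a GL_SYNC_GPU_COMMANDS_COMPLETE object
// ... issue dependent GL commands ...
sync.wait_for_signal(); // blocks until signaled, then deletes the sync object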

View file

@@ -0,0 +1,241 @@
#include "stdafx.h"
#include "ring_buffer.h"
namespace gl
{
void ring_buffer::recreate(GLsizeiptr size, const void* data)
{
if (m_id)
{
m_fence.wait_for_signal();
remove();
}
buffer::create();
save_binding_state save(current_target(), *this);
GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT;
DSA_CALL2(NamedBufferStorage, m_id, size, data, buffer_storage_flags);
m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
ensure(m_memory_mapping != nullptr);
m_data_loc = 0;
m_size = ::narrow<u32>(size);
m_memory_type = memory_type::host_visible;
}
void ring_buffer::create(target target_, GLsizeiptr size, const void* data_)
{
m_target = target_;
recreate(size, data_);
}
std::pair<void*, u32> ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment)
{
u32 offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, alignment);
if ((offset + alloc_size) > m_size)
{
if (!m_fence.is_empty())
{
m_fence.wait_for_signal();
}
else
{
rsx_log.error("OOM Error: Ring buffer was likely being used without notify() being called");
glFinish();
}
m_data_loc = 0;
offset = 0;
}
//Align data loc to 256; allows some "guard" region so we don't trample our own data inadvertently
m_data_loc = utils::align(offset + alloc_size, 256);
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
}
void ring_buffer::remove()
{
if (m_memory_mapping)
{
buffer::unmap();
m_memory_mapping = nullptr;
m_data_loc = 0;
m_size = 0;
}
if (m_id != GL_NONE)
{
glDeleteBuffers(1, &m_id);
m_id = GL_NONE;
}
}
void ring_buffer::notify()
{
//Insert fence about 25% into the buffer
if (m_fence.is_empty() && (m_data_loc > (m_size >> 2)))
m_fence.reset();
}
// Legacy ring buffer - used when ARB_buffer_storage is not available, OR when capturing with renderdoc
void legacy_ring_buffer::recreate(GLsizeiptr size, const void* data)
{
if (m_id)
remove();
buffer::create();
buffer::data(size, data, GL_DYNAMIC_DRAW);
m_memory_type = memory_type::host_visible;
m_memory_mapping = nullptr;
m_data_loc = 0;
m_size = ::narrow<u32>(size);
}
void legacy_ring_buffer::create(target target_, GLsizeiptr size, const void* data_)
{
m_target = target_;
recreate(size, data_);
}
void legacy_ring_buffer::reserve_storage_on_heap(u32 alloc_size)
{
ensure(m_memory_mapping == nullptr);
u32 offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, 256);
const u32 block_size = utils::align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
if ((offset + block_size) > m_size)
{
buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW);
m_data_loc = 0;
}
m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, m_data_loc, block_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
m_mapped_bytes = block_size;
m_mapping_offset = m_data_loc;
m_alignment_offset = 0;
//When using debugging tools, the mapped base might not be aligned as expected
const u64 mapped_address_base = reinterpret_cast<u64>(m_memory_mapping);
if (mapped_address_base & 0xF)
{
//Unaligned result was returned. We have to modify the base address a bit
//We lose some memory here, but the 16 byte overallocation above makes up for it
const u64 new_base = (mapped_address_base & ~0xF) + 16;
const u64 diff_bytes = new_base - mapped_address_base;
m_memory_mapping = reinterpret_cast<void*>(new_base);
m_mapped_bytes -= ::narrow<u32>(diff_bytes);
m_alignment_offset = ::narrow<u32>(diff_bytes);
}
ensure(m_mapped_bytes >= alloc_size);
}
std::pair<void*, u32> legacy_ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment)
{
u32 offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, alignment);
u32 padding = (offset - m_data_loc);
u32 real_size = utils::align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit
if (real_size > m_mapped_bytes)
{
//Missed allocation. We take a performance hit on doing this.
//Overallocate slightly for the next allocation if requested size is too small
unmap();
reserve_storage_on_heap(std::max(real_size, 4096U));
offset = m_data_loc;
if (m_data_loc) offset = utils::align(offset, alignment);
padding = (offset - m_data_loc);
real_size = utils::align(padding + alloc_size, alignment);
}
m_data_loc = offset + real_size;
m_mapped_bytes -= real_size;
u32 local_offset = (offset - m_mapping_offset);
return std::make_pair(static_cast<char*>(m_memory_mapping) + local_offset, offset + m_alignment_offset);
}
void legacy_ring_buffer::remove()
{
ring_buffer::remove();
m_mapped_bytes = 0;
}
void legacy_ring_buffer::unmap()
{
buffer::unmap();
m_memory_mapping = nullptr;
m_mapped_bytes = 0;
m_mapping_offset = 0;
}
// AMD persistent mapping workaround for driver-assisted flushing
void* transient_ring_buffer::map_internal(u32 offset, u32 length)
{
flush();
dirty = true;
return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
}
void transient_ring_buffer::bind()
{
flush();
buffer::bind();
}
void transient_ring_buffer::recreate(GLsizeiptr size, const void* data)
{
if (m_id)
{
m_fence.wait_for_signal();
remove();
}
buffer::create();
save_binding_state save(current_target(), *this);
DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT);
m_data_loc = 0;
m_size = ::narrow<u32>(size);
m_memory_type = memory_type::host_visible;
}
std::pair<void*, u32> transient_ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment)
{
ensure(m_memory_mapping == nullptr);
const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment);
return { map_internal(allocation.second, alloc_size), allocation.second };
}
void transient_ring_buffer::flush()
{
if (dirty)
{
buffer::unmap();
dirty = false;
}
}
void transient_ring_buffer::unmap()
{
flush();
}
}
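The legacy path above is a two-phase protocol: map a window with reserve_storage_on_heap, carve sub-allocations out of it, then unmap before drawing. A hypothetical per-batch flow:

// Batch flow on the non-ARB_buffer_storage path (sizes are illustrative).
gl::legacy_ring_buffer heap;
heap.create(gl::buffer::target::array, 4 * 0x100000); // 4 MiB heap

heap.reserve_storage_on_heap(64 * 1024);             // map one window for this batch
auto [ptr, offset] = heap.alloc_from_heap(4096, 16); // sub-allocate from the window
// ... write vertex data to ptr ...
heap.unmap();                                        // release the mapping before the draw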

View file

@@ -0,0 +1,81 @@
#pragma once
#include "buffer_object.h"
namespace gl
{
class ring_buffer : public buffer
{
protected:
u32 m_data_loc = 0;
void* m_memory_mapping = nullptr;
fence m_fence;
public:
virtual void bind() { buffer::bind(); }
virtual void recreate(GLsizeiptr size, const void* data = nullptr);
void create(target target_, GLsizeiptr size, const void* data_ = nullptr);
virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment);
virtual void remove();
virtual void reserve_storage_on_heap(u32 /*alloc_size*/) {}
virtual void unmap() {}
virtual void flush() {}
virtual void notify();
};
class legacy_ring_buffer : public ring_buffer
{
u32 m_mapped_bytes = 0;
u32 m_mapping_offset = 0;
u32 m_alignment_offset = 0;
public:
void recreate(GLsizeiptr size, const void* data = nullptr) override;
void create(target target_, GLsizeiptr size, const void* data_ = nullptr);
void reserve_storage_on_heap(u32 alloc_size) override;
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override;
void remove() override;
void unmap() override;
void notify() override {}
};
// A non-persistent ring buffer
// Internally maps and unmaps data. Backed by immutable buffer storage (glBufferStorage), just like the regular persistent variant
// Works around drivers that have issues using mapped data for specific sources (e.g. AMD proprietary driver with index buffers)
class transient_ring_buffer : public ring_buffer
{
bool dirty = false;
void* map_internal(u32 offset, u32 length);
public:
void bind() override;
void recreate(GLsizeiptr size, const void* data = nullptr) override;
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override;
void flush() override;
void unmap() override;
};
}
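A usage sketch tying the classes together (hypothetical caller; heap size and alignment are illustrative):

// Stream uniform data through the persistent-mapped ring buffer.
gl::ring_buffer ubo_heap;
ubo_heap.create(gl::buffer::target::uniform, 16 * 0x100000); // 16 MiB heap

auto [ptr, offset] = ubo_heap.alloc_from_heap(512, 256); // 512 bytes, 256-byte aligned
// ... write uniform data through ptr ...
ubo_heap.bind_range(0, offset, 512); // glBindBufferRange on GL_UNIFORM_BUFFER
ubo_heap.notify();                   // lets the fence trail buffer usage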

View file

@@ -61,6 +61,10 @@
<ClInclude Include="Emu\RSX\GL\GLGSRender.h" />
<ClInclude Include="Emu\RSX\GL\GLProcTable.h" />
<ClInclude Include="Emu\RSX\GL\GLProgramBuffer.h" />
<ClInclude Include="Emu\RSX\GL\glutils\buffer_object.h" />
<ClInclude Include="Emu\RSX\GL\glutils\capabilities.hpp" />
<ClInclude Include="Emu\RSX\GL\glutils\common.h" />
<ClInclude Include="Emu\RSX\GL\glutils\ring_buffer.h" />
<ClInclude Include="Emu\RSX\GL\GLVertexProgram.h" />
<ClInclude Include="Emu\RSX\GL\GLHelpers.h" />
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
@@ -77,6 +81,8 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLOverlays.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\glutils\buffer_object.cpp" />
<ClCompile Include="Emu\RSX\GL\glutils\ring_buffer.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />

View file

@@ -17,6 +17,12 @@
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
<ClCompile Include="Emu\RSX\GL\GLOverlays.cpp" />
<ClCompile Include="Emu\RSX\GL\GLCompute.cpp" />
<ClCompile Include="Emu\RSX\GL\glutils\buffer_object.cpp">
<Filter>glutils</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\GL\glutils\ring_buffer.cpp">
<Filter>glutils</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
@@ -36,5 +42,22 @@
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
<ClInclude Include="Emu\RSX\GL\GLCompute.h" />
<ClInclude Include="Emu\RSX\GL\GLPipelineCompiler.h" />
<ClInclude Include="Emu\RSX\GL\glutils\buffer_object.h">
<Filter>glutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\GL\glutils\ring_buffer.h">
<Filter>glutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\GL\glutils\common.h">
<Filter>glutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\GL\glutils\capabilities.hpp">
<Filter>glutils</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="glutils">
<UniqueIdentifier>{ed9ef6b7-efbb-4a8e-88a4-583b6655c141}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>