mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 12:05:23 +00:00
gl: Refactor buffer object code
This commit is contained in:
parent
ff9c939720
commit
3fd846687e
13 changed files with 1089 additions and 1002 deletions
|
@ -454,6 +454,8 @@ target_sources(rpcs3_emu PRIVATE
|
|||
RSX/Program/VertexProgramDecompiler.cpp
|
||||
RSX/Capture/rsx_capture.cpp
|
||||
RSX/Capture/rsx_replay.cpp
|
||||
RSX/GL/glutils/buffer_object.cpp
|
||||
RSX/GL/glutils/ring_buffer.cpp
|
||||
RSX/GL/GLCommonDecompiler.cpp
|
||||
RSX/GL/GLCompute.cpp
|
||||
RSX/GL/GLDraw.cpp
|
||||
|
|
|
@ -1,235 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "util/logs.hpp"
|
||||
#include "util/types.hpp"
|
||||
#include "Utilities/geometry.h"
|
||||
#include "OpenGL.h"
|
||||
#include "glutils/capabilities.hpp"
|
||||
|
||||
#include "Utilities/geometry.h"
|
||||
#include <unordered_map>
|
||||
|
||||
namespace gl
|
||||
{
|
||||
class capabilities
|
||||
{
|
||||
public:
|
||||
bool EXT_dsa_supported = false;
|
||||
bool EXT_depth_bounds_test = false;
|
||||
bool ARB_dsa_supported = false;
|
||||
bool ARB_bindless_texture_supported = false;
|
||||
bool ARB_buffer_storage_supported = false;
|
||||
bool ARB_texture_buffer_supported = false;
|
||||
bool ARB_shader_draw_parameters_supported = false;
|
||||
bool ARB_depth_buffer_float_supported = false;
|
||||
bool ARB_texture_barrier_supported = false;
|
||||
bool ARB_shader_stencil_export_supported = false;
|
||||
bool NV_texture_barrier_supported = false;
|
||||
bool NV_gpu_shader5_supported = false;
|
||||
bool AMD_gpu_shader_half_float_supported = false;
|
||||
bool ARB_compute_shader_supported = false;
|
||||
bool NV_depth_buffer_float_supported = false;
|
||||
bool initialized = false;
|
||||
bool vendor_INTEL = false; // has broken GLSL compiler
|
||||
bool vendor_AMD = false; // has broken ARB_multidraw
|
||||
bool vendor_NVIDIA = false; // has NaN poisoning issues
|
||||
bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers
|
||||
|
||||
bool check(const std::string& ext_name, const char* test)
|
||||
{
|
||||
if (ext_name == test)
|
||||
{
|
||||
rsx_log.notice("Extension %s is supported", ext_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void initialize()
|
||||
{
|
||||
int find_count = 15;
|
||||
int ext_count = 0;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
|
||||
|
||||
if (!ext_count)
|
||||
{
|
||||
rsx_log.error("Coult not initialize GL driver capabilities. Is OpenGL initialized?");
|
||||
return;
|
||||
}
|
||||
|
||||
std::string vendor_string = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
std::string version_string = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||
std::string renderer_string = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||
|
||||
for (int i = 0; i < ext_count; i++)
|
||||
{
|
||||
if (!find_count) break;
|
||||
|
||||
const std::string ext_name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i));
|
||||
|
||||
if (check(ext_name, "GL_ARB_shader_draw_parameters"))
|
||||
{
|
||||
ARB_shader_draw_parameters_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_EXT_direct_state_access"))
|
||||
{
|
||||
EXT_dsa_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_direct_state_access"))
|
||||
{
|
||||
ARB_dsa_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_bindless_texture"))
|
||||
{
|
||||
ARB_bindless_texture_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_buffer_storage"))
|
||||
{
|
||||
ARB_buffer_storage_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_texture_buffer_object"))
|
||||
{
|
||||
ARB_texture_buffer_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_depth_buffer_float"))
|
||||
{
|
||||
ARB_depth_buffer_float_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_texture_barrier"))
|
||||
{
|
||||
ARB_texture_barrier_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_NV_texture_barrier"))
|
||||
{
|
||||
NV_texture_barrier_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_NV_gpu_shader5"))
|
||||
{
|
||||
NV_gpu_shader5_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_AMD_gpu_shader_half_float"))
|
||||
{
|
||||
AMD_gpu_shader_half_float_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_compute_shader"))
|
||||
{
|
||||
ARB_compute_shader_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_EXT_depth_bounds_test"))
|
||||
{
|
||||
EXT_depth_bounds_test = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_NV_depth_buffer_float"))
|
||||
{
|
||||
NV_depth_buffer_float_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_shader_stencil_export"))
|
||||
{
|
||||
ARB_shader_stencil_export_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check GL_VERSION and GL_RENDERER for the presence of Mesa
|
||||
if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
|
||||
{
|
||||
vendor_MESA = true;
|
||||
}
|
||||
|
||||
// Workaround for intel drivers which have terrible capability reporting
|
||||
if (!vendor_string.empty())
|
||||
{
|
||||
std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower);
|
||||
}
|
||||
else
|
||||
{
|
||||
rsx_log.error("Failed to get vendor string from driver. Are we missing a context?");
|
||||
vendor_string = "intel"; // lowest acceptable value
|
||||
}
|
||||
|
||||
if (!vendor_MESA && vendor_string.find("intel") != umax)
|
||||
{
|
||||
int version_major = 0;
|
||||
int version_minor = 0;
|
||||
|
||||
glGetIntegerv(GL_MAJOR_VERSION, &version_major);
|
||||
glGetIntegerv(GL_MINOR_VERSION, &version_minor);
|
||||
|
||||
vendor_INTEL = true;
|
||||
|
||||
// Texture buffers moved into core at GL 3.3
|
||||
if (version_major > 3 || (version_major == 3 && version_minor >= 3))
|
||||
ARB_texture_buffer_supported = true;
|
||||
|
||||
// Check for expected library entry-points for some required functions
|
||||
if (!ARB_buffer_storage_supported && glNamedBufferStorage && glMapNamedBufferRange)
|
||||
ARB_buffer_storage_supported = true;
|
||||
|
||||
if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange)
|
||||
ARB_dsa_supported = true;
|
||||
|
||||
if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
|
||||
EXT_dsa_supported = true;
|
||||
}
|
||||
else if (!vendor_MESA && vendor_string.find("nvidia") != umax)
|
||||
{
|
||||
vendor_NVIDIA = true;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax)
|
||||
{
|
||||
vendor_AMD = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
};
|
||||
|
||||
const capabilities& get_driver_caps();
|
||||
|
||||
struct driver_state
|
||||
{
|
||||
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#include <optional>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "glutils/ring_buffer.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma comment(lib, "opengl32.lib")
|
||||
#endif
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#include "util/logs.hpp"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#include "glutils/common.h"
|
||||
// TODO: Include on use
|
||||
#include "glutils/buffer_object.h"
|
||||
|
||||
#define GL_FRAGMENT_TEXTURES_START 0
|
||||
#define GL_VERTEX_TEXTURES_START (GL_FRAGMENT_TEXTURES_START + 16)
|
||||
#define GL_STENCIL_MIRRORS_START (GL_VERTEX_TEXTURES_START + 4)
|
||||
|
@ -48,32 +52,6 @@ using namespace ::rsx::format_class_;
|
|||
|
||||
namespace gl
|
||||
{
|
||||
//Function call wrapped in ARB_DSA vs EXT_DSA compat check
|
||||
#define DSA_CALL(func, object_name, target, ...)\
|
||||
if (::gl::get_driver_caps().ARB_dsa_supported)\
|
||||
gl##func(object_name, __VA_ARGS__);\
|
||||
else\
|
||||
gl##func##EXT(object_name, target, __VA_ARGS__);
|
||||
|
||||
#define DSA_CALL2(func, ...)\
|
||||
if (::gl::get_driver_caps().ARB_dsa_supported)\
|
||||
gl##func(__VA_ARGS__);\
|
||||
else\
|
||||
gl##func##EXT(__VA_ARGS__);
|
||||
|
||||
#define DSA_CALL2_RET(func, ...)\
|
||||
(::gl::get_driver_caps().ARB_dsa_supported) ?\
|
||||
gl##func(__VA_ARGS__) :\
|
||||
gl##func##EXT(__VA_ARGS__)
|
||||
|
||||
#define DSA_CALL3(funcARB, funcDSA, ...)\
|
||||
if (::gl::get_driver_caps().ARB_dsa_supported)\
|
||||
gl##funcARB(__VA_ARGS__);\
|
||||
else\
|
||||
gl##funcDSA##EXT(__VA_ARGS__);
|
||||
|
||||
class fence;
|
||||
|
||||
void enable_debugging();
|
||||
bool is_primitive_native(rsx::primitive_type in);
|
||||
GLenum draw_mode(rsx::primitive_type in);
|
||||
|
@ -97,129 +75,6 @@ namespace gl
|
|||
}
|
||||
};
|
||||
|
||||
// Wrapper around a GLsync object used to track GPU command completion.
// Not an RAII type: create()/destroy() are explicit, and the destructor
// intentionally does not release the sync object.
class fence
{
	GLsync m_value = nullptr;
	// The first client wait passes GL_SYNC_FLUSH_COMMANDS_BIT so the fence
	// command is actually flushed to the GPU; cleared after that first wait.
	mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
	mutable bool signaled = false; // cached result to avoid repeated driver queries

public:

	fence() = default;
	~fence() = default;

	// Inserts a new fence into the GL command stream.
	void create()
	{
		m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		flags = GL_SYNC_FLUSH_COMMANDS_BIT;
	}

	// Deletes the sync object (safe on nullptr per GL spec).
	void destroy()
	{
		glDeleteSync(m_value);
		m_value = nullptr;
	}

	// Drops any existing sync and inserts a fresh fence.
	void reset()
	{
		if (m_value != nullptr)
			destroy();

		create();
	}

	// True when no sync object is currently held.
	bool is_empty() const
	{
		return (m_value == nullptr);
	}

	// Non-blocking poll; returns true once the GPU has passed the fence.
	bool check_signaled() const
	{
		ensure(m_value);

		if (signaled)
			return true;

		if (flags)
		{
			// First poll: zero-timeout client wait so the flush bit takes effect.
			GLenum err = glClientWaitSync(m_value, flags, 0);
			flags = 0;

			if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED))
				return false;
		}
		else
		{
			// Subsequent polls: cheap status query, no waiting.
			GLint status = GL_UNSIGNALED;
			GLint tmp;

			glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);

			if (status != GL_SIGNALED)
				return false;
		}

		signaled = true;
		return true;
	}

	// Spins until the fence is observed signaled, then deletes the sync
	// object. Returns the recorded signaled state.
	bool wait_for_signal()
	{
		ensure(m_value);

		if (signaled == GL_FALSE)
		{
			GLenum err = GL_WAIT_FAILED;
			bool done = false;

			while (!done)
			{
				if (flags)
				{
					err = glClientWaitSync(m_value, flags, 0);
					flags = 0;

					switch (err)
					{
					default:
						rsx_log.error("gl::fence sync returned unknown error 0x%X", err);
						[[fallthrough]];
					case GL_ALREADY_SIGNALED:
					case GL_CONDITION_SATISFIED:
						done = true;
						break;
					case GL_TIMEOUT_EXPIRED:
						continue;
					}
				}
				else
				{
					GLint status = GL_UNSIGNALED;
					GLint tmp;

					glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);

					if (status == GL_SIGNALED)
						break;
				}
			}

			// NOTE(review): if the loop exits via the glGetSynciv 'break' above,
			// 'err' still holds GL_WAIT_FAILED, so 'signaled' is recorded as false
			// even though the fence was observed signaled — confirm this is intended.
			signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
		}

		glDeleteSync(m_value);
		m_value = nullptr;

		return signaled;
	}

	// GPU-side wait: stalls the server until the fence is passed without
	// blocking the CPU.
	void server_wait_sync() const
	{
		ensure(m_value != nullptr);
		glWaitSync(m_value, 0, GL_TIMEOUT_IGNORED);
	}
};
|
||||
|
||||
template<typename Type, uint BindId, uint GetStateId>
|
||||
class save_binding_state_base
|
||||
{
|
||||
|
@ -538,631 +393,6 @@ namespace gl
|
|||
}
|
||||
};
|
||||
|
||||
// RAII-ish wrapper over a GL buffer object. Tracks size, bound target and
// whether the storage is immutable device-local or host-visible (mappable).
// Uses the DSA_CALL2* macros, so named-buffer entry points are used when
// ARB_direct_state_access is available, falling back to the EXT variants.
class buffer
{
public:
	// GL binding points this buffer may be attached to.
	enum class target
	{
		pixel_pack = GL_PIXEL_PACK_BUFFER,
		pixel_unpack = GL_PIXEL_UNPACK_BUFFER,
		array = GL_ARRAY_BUFFER,
		element_array = GL_ELEMENT_ARRAY_BUFFER,
		uniform = GL_UNIFORM_BUFFER,
		texture = GL_TEXTURE_BUFFER,
		ssbo = GL_SHADER_STORAGE_BUFFER
	};

	// Mapping access bits for map().
	enum class access
	{
		read = GL_MAP_READ_BIT,
		write = GL_MAP_WRITE_BIT,
		read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
	};

	// How the backing storage was allocated.
	enum class memory_type
	{
		undefined = 0,
		local = 1,        // device-local; CPU access via sub_data only
		host_visible = 2  // mappable by the CPU
	};

	// Scope guard that saves the current binding for a target, optionally
	// binds a new buffer, and restores the previous binding on destruction.
	class save_binding_state
	{
		GLint m_last_binding;
		GLenum m_target;

	public:
		save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_)
		{
			new_state.bind(target_);
		}

		save_binding_state(target target_)
		{
			GLenum pname{};
			switch (target_)
			{
			case target::pixel_pack: pname = GL_PIXEL_PACK_BUFFER_BINDING; break;
			case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break;
			case target::array: pname = GL_ARRAY_BUFFER_BINDING; break;
			case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break;
			case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break;
			case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break;
			case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break;
			default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast<int>(target_));
			}

			glGetIntegerv(pname, &m_last_binding);
			m_target = static_cast<GLenum>(target_);
		}

		~save_binding_state()
		{
			glBindBuffer(m_target, m_last_binding);
		}
	};

protected:
	GLuint m_id = GL_NONE;
	GLsizeiptr m_size = 0;
	target m_target = target::array;
	memory_type m_memory_type = memory_type::undefined;

	// Allocates backing storage. With ARB_buffer_storage, immutable storage
	// is created with flags derived from the requested usage hint; otherwise
	// falls back to mutable glBufferData-style allocation via data().
	void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
	{
		if (const auto& caps = get_driver_caps();
			caps.ARB_buffer_storage_supported)
		{
			GLenum flags = 0;
			if (type == memory_type::host_visible)
			{
				// Translate the legacy usage hint into storage flags.
				switch (usage)
				{
				case GL_STREAM_DRAW:
				case GL_STATIC_DRAW:
				case GL_DYNAMIC_DRAW:
					flags |= GL_MAP_WRITE_BIT;
					break;
				case GL_STREAM_READ:
				case GL_STATIC_READ:
				case GL_DYNAMIC_READ:
					flags |= GL_MAP_READ_BIT;
					break;
				default:
					fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
				}
			}
			else
			{
				// Local memory hints
				if (usage == GL_DYNAMIC_COPY)
				{
					flags |= GL_DYNAMIC_STORAGE_BIT;
				}
			}

			if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
			{
				// This flag stops NVIDIA from allocating read-only memory in VRAM.
				// NOTE: On AMD, allocating client-side memory via CLIENT_STORAGE_BIT or
				// making use of GL_AMD_pinned_memory brings everything down to a crawl.
				// Afaict there is no reason for this; disabling pixel pack/unpack operations does not alleviate the problem.
				// The driver seems to eventually figure out the optimal storage location by itself.
				flags |= GL_CLIENT_STORAGE_BIT;
			}

			DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags);
			m_size = size;
		}
		else
		{
			data(size, data_, usage);
		}

		m_memory_type = type;
	}

public:
	buffer() = default;
	buffer(const buffer&) = delete; // owns a GL object; copying would double-free

	// Adopts an existing GL buffer name without allocating.
	buffer(GLuint id)
	{
		set_id(id);
	}

	~buffer()
	{
		if (created())
			remove();
	}

	// Recreates the GL object (no storage allocated).
	void recreate()
	{
		if (created())
		{
			remove();
		}

		create();
	}

	// Recreates the GL object with new storage of the given size.
	void recreate(GLsizeiptr size, const void* data = nullptr)
	{
		if (created())
		{
			remove();
		}

		create(size, data);
	}

	// Generates a buffer name and binds it once so the driver materializes it.
	void create()
	{
		glGenBuffers(1, &m_id);
		save_binding_state save(current_target(), *this);
	}

	void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
	{
		create();
		allocate(size, data_, type, usage);
	}

	void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
	{
		m_target = target_;

		create();
		allocate(size, data_, type, usage);
	}

	void bind(target target_) const
	{
		glBindBuffer(static_cast<GLenum>(target_), m_id);
	}

	void bind() const
	{
		bind(current_target());
	}

	// The target this buffer was created for (set by the targeted create()).
	target current_target() const
	{
		return m_target;
	}

	// Deletes the GL object; safe to call when not created.
	void remove()
	{
		if (m_id != GL_NONE)
		{
			glDeleteBuffers(1, &m_id);
			m_id = GL_NONE;
			m_size = 0;
		}
	}

	GLsizeiptr size() const
	{
		return m_size;
	}

	uint id() const
	{
		return m_id;
	}

	void set_id(uint id)
	{
		m_id = id;
	}

	bool created() const
	{
		return m_id != GL_NONE;
	}

	explicit operator bool() const
	{
		return created();
	}

	// Mutable (re)allocation via glBufferData semantics. Disallowed for
	// immutable local storage.
	void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
	{
		ensure(m_memory_type != memory_type::local);

		DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
		m_size = size;
	}

	// CPU -> buffer copy for device-local storage.
	void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
	{
		ensure(m_memory_type == memory_type::local);
		DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);
	}

	// Maps a range of a host-visible buffer. Write-only maps are made
	// unsynchronized, so callers must handle their own fencing.
	GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_)
	{
		ensure(m_memory_type == memory_type::host_visible);

		GLenum access_bits = static_cast<GLenum>(access_);
		if (access_bits == GL_MAP_WRITE_BIT) access_bits |= GL_MAP_UNSYNCHRONIZED_BIT;

		auto raw_data = DSA_CALL2_RET(MapNamedBufferRange, id(), offset, length, access_bits);
		return reinterpret_cast<GLubyte*>(raw_data);
	}

	void unmap()
	{
		ensure(m_memory_type == memory_type::host_visible);
		DSA_CALL2(UnmapNamedBuffer, id());
	}

	// Binds a sub-range to an indexed binding point (UBO/SSBO/etc).
	void bind_range(u32 index, u32 offset, u32 size) const
	{
		glBindBufferRange(static_cast<GLenum>(current_target()), index, id(), offset, size);
	}

	void bind_range(target target_, u32 index, u32 offset, u32 size) const
	{
		glBindBufferRange(static_cast<GLenum>(target_), index, id(), offset, size);
	}

	// GPU-side buffer-to-buffer copy.
	void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size)
	{
		if (get_driver_caps().ARB_dsa_supported)
		{
			glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size);
		}
		else
		{
			glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size);
		}
	}
};
|
||||
|
||||
// Persistently-mapped ring buffer for streaming data to the GPU.
// Storage is created with MAP_PERSISTENT | MAP_COHERENT and mapped once;
// alloc_from_heap() hands out sub-ranges, wrapping to the start on overflow
// after waiting for the guard fence.
class ring_buffer : public buffer
{
protected:

	u32 m_data_loc = 0;                // current allocation cursor within the mapping
	void *m_memory_mapping = nullptr;  // base of the persistent mapping

	fence m_fence;                     // guards against overwriting in-flight data on wrap

public:

	virtual void bind()
	{
		buffer::bind();
	}

	// (Re)creates the buffer with persistent, coherent, write-mapped storage.
	virtual void recreate(GLsizeiptr size, const void* data = nullptr)
	{
		if (m_id)
		{
			// Ensure the GPU is done with the old storage before freeing it.
			m_fence.wait_for_signal();
			remove();
		}

		buffer::create();
		save_binding_state save(current_target(), *this);

		GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
		// MESA requires CLIENT_STORAGE for streaming buffers (see capabilities)
		if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT;

		DSA_CALL2(NamedBufferStorage, m_id, size, data, buffer_storage_flags);
		m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);

		ensure(m_memory_mapping != nullptr);
		m_data_loc = 0;
		m_size = ::narrow<u32>(size);
		m_memory_type = memory_type::host_visible;
	}

	void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
	{
		m_target = target_;
		recreate(size, data_);
	}

	// Reserves alloc_size bytes and returns {CPU pointer, offset in buffer}.
	// Wraps to offset 0 when the request does not fit, waiting on the fence
	// (or glFinish as a last resort) to avoid trampling in-flight data.
	virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment)
	{
		u32 offset = m_data_loc;
		if (m_data_loc) offset = utils::align(offset, alignment);

		if ((offset + alloc_size) > m_size)
		{
			if (!m_fence.is_empty())
			{
				m_fence.wait_for_signal();
			}
			else
			{
				rsx_log.error("OOM Error: Ring buffer was likely being used without notify() being called");
				glFinish();
			}

			m_data_loc = 0;
			offset = 0;
		}

		//Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
		m_data_loc = utils::align(offset + alloc_size, 256);
		return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
	}

	// Unmaps (if mapped) and deletes the buffer object.
	virtual void remove()
	{
		if (m_memory_mapping)
		{
			buffer::unmap();

			m_memory_mapping = nullptr;
			m_data_loc = 0;
			m_size = 0;
		}


		if (m_id != GL_NONE)
		{
			glDeleteBuffers(1, &m_id);
			m_id = GL_NONE;
		}
	}

	// No-ops in the persistent variant; overridden by subclasses that map
	// and unmap on demand.
	virtual void reserve_storage_on_heap(u32 /*alloc_size*/) {}

	virtual void unmap() {}

	virtual void flush() {}

	//Notification of a draw command
	virtual void notify()
	{
		//Insert fence about 25% into the buffer
		if (m_fence.is_empty() && (m_data_loc > (m_size >> 2)))
			m_fence.reset();
	}
};
|
||||
|
||||
// Ring buffer variant for drivers without persistent mapping
// (no ARB_buffer_storage). Uses mutable storage plus explicit
// map/unmap cycles: reserve_storage_on_heap() maps a window, allocations are
// carved from it, unmap() releases it.
class legacy_ring_buffer : public ring_buffer
{
	u32 m_mapped_bytes = 0;      // bytes remaining in the current mapped window
	u32 m_mapping_offset = 0;    // buffer offset where the current window starts
	u32 m_alignment_offset = 0;  // correction applied when the driver returns a misaligned base

public:

	void recreate(GLsizeiptr size, const void* data = nullptr) override
	{
		if (m_id)
			remove();

		buffer::create();
		buffer::data(size, data, GL_DYNAMIC_DRAW);

		// Marked host_visible even though mapping is done on demand, so
		// buffer::map/unmap asserts pass.
		m_memory_type = memory_type::host_visible;
		m_memory_mapping = nullptr;
		m_data_loc = 0;
		m_size = ::narrow<u32>(size);
	}

	void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
	{
		m_target = target_;
		recreate(size, data_);
	}

	// Maps a window large enough for alloc_size bytes; orphans the buffer
	// (glBufferData with nullptr) when the request would overflow the end.
	void reserve_storage_on_heap(u32 alloc_size) override
	{
		ensure(m_memory_mapping == nullptr);

		u32 offset = m_data_loc;
		if (m_data_loc) offset = utils::align(offset, 256);

		const u32 block_size = utils::align(alloc_size + 16, 256);	//Overallocate just in case we need to realign base

		if ((offset + block_size) > m_size)
		{
			buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW);
			m_data_loc = 0;
		}

		m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, m_data_loc, block_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
		m_mapped_bytes = block_size;
		m_mapping_offset = m_data_loc;
		m_alignment_offset = 0;

		//When using debugging tools, the mapped base might not be aligned as expected
		const u64 mapped_address_base = reinterpret_cast<u64>(m_memory_mapping);
		if (mapped_address_base & 0xF)
		{
			//Unaligned result was returned. We have to modify the base address a bit
			//We lose some memory here, but the 16 byte overallocation above makes up for it
			const u64 new_base = (mapped_address_base & ~0xF) + 16;
			const u64 diff_bytes = new_base - mapped_address_base;

			m_memory_mapping = reinterpret_cast<void*>(new_base);
			m_mapped_bytes -= ::narrow<u32>(diff_bytes);
			m_alignment_offset = ::narrow<u32>(diff_bytes);
		}

		ensure(m_mapped_bytes >= alloc_size);
	}

	// Carves an allocation out of the currently-mapped window, remapping a
	// larger window if the request does not fit.
	std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
	{
		u32 offset = m_data_loc;
		if (m_data_loc) offset = utils::align(offset, alignment);

		u32 padding = (offset - m_data_loc);
		u32 real_size = utils::align(padding + alloc_size, alignment);	//Ensures we leave the loc pointer aligned after we exit

		if (real_size > m_mapped_bytes)
		{
			//Missed allocation. We take a performance hit on doing this.
			//Overallocate slightly for the next allocation if requested size is too small
			unmap();
			reserve_storage_on_heap(std::max(real_size, 4096U));

			offset = m_data_loc;
			if (m_data_loc) offset = utils::align(offset, alignment);

			padding = (offset - m_data_loc);
			real_size = utils::align(padding + alloc_size, alignment);
		}

		m_data_loc = offset + real_size;
		m_mapped_bytes -= real_size;

		// Translate the buffer-space offset into mapping-space for the CPU pointer.
		u32 local_offset = (offset - m_mapping_offset);
		return std::make_pair(static_cast<char*>(m_memory_mapping) + local_offset, offset + m_alignment_offset);
	}

	void remove() override
	{
		ring_buffer::remove();
		m_mapped_bytes = 0;
	}

	void unmap() override
	{
		buffer::unmap();

		m_memory_mapping = nullptr;
		m_mapped_bytes = 0;
		m_mapping_offset = 0;
	}

	// No guard fence needed: allocations are synchronized via map/unmap.
	void notify() override {}
};
|
||||
|
||||
// A non-persistent ring buffer
|
||||
// Internally maps and unmaps data. Uses persistent storage just like the regular persistent variant
|
||||
// Works around drivers that have issues using mapped data for specific sources (e.g AMD proprietary driver with index buffers)
|
||||
// A non-persistent ring buffer
// Internally maps and unmaps data. Uses persistent storage just like the regular persistent variant
// Works around drivers that have issues using mapped data for specific sources (e.g AMD proprietary driver with index buffers)
class transient_ring_buffer : public ring_buffer
{
	bool dirty = false; // true while a transient mapping is outstanding

	// Maps the requested range write-only/unsynchronized; any previous
	// mapping is flushed (unmapped) first.
	void* map_internal(u32 offset, u32 length)
	{
		flush();

		dirty = true;
		return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
	}

public:

	void bind() override
	{
		// Ensure pending writes are unmapped before the buffer is sourced by GL.
		flush();
		buffer::bind();
	}

	void recreate(GLsizeiptr size, const void* data = nullptr) override
	{
		if (m_id)
		{
			m_fence.wait_for_signal();
			remove();
		}

		buffer::create();
		save_binding_state save(current_target(), *this);
		// Immutable storage, but without PERSISTENT/COHERENT: mapping is transient.
		DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT);

		m_data_loc = 0;
		m_size = ::narrow<u32>(size);
		m_memory_type = memory_type::host_visible;
	}

	std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
	{
		// Base class computes the offset; we then map just that range.
		ensure(m_memory_mapping == nullptr);
		const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment);
		return { map_internal(allocation.second, alloc_size), allocation.second };
	}

	// Unmaps the outstanding transient mapping, if any.
	void flush() override
	{
		if (dirty)
		{
			buffer::unmap();
			dirty = false;
		}
	}

	void unmap() override
	{
		flush();
	}
};
|
||||
|
||||
class buffer_view
|
||||
{
|
||||
buffer* m_buffer = nullptr;
|
||||
u32 m_offset = 0;
|
||||
u32 m_range = 0;
|
||||
GLenum m_format = GL_R8UI;
|
||||
|
||||
public:
|
||||
buffer_view(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
|
||||
: m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format)
|
||||
{}
|
||||
|
||||
buffer_view() = default;
|
||||
|
||||
void update(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
|
||||
{
|
||||
ensure(_buffer->size() >= (offset + range));
|
||||
m_buffer = _buffer;
|
||||
m_offset = offset;
|
||||
m_range = range;
|
||||
m_format = format;
|
||||
}
|
||||
|
||||
u32 offset() const
|
||||
{
|
||||
return m_offset;
|
||||
}
|
||||
|
||||
u32 range() const
|
||||
{
|
||||
return m_range;
|
||||
}
|
||||
|
||||
u32 format() const
|
||||
{
|
||||
return m_format;
|
||||
}
|
||||
|
||||
buffer* value() const
|
||||
{
|
||||
return m_buffer;
|
||||
}
|
||||
|
||||
bool in_range(u32 address, u32 size, u32& new_offset) const
|
||||
{
|
||||
if (address < m_offset)
|
||||
return false;
|
||||
|
||||
const u32 _offset = address - m_offset;
|
||||
if (m_range < _offset)
|
||||
return false;
|
||||
|
||||
const auto remaining = m_range - _offset;
|
||||
if (size <= remaining)
|
||||
{
|
||||
new_offset = _offset;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class vao
|
||||
{
|
||||
template<buffer::target BindId, uint GetStateId>
|
||||
|
@ -1353,6 +583,8 @@ namespace gl
|
|||
|
||||
class texture
|
||||
{
|
||||
friend class texture_view;
|
||||
|
||||
public:
|
||||
enum class type
|
||||
{
|
||||
|
@ -1480,7 +712,6 @@ namespace gl
|
|||
|
||||
rsx::format_class m_format_class = RSX_FORMAT_CLASS_UNDEFINED;
|
||||
|
||||
public:
|
||||
class save_binding_state
|
||||
{
|
||||
GLenum target = GL_NONE;
|
||||
|
@ -1519,6 +750,7 @@ namespace gl
|
|||
}
|
||||
};
|
||||
|
||||
public:
|
||||
texture(const texture&) = delete;
|
||||
texture(texture&& texture_) = delete;
|
||||
|
||||
|
|
|
@ -575,10 +575,10 @@ namespace gl
|
|||
program_handle.uniforms["stereo_image_count"] = (source[1] == GL_NONE? 1 : 2);
|
||||
|
||||
saved_sampler_state saved(31, m_sampler);
|
||||
glBindTexture(GL_TEXTURE_2D, source[0]);
|
||||
cmd->bind_texture(31, GL_TEXTURE_2D, source[0]);
|
||||
|
||||
saved_sampler_state saved2(30, m_sampler);
|
||||
glBindTexture(GL_TEXTURE_2D, source[1]);
|
||||
cmd->bind_texture(30, GL_TEXTURE_2D, source[1]);
|
||||
|
||||
overlay_pass::run(cmd, viewport, GL_NONE, false, false);
|
||||
}
|
||||
|
|
197
rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp
Normal file
197
rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp
Normal file
|
@ -0,0 +1,197 @@
|
|||
#include "stdafx.h"
|
||||
#include "buffer_object.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
	// Allocates backing storage for the buffer. With ARB_buffer_storage,
	// immutable storage is created with flags derived from the usage hint;
	// otherwise falls back to mutable glBufferData-style allocation via data().
	void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
	{
		if (const auto& caps = get_driver_caps();
			caps.ARB_buffer_storage_supported)
		{
			GLenum flags = 0;
			if (type == memory_type::host_visible)
			{
				// Translate the legacy usage hint into mapping flags.
				switch (usage)
				{
				case GL_STREAM_DRAW:
				case GL_STATIC_DRAW:
				case GL_DYNAMIC_DRAW:
					flags |= GL_MAP_WRITE_BIT;
					break;
				case GL_STREAM_READ:
				case GL_STATIC_READ:
				case GL_DYNAMIC_READ:
					flags |= GL_MAP_READ_BIT;
					break;
				default:
					fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
				}
			}
			else
			{
				// Local memory hints
				if (usage == GL_DYNAMIC_COPY)
				{
					flags |= GL_DYNAMIC_STORAGE_BIT;
				}
			}

			if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
			{
				// This flag stops NVIDIA from allocating read-only memory in VRAM.
				// NOTE: On AMD, allocating client-side memory via CLIENT_STORAGE_BIT or
				// making use of GL_AMD_pinned_memory brings everything down to a crawl.
				// Afaict there is no reason for this; disabling pixel pack/unpack operations does not alleviate the problem.
				// The driver seems to eventually figure out the optimal storage location by itself.
				flags |= GL_CLIENT_STORAGE_BIT;
			}

			DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags);
			m_size = size;
		}
		else
		{
			data(size, data_, usage);
		}

		m_memory_type = type;
	}
||||
|
||||
buffer::~buffer()
|
||||
{
|
||||
if (created())
|
||||
remove();
|
||||
}
|
||||
|
||||
void buffer::recreate()
|
||||
{
|
||||
if (created())
|
||||
{
|
||||
remove();
|
||||
}
|
||||
|
||||
create();
|
||||
}
|
||||
|
||||
void buffer::recreate(GLsizeiptr size, const void* data)
|
||||
{
|
||||
if (created())
|
||||
{
|
||||
remove();
|
||||
}
|
||||
|
||||
create(size, data);
|
||||
}
|
||||
|
||||
void buffer::create()
|
||||
{
|
||||
glGenBuffers(1, &m_id);
|
||||
save_binding_state save(current_target(), *this);
|
||||
}
|
||||
|
||||
void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
|
||||
{
|
||||
create();
|
||||
allocate(size, data_, type, usage);
|
||||
}
|
||||
|
||||
void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
|
||||
{
|
||||
m_target = target_;
|
||||
|
||||
create();
|
||||
allocate(size, data_, type, usage);
|
||||
}
|
||||
|
||||
void buffer::remove()
|
||||
{
|
||||
if (m_id != GL_NONE)
|
||||
{
|
||||
glDeleteBuffers(1, &m_id);
|
||||
m_id = GL_NONE;
|
||||
m_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void buffer::data(GLsizeiptr size, const void* data_, GLenum usage)
|
||||
{
|
||||
ensure(m_memory_type != memory_type::local);
|
||||
|
||||
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
|
||||
m_size = size;
|
||||
}
|
||||
|
||||
void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
|
||||
{
|
||||
ensure(m_memory_type == memory_type::local);
|
||||
DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);
|
||||
}
|
||||
|
||||
GLubyte* buffer::map(GLsizeiptr offset, GLsizeiptr length, access access_)
|
||||
{
|
||||
ensure(m_memory_type == memory_type::host_visible);
|
||||
|
||||
GLenum access_bits = static_cast<GLenum>(access_);
|
||||
if (access_bits == GL_MAP_WRITE_BIT) access_bits |= GL_MAP_UNSYNCHRONIZED_BIT;
|
||||
|
||||
auto raw_data = DSA_CALL2_RET(MapNamedBufferRange, id(), offset, length, access_bits);
|
||||
return reinterpret_cast<GLubyte*>(raw_data);
|
||||
}
|
||||
|
||||
void buffer::unmap()
|
||||
{
|
||||
ensure(m_memory_type == memory_type::host_visible);
|
||||
DSA_CALL2(UnmapNamedBuffer, id());
|
||||
}
|
||||
|
||||
void buffer::bind_range(u32 index, u32 offset, u32 size) const
|
||||
{
|
||||
glBindBufferRange(static_cast<GLenum>(current_target()), index, id(), offset, size);
|
||||
}
|
||||
|
||||
void buffer::bind_range(target target_, u32 index, u32 offset, u32 size) const
|
||||
{
|
||||
glBindBufferRange(static_cast<GLenum>(target_), index, id(), offset, size);
|
||||
}
|
||||
|
||||
void buffer::copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size)
|
||||
{
|
||||
if (get_driver_caps().ARB_dsa_supported)
|
||||
{
|
||||
glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size);
|
||||
}
|
||||
}
|
||||
|
||||
// Buffer view
|
||||
void buffer_view::update(buffer* _buffer, u32 offset, u32 range, GLenum format)
|
||||
{
|
||||
ensure(_buffer->size() >= (offset + range));
|
||||
m_buffer = _buffer;
|
||||
m_offset = offset;
|
||||
m_range = range;
|
||||
m_format = format;
|
||||
}
|
||||
|
||||
bool buffer_view::in_range(u32 address, u32 size, u32& new_offset) const
|
||||
{
|
||||
if (address < m_offset)
|
||||
return false;
|
||||
|
||||
const u32 _offset = address - m_offset;
|
||||
if (m_range < _offset)
|
||||
return false;
|
||||
|
||||
const auto remaining = m_range - _offset;
|
||||
if (size <= remaining)
|
||||
{
|
||||
new_offset = _offset;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
142
rpcs3/Emu/RSX/GL/glutils/buffer_object.h
Normal file
142
rpcs3/Emu/RSX/GL/glutils/buffer_object.h
Normal file
|
@ -0,0 +1,142 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
class buffer
|
||||
{
|
||||
public:
|
||||
enum class target
|
||||
{
|
||||
pixel_pack = GL_PIXEL_PACK_BUFFER,
|
||||
pixel_unpack = GL_PIXEL_UNPACK_BUFFER,
|
||||
array = GL_ARRAY_BUFFER,
|
||||
element_array = GL_ELEMENT_ARRAY_BUFFER,
|
||||
uniform = GL_UNIFORM_BUFFER,
|
||||
texture = GL_TEXTURE_BUFFER,
|
||||
ssbo = GL_SHADER_STORAGE_BUFFER
|
||||
};
|
||||
|
||||
enum class access
|
||||
{
|
||||
read = GL_MAP_READ_BIT,
|
||||
write = GL_MAP_WRITE_BIT,
|
||||
read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
|
||||
};
|
||||
|
||||
enum class memory_type
|
||||
{
|
||||
undefined = 0,
|
||||
local = 1,
|
||||
host_visible = 2
|
||||
};
|
||||
|
||||
class save_binding_state
|
||||
{
|
||||
GLint m_last_binding;
|
||||
GLenum m_target;
|
||||
|
||||
public:
|
||||
save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_)
|
||||
{
|
||||
new_state.bind(target_);
|
||||
}
|
||||
|
||||
save_binding_state(target target_)
|
||||
{
|
||||
GLenum pname{};
|
||||
switch (target_)
|
||||
{
|
||||
case target::pixel_pack: pname = GL_PIXEL_PACK_BUFFER_BINDING; break;
|
||||
case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break;
|
||||
case target::array: pname = GL_ARRAY_BUFFER_BINDING; break;
|
||||
case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break;
|
||||
case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break;
|
||||
case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break;
|
||||
case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break;
|
||||
default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast<int>(target_));
|
||||
}
|
||||
|
||||
glGetIntegerv(pname, &m_last_binding);
|
||||
m_target = static_cast<GLenum>(target_);
|
||||
}
|
||||
|
||||
~save_binding_state()
|
||||
{
|
||||
glBindBuffer(m_target, m_last_binding);
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
GLuint m_id = GL_NONE;
|
||||
GLsizeiptr m_size = 0;
|
||||
target m_target = target::array;
|
||||
memory_type m_memory_type = memory_type::undefined;
|
||||
|
||||
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage);
|
||||
|
||||
public:
|
||||
buffer() = default;
|
||||
buffer(const buffer&) = delete;
|
||||
~buffer();
|
||||
|
||||
void recreate();
|
||||
void recreate(GLsizeiptr size, const void* data = nullptr);
|
||||
|
||||
void create();
|
||||
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
|
||||
|
||||
void remove();
|
||||
|
||||
void bind(target target_) const { glBindBuffer(static_cast<GLenum>(target_), m_id); }
|
||||
void bind() const { bind(current_target()); }
|
||||
|
||||
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW);
|
||||
void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data);
|
||||
|
||||
GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_);
|
||||
void unmap();
|
||||
|
||||
void bind_range(u32 index, u32 offset, u32 size) const;
|
||||
void bind_range(target target_, u32 index, u32 offset, u32 size) const;
|
||||
|
||||
void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size);
|
||||
|
||||
target current_target() const { return m_target; }
|
||||
GLsizeiptr size() const { return m_size; }
|
||||
uint id() const { return m_id; }
|
||||
void set_id(uint id) { m_id = id; }
|
||||
bool created() const { return m_id != GL_NONE; }
|
||||
|
||||
explicit operator bool() const { return created(); }
|
||||
};
|
||||
|
||||
class buffer_view
|
||||
{
|
||||
buffer* m_buffer = nullptr;
|
||||
u32 m_offset = 0;
|
||||
u32 m_range = 0;
|
||||
GLenum m_format = GL_R8UI;
|
||||
|
||||
public:
|
||||
buffer_view(buffer* _buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
|
||||
: m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format)
|
||||
{}
|
||||
|
||||
buffer_view() = default;
|
||||
|
||||
void update(buffer* _buffer, u32 offset, u32 range, GLenum format = GL_R8UI);
|
||||
|
||||
u32 offset() const { return m_offset; }
|
||||
|
||||
u32 range() const { return m_range; }
|
||||
|
||||
u32 format() const { return m_format; }
|
||||
|
||||
buffer* value() const { return m_buffer; }
|
||||
|
||||
bool in_range(u32 address, u32 size, u32& new_offset) const;
|
||||
};
|
||||
}
|
230
rpcs3/Emu/RSX/GL/glutils/capabilities.hpp
Normal file
230
rpcs3/Emu/RSX/GL/glutils/capabilities.hpp
Normal file
|
@ -0,0 +1,230 @@
|
|||
#pragma once
|
||||
|
||||
#include "../OpenGL.h"
|
||||
#include <util/types.hpp>
|
||||
#include <util/asm.hpp>
|
||||
#include <util/logs.hpp>
|
||||
|
||||
namespace gl
|
||||
{
|
||||
class capabilities
|
||||
{
|
||||
public:
|
||||
bool EXT_dsa_supported = false;
|
||||
bool EXT_depth_bounds_test = false;
|
||||
bool ARB_dsa_supported = false;
|
||||
bool ARB_bindless_texture_supported = false;
|
||||
bool ARB_buffer_storage_supported = false;
|
||||
bool ARB_texture_buffer_supported = false;
|
||||
bool ARB_shader_draw_parameters_supported = false;
|
||||
bool ARB_depth_buffer_float_supported = false;
|
||||
bool ARB_texture_barrier_supported = false;
|
||||
bool ARB_shader_stencil_export_supported = false;
|
||||
bool NV_texture_barrier_supported = false;
|
||||
bool NV_gpu_shader5_supported = false;
|
||||
bool AMD_gpu_shader_half_float_supported = false;
|
||||
bool ARB_compute_shader_supported = false;
|
||||
bool NV_depth_buffer_float_supported = false;
|
||||
bool initialized = false;
|
||||
bool vendor_INTEL = false; // has broken GLSL compiler
|
||||
bool vendor_AMD = false; // has broken ARB_multidraw
|
||||
bool vendor_NVIDIA = false; // has NaN poisoning issues
|
||||
bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers
|
||||
|
||||
bool check(const std::string& ext_name, const char* test)
|
||||
{
|
||||
if (ext_name == test)
|
||||
{
|
||||
rsx_log.notice("Extension %s is supported", ext_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void initialize()
|
||||
{
|
||||
int find_count = 15;
|
||||
int ext_count = 0;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
|
||||
|
||||
if (!ext_count)
|
||||
{
|
||||
rsx_log.error("Coult not initialize GL driver capabilities. Is OpenGL initialized?");
|
||||
return;
|
||||
}
|
||||
|
||||
std::string vendor_string = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
std::string version_string = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||
std::string renderer_string = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||
|
||||
for (int i = 0; i < ext_count; i++)
|
||||
{
|
||||
if (!find_count) break;
|
||||
|
||||
const std::string ext_name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i));
|
||||
|
||||
if (check(ext_name, "GL_ARB_shader_draw_parameters"))
|
||||
{
|
||||
ARB_shader_draw_parameters_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_EXT_direct_state_access"))
|
||||
{
|
||||
EXT_dsa_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_direct_state_access"))
|
||||
{
|
||||
ARB_dsa_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_bindless_texture"))
|
||||
{
|
||||
ARB_bindless_texture_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_buffer_storage"))
|
||||
{
|
||||
ARB_buffer_storage_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_texture_buffer_object"))
|
||||
{
|
||||
ARB_texture_buffer_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_depth_buffer_float"))
|
||||
{
|
||||
ARB_depth_buffer_float_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_texture_barrier"))
|
||||
{
|
||||
ARB_texture_barrier_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_NV_texture_barrier"))
|
||||
{
|
||||
NV_texture_barrier_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_NV_gpu_shader5"))
|
||||
{
|
||||
NV_gpu_shader5_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_AMD_gpu_shader_half_float"))
|
||||
{
|
||||
AMD_gpu_shader_half_float_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_compute_shader"))
|
||||
{
|
||||
ARB_compute_shader_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_EXT_depth_bounds_test"))
|
||||
{
|
||||
EXT_depth_bounds_test = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_NV_depth_buffer_float"))
|
||||
{
|
||||
NV_depth_buffer_float_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_shader_stencil_export"))
|
||||
{
|
||||
ARB_shader_stencil_export_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check GL_VERSION and GL_RENDERER for the presence of Mesa
|
||||
if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
|
||||
{
|
||||
vendor_MESA = true;
|
||||
}
|
||||
|
||||
// Workaround for intel drivers which have terrible capability reporting
|
||||
if (!vendor_string.empty())
|
||||
{
|
||||
std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower);
|
||||
}
|
||||
else
|
||||
{
|
||||
rsx_log.error("Failed to get vendor string from driver. Are we missing a context?");
|
||||
vendor_string = "intel"; // lowest acceptable value
|
||||
}
|
||||
|
||||
if (!vendor_MESA && vendor_string.find("intel") != umax)
|
||||
{
|
||||
int version_major = 0;
|
||||
int version_minor = 0;
|
||||
|
||||
glGetIntegerv(GL_MAJOR_VERSION, &version_major);
|
||||
glGetIntegerv(GL_MINOR_VERSION, &version_minor);
|
||||
|
||||
vendor_INTEL = true;
|
||||
|
||||
// Texture buffers moved into core at GL 3.3
|
||||
if (version_major > 3 || (version_major == 3 && version_minor >= 3))
|
||||
ARB_texture_buffer_supported = true;
|
||||
|
||||
// Check for expected library entry-points for some required functions
|
||||
if (!ARB_buffer_storage_supported && glNamedBufferStorage && glMapNamedBufferRange)
|
||||
ARB_buffer_storage_supported = true;
|
||||
|
||||
if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange)
|
||||
ARB_dsa_supported = true;
|
||||
|
||||
if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
|
||||
EXT_dsa_supported = true;
|
||||
}
|
||||
else if (!vendor_MESA && vendor_string.find("nvidia") != umax)
|
||||
{
|
||||
vendor_NVIDIA = true;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax)
|
||||
{
|
||||
vendor_AMD = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
};
|
||||
|
||||
const capabilities& get_driver_caps();
|
||||
}
|
154
rpcs3/Emu/RSX/GL/glutils/common.h
Normal file
154
rpcs3/Emu/RSX/GL/glutils/common.h
Normal file
|
@ -0,0 +1,154 @@
|
|||
#pragma once
|
||||
|
||||
#include "capabilities.hpp"
|
||||
|
||||
//Function call wrapped in ARB_DSA vs EXT_DSA compat check
|
||||
#define DSA_CALL(func, object_name, target, ...)\
|
||||
if (::gl::get_driver_caps().ARB_dsa_supported)\
|
||||
gl##func(object_name, __VA_ARGS__);\
|
||||
else\
|
||||
gl##func##EXT(object_name, target, __VA_ARGS__);
|
||||
|
||||
#define DSA_CALL2(func, ...)\
|
||||
if (::gl::get_driver_caps().ARB_dsa_supported)\
|
||||
gl##func(__VA_ARGS__);\
|
||||
else\
|
||||
gl##func##EXT(__VA_ARGS__);
|
||||
|
||||
#define DSA_CALL2_RET(func, ...)\
|
||||
(::gl::get_driver_caps().ARB_dsa_supported) ?\
|
||||
gl##func(__VA_ARGS__) :\
|
||||
gl##func##EXT(__VA_ARGS__)
|
||||
|
||||
#define DSA_CALL3(funcARB, funcDSA, ...)\
|
||||
if (::gl::get_driver_caps().ARB_dsa_supported)\
|
||||
gl##funcARB(__VA_ARGS__);\
|
||||
else\
|
||||
gl##funcDSA##EXT(__VA_ARGS__);
|
||||
|
||||
namespace gl
|
||||
{
|
||||
// TODO: Move to sync.h
|
||||
class fence
|
||||
{
|
||||
GLsync m_value = nullptr;
|
||||
mutable GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
|
||||
mutable bool signaled = false;
|
||||
|
||||
public:
|
||||
|
||||
fence() = default;
|
||||
~fence() = default;
|
||||
|
||||
void create()
|
||||
{
|
||||
m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
flags = GL_SYNC_FLUSH_COMMANDS_BIT;
|
||||
}
|
||||
|
||||
void destroy()
|
||||
{
|
||||
glDeleteSync(m_value);
|
||||
m_value = nullptr;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
if (m_value != nullptr)
|
||||
destroy();
|
||||
|
||||
create();
|
||||
}
|
||||
|
||||
bool is_empty() const
|
||||
{
|
||||
return (m_value == nullptr);
|
||||
}
|
||||
|
||||
bool check_signaled() const
|
||||
{
|
||||
ensure(m_value);
|
||||
|
||||
if (signaled)
|
||||
return true;
|
||||
|
||||
if (flags)
|
||||
{
|
||||
GLenum err = glClientWaitSync(m_value, flags, 0);
|
||||
flags = 0;
|
||||
|
||||
if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
GLint status = GL_UNSIGNALED;
|
||||
GLint tmp;
|
||||
|
||||
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
|
||||
|
||||
if (status != GL_SIGNALED)
|
||||
return false;
|
||||
}
|
||||
|
||||
signaled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool wait_for_signal()
|
||||
{
|
||||
ensure(m_value);
|
||||
|
||||
if (signaled == GL_FALSE)
|
||||
{
|
||||
GLenum err = GL_WAIT_FAILED;
|
||||
bool done = false;
|
||||
|
||||
while (!done)
|
||||
{
|
||||
if (flags)
|
||||
{
|
||||
err = glClientWaitSync(m_value, flags, 0);
|
||||
flags = 0;
|
||||
|
||||
switch (err)
|
||||
{
|
||||
default:
|
||||
rsx_log.error("gl::fence sync returned unknown error 0x%X", err);
|
||||
[[fallthrough]];
|
||||
case GL_ALREADY_SIGNALED:
|
||||
case GL_CONDITION_SATISFIED:
|
||||
done = true;
|
||||
break;
|
||||
case GL_TIMEOUT_EXPIRED:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
GLint status = GL_UNSIGNALED;
|
||||
GLint tmp;
|
||||
|
||||
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
|
||||
|
||||
if (status == GL_SIGNALED)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
|
||||
}
|
||||
|
||||
glDeleteSync(m_value);
|
||||
m_value = nullptr;
|
||||
|
||||
return signaled;
|
||||
}
|
||||
|
||||
void server_wait_sync() const
|
||||
{
|
||||
ensure(m_value != nullptr);
|
||||
glWaitSync(m_value, 0, GL_TIMEOUT_IGNORED);
|
||||
}
|
||||
};
|
||||
}
|
241
rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp
Normal file
241
rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp
Normal file
|
@ -0,0 +1,241 @@
|
|||
#include "stdafx.h"
|
||||
#include "ring_buffer.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
void ring_buffer::recreate(GLsizeiptr size, const void* data)
|
||||
{
|
||||
if (m_id)
|
||||
{
|
||||
m_fence.wait_for_signal();
|
||||
remove();
|
||||
}
|
||||
|
||||
buffer::create();
|
||||
save_binding_state save(current_target(), *this);
|
||||
|
||||
GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
|
||||
if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT;
|
||||
|
||||
DSA_CALL2(NamedBufferStorage, m_id, size, data, buffer_storage_flags);
|
||||
m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
|
||||
|
||||
ensure(m_memory_mapping != nullptr);
|
||||
m_data_loc = 0;
|
||||
m_size = ::narrow<u32>(size);
|
||||
m_memory_type = memory_type::host_visible;
|
||||
}
|
||||
|
||||
void ring_buffer::create(target target_, GLsizeiptr size, const void* data_)
|
||||
{
|
||||
m_target = target_;
|
||||
recreate(size, data_);
|
||||
}
|
||||
|
||||
std::pair<void*, u32> ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment)
|
||||
{
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc) offset = utils::align(offset, alignment);
|
||||
|
||||
if ((offset + alloc_size) > m_size)
|
||||
{
|
||||
if (!m_fence.is_empty())
|
||||
{
|
||||
m_fence.wait_for_signal();
|
||||
}
|
||||
else
|
||||
{
|
||||
rsx_log.error("OOM Error: Ring buffer was likely being used without notify() being called");
|
||||
glFinish();
|
||||
}
|
||||
|
||||
m_data_loc = 0;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
//Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
|
||||
m_data_loc = utils::align(offset + alloc_size, 256);
|
||||
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
|
||||
}
|
||||
|
||||
void ring_buffer::remove()
|
||||
{
|
||||
if (m_memory_mapping)
|
||||
{
|
||||
buffer::unmap();
|
||||
|
||||
m_memory_mapping = nullptr;
|
||||
m_data_loc = 0;
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
|
||||
if (m_id != GL_NONE)
|
||||
{
|
||||
glDeleteBuffers(1, &m_id);
|
||||
m_id = GL_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
void ring_buffer::notify()
|
||||
{
|
||||
//Insert fence about 25% into the buffer
|
||||
if (m_fence.is_empty() && (m_data_loc > (m_size >> 2)))
|
||||
m_fence.reset();
|
||||
}
|
||||
|
||||
// Legacy ring buffer - used when ARB_buffer_storage is not available, OR when capturing with renderdoc
|
||||
void legacy_ring_buffer::recreate(GLsizeiptr size, const void* data)
|
||||
{
|
||||
if (m_id)
|
||||
remove();
|
||||
|
||||
buffer::create();
|
||||
buffer::data(size, data, GL_DYNAMIC_DRAW);
|
||||
|
||||
m_memory_type = memory_type::host_visible;
|
||||
m_memory_mapping = nullptr;
|
||||
m_data_loc = 0;
|
||||
m_size = ::narrow<u32>(size);
|
||||
}
|
||||
|
||||
void legacy_ring_buffer::create(target target_, GLsizeiptr size, const void* data_)
|
||||
{
|
||||
m_target = target_;
|
||||
recreate(size, data_);
|
||||
}
|
||||
|
||||
void legacy_ring_buffer::reserve_storage_on_heap(u32 alloc_size)
|
||||
{
|
||||
ensure(m_memory_mapping == nullptr);
|
||||
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc) offset = utils::align(offset, 256);
|
||||
|
||||
const u32 block_size = utils::align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
|
||||
|
||||
if ((offset + block_size) > m_size)
|
||||
{
|
||||
buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW);
|
||||
m_data_loc = 0;
|
||||
}
|
||||
|
||||
m_memory_mapping = DSA_CALL2_RET(MapNamedBufferRange, m_id, m_data_loc, block_size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
m_mapped_bytes = block_size;
|
||||
m_mapping_offset = m_data_loc;
|
||||
m_alignment_offset = 0;
|
||||
|
||||
//When using debugging tools, the mapped base might not be aligned as expected
|
||||
const u64 mapped_address_base = reinterpret_cast<u64>(m_memory_mapping);
|
||||
if (mapped_address_base & 0xF)
|
||||
{
|
||||
//Unaligned result was returned. We have to modify the base address a bit
|
||||
//We lose some memory here, but the 16 byte overallocation above makes up for it
|
||||
const u64 new_base = (mapped_address_base & ~0xF) + 16;
|
||||
const u64 diff_bytes = new_base - mapped_address_base;
|
||||
|
||||
m_memory_mapping = reinterpret_cast<void*>(new_base);
|
||||
m_mapped_bytes -= ::narrow<u32>(diff_bytes);
|
||||
m_alignment_offset = ::narrow<u32>(diff_bytes);
|
||||
}
|
||||
|
||||
ensure(m_mapped_bytes >= alloc_size);
|
||||
}
|
||||
|
||||
std::pair<void*, u32> legacy_ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment)
|
||||
{
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc) offset = utils::align(offset, alignment);
|
||||
|
||||
u32 padding = (offset - m_data_loc);
|
||||
u32 real_size = utils::align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit
|
||||
|
||||
if (real_size > m_mapped_bytes)
|
||||
{
|
||||
//Missed allocation. We take a performance hit on doing this.
|
||||
//Overallocate slightly for the next allocation if requested size is too small
|
||||
unmap();
|
||||
reserve_storage_on_heap(std::max(real_size, 4096U));
|
||||
|
||||
offset = m_data_loc;
|
||||
if (m_data_loc) offset = utils::align(offset, alignment);
|
||||
|
||||
padding = (offset - m_data_loc);
|
||||
real_size = utils::align(padding + alloc_size, alignment);
|
||||
}
|
||||
|
||||
m_data_loc = offset + real_size;
|
||||
m_mapped_bytes -= real_size;
|
||||
|
||||
u32 local_offset = (offset - m_mapping_offset);
|
||||
return std::make_pair(static_cast<char*>(m_memory_mapping) + local_offset, offset + m_alignment_offset);
|
||||
}
|
||||
|
||||
void legacy_ring_buffer::remove()
|
||||
{
|
||||
ring_buffer::remove();
|
||||
m_mapped_bytes = 0;
|
||||
}
|
||||
|
||||
void legacy_ring_buffer::unmap()
|
||||
{
|
||||
buffer::unmap();
|
||||
|
||||
m_memory_mapping = nullptr;
|
||||
m_mapped_bytes = 0;
|
||||
m_mapping_offset = 0;
|
||||
}
|
||||
|
||||
// AMD persistent mapping workaround for driver-assisted flushing
|
||||
void* transient_ring_buffer::map_internal(u32 offset, u32 length)
|
||||
{
|
||||
flush();
|
||||
|
||||
dirty = true;
|
||||
return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
}
|
||||
|
||||
void transient_ring_buffer::bind()
|
||||
{
|
||||
flush();
|
||||
buffer::bind();
|
||||
}
|
||||
|
||||
void transient_ring_buffer::recreate(GLsizeiptr size, const void* data)
|
||||
{
|
||||
if (m_id)
|
||||
{
|
||||
m_fence.wait_for_signal();
|
||||
remove();
|
||||
}
|
||||
|
||||
buffer::create();
|
||||
save_binding_state save(current_target(), *this);
|
||||
DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT);
|
||||
|
||||
m_data_loc = 0;
|
||||
m_size = ::narrow<u32>(size);
|
||||
m_memory_type = memory_type::host_visible;
|
||||
}
|
||||
|
||||
std::pair<void*, u32> transient_ring_buffer::alloc_from_heap(u32 alloc_size, u16 alignment)
|
||||
{
|
||||
ensure(m_memory_mapping == nullptr);
|
||||
const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment);
|
||||
return { map_internal(allocation.second, alloc_size), allocation.second };
|
||||
}
|
||||
|
||||
void transient_ring_buffer::flush()
|
||||
{
|
||||
if (dirty)
|
||||
{
|
||||
buffer::unmap();
|
||||
dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void transient_ring_buffer::unmap()
|
||||
{
|
||||
flush();
|
||||
}
|
||||
}
|
81
rpcs3/Emu/RSX/GL/glutils/ring_buffer.h
Normal file
81
rpcs3/Emu/RSX/GL/glutils/ring_buffer.h
Normal file
|
@ -0,0 +1,81 @@
|
|||
#pragma once
|
||||
|
||||
#include "buffer_object.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
class ring_buffer : public buffer
|
||||
{
|
||||
protected:
|
||||
|
||||
u32 m_data_loc = 0;
|
||||
void* m_memory_mapping = nullptr;
|
||||
|
||||
fence m_fence;
|
||||
|
||||
public:
|
||||
|
||||
virtual void bind() { buffer::bind(); }
|
||||
|
||||
virtual void recreate(GLsizeiptr size, const void* data = nullptr);
|
||||
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr);
|
||||
|
||||
virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment);
|
||||
|
||||
virtual void remove();
|
||||
|
||||
virtual void reserve_storage_on_heap(u32 /*alloc_size*/) {}
|
||||
|
||||
virtual void unmap() {}
|
||||
|
||||
virtual void flush() {}
|
||||
|
||||
virtual void notify();
|
||||
};
|
||||
|
||||
class legacy_ring_buffer : public ring_buffer
|
||||
{
|
||||
u32 m_mapped_bytes = 0;
|
||||
u32 m_mapping_offset = 0;
|
||||
u32 m_alignment_offset = 0;
|
||||
|
||||
public:
|
||||
|
||||
void recreate(GLsizeiptr size, const void* data = nullptr) override;
|
||||
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr);
|
||||
|
||||
void reserve_storage_on_heap(u32 alloc_size) override;
|
||||
|
||||
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override;
|
||||
|
||||
void remove() override;
|
||||
|
||||
void unmap() override;
|
||||
|
||||
void notify() override {}
|
||||
};
|
||||
|
||||
// A non-persistent ring buffer
|
||||
// Internally maps and unmaps data. Uses persistent storage just like the regular persistent variant
|
||||
// Works around drivers that have issues using mapped data for specific sources (e.g AMD proprietary driver with index buffers)
|
||||
class transient_ring_buffer : public ring_buffer
|
||||
{
|
||||
bool dirty = false;
|
||||
|
||||
void* map_internal(u32 offset, u32 length);
|
||||
|
||||
public:
|
||||
|
||||
void bind() override;
|
||||
|
||||
void recreate(GLsizeiptr size, const void* data = nullptr) override;
|
||||
|
||||
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override;
|
||||
|
||||
void flush() override;
|
||||
|
||||
void unmap() override;
|
||||
};
|
||||
}
|
|
@ -61,6 +61,10 @@
|
|||
<ClInclude Include="Emu\RSX\GL\GLGSRender.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLProcTable.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLProgramBuffer.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\buffer_object.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\capabilities.hpp" />
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\common.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\ring_buffer.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLVertexProgram.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLHelpers.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
|
||||
|
@ -77,6 +81,8 @@
|
|||
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLOverlays.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\glutils\buffer_object.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\glutils\ring_buffer.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
|
||||
|
|
|
@ -17,6 +17,12 @@
|
|||
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLOverlays.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLCompute.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\glutils\buffer_object.cpp">
|
||||
<Filter>glutils</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Emu\RSX\GL\glutils\ring_buffer.cpp">
|
||||
<Filter>glutils</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
|
||||
|
@ -36,5 +42,22 @@
|
|||
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLCompute.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLPipelineCompiler.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\buffer_object.h">
|
||||
<Filter>glutils</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\ring_buffer.h">
|
||||
<Filter>glutils</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\common.h">
|
||||
<Filter>glutils</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\GL\glutils\capabilities.hpp">
|
||||
<Filter>glutils</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="glutils">
|
||||
<UniqueIdentifier>{ed9ef6b7-efbb-4a8e-88a4-583b6655c141}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Add table
Reference in a new issue