mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-08-02 22:30:39 +00:00
gl: Refactoring and fallback support for compute acceleration
This commit is contained in:
parent
b39bfa02a6
commit
d9a9766e41
2 changed files with 102 additions and 93 deletions
|
@ -114,6 +114,7 @@ namespace gl
|
||||||
bool NV_texture_barrier_supported = false;
|
bool NV_texture_barrier_supported = false;
|
||||||
bool NV_gpu_shader5_supported = false;
|
bool NV_gpu_shader5_supported = false;
|
||||||
bool AMD_gpu_shader_half_float_supported = false;
|
bool AMD_gpu_shader_half_float_supported = false;
|
||||||
|
bool ARB_compute_shader_supported = false;
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
bool vendor_INTEL = false; // has broken GLSL compiler
|
bool vendor_INTEL = false; // has broken GLSL compiler
|
||||||
bool vendor_AMD = false; // has broken ARB_multidraw
|
bool vendor_AMD = false; // has broken ARB_multidraw
|
||||||
|
@ -133,7 +134,7 @@ namespace gl
|
||||||
|
|
||||||
void initialize()
|
void initialize()
|
||||||
{
|
{
|
||||||
int find_count = 10;
|
int find_count = 11;
|
||||||
int ext_count = 0;
|
int ext_count = 0;
|
||||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
|
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
|
||||||
|
|
||||||
|
@ -212,6 +213,13 @@ namespace gl
|
||||||
find_count--;
|
find_count--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (check(ext_name, "GL_ARB_compute_shader"))
|
||||||
|
{
|
||||||
|
ARB_compute_shader_supported = true;
|
||||||
|
find_count--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Workaround for intel drivers which have terrible capability reporting
|
// Workaround for intel drivers which have terrible capability reporting
|
||||||
|
|
|
@ -738,116 +738,117 @@ namespace gl
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Selects the compute task to run over packed pixel data described by pack_info.
// Returns nullptr when no task is selected: the layout is not GL_UNSIGNED_INT_24_8 and
// either swap_bytes is unset or the element size is 1.
// Calls fmt::throw_exception("Unsupported format") for a byte-swapped, non-24_8 layout
// whose element size is neither 1, 2 nor 4.
cs_shuffle_base* get_pixel_transform_job(const pixel_buffer_layout& pack_info)
{
	// Everything that is not packed as GL_UNSIGNED_INT_24_8 is the expected (hot) case
	const bool plain_layout = (pack_info.type != GL_UNSIGNED_INT_24_8);

	if (!LIKELY(plain_layout))
	{
		// 24_8 data always gets a d24x8 -> x8d24 job; swap_bytes only picks the template variant
		if (pack_info.swap_bytes)
		{
			return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
		}

		return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
	}

	// No job is selected without swap_bytes, nor for single-byte elements
	if (!pack_info.swap_bytes || pack_info.size == 1)
	{
		return nullptr;
	}

	// Byte-swapped data: choose the shuffle job by element size
	switch (pack_info.size)
	{
	case 2:
		return gl::get_compute_task<gl::cs_shuffle_16>();
	case 4:
		return gl::get_compute_task<gl::cs_shuffle_32>();
	default:
		fmt::throw_exception("Unsupported format");
	}
}
|
||||||
|
|
||||||
void copy_typeless(texture * dst, const texture * src)
|
void copy_typeless(texture * dst, const texture * src)
|
||||||
{
|
{
|
||||||
GLsizeiptr src_mem = src->width() * src->height();
|
GLsizeiptr src_mem = src->pitch() * src->height();
|
||||||
GLsizeiptr dst_mem = dst->width() * dst->height();
|
GLsizeiptr dst_mem = dst->pitch() * dst->height();
|
||||||
|
|
||||||
auto max_mem = std::max(src_mem, dst_mem) * 16;
|
auto max_mem = std::max(src_mem, dst_mem);
|
||||||
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
|
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
|
||||||
{
|
{
|
||||||
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
|
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
|
||||||
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto& caps = gl::get_driver_caps();
|
||||||
const auto pack_info = get_format_type(src->get_internal_format());
|
const auto pack_info = get_format_type(src->get_internal_format());
|
||||||
const auto unpack_info = get_format_type(dst->get_internal_format());
|
const auto unpack_info = get_format_type(dst->get_internal_format());
|
||||||
|
|
||||||
pixel_pack_settings pack_settings{};
|
// Start pack operation
|
||||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
|
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
|
||||||
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings);
|
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
|
||||||
|
|
||||||
const bool src_is_ds = !!(src->aspect() & gl::image_aspect::stencil);
|
if (LIKELY(caps.ARB_compute_shader_supported))
|
||||||
const bool dst_is_ds = !!(src->aspect() & gl::image_aspect::stencil);
|
|
||||||
|
|
||||||
if (pack_info.swap_bytes || unpack_info.swap_bytes || src_is_ds || dst_is_ds)
|
|
||||||
{
|
{
|
||||||
gl::cs_shuffle_base *src_transform = nullptr, *dst_transform = nullptr;
|
// Raw copy
|
||||||
|
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type);
|
||||||
if (src_is_ds)
|
}
|
||||||
{
|
else
|
||||||
if (pack_info.swap_bytes)
|
{
|
||||||
{
|
pixel_pack_settings pack_settings{};
|
||||||
src_transform = gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
pack_settings.swap_bytes(pack_info.swap_bytes);
|
||||||
}
|
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings);
|
||||||
else
|
|
||||||
{
|
|
||||||
src_transform = gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (pack_info.swap_bytes)
|
|
||||||
{
|
|
||||||
switch (pack_info.size)
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
src_transform = gl::get_compute_task<gl::cs_shuffle_16>();
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
src_transform = gl::get_compute_task<gl::cs_shuffle_32>();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fmt::throw_exception("Unsupported format");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dst_is_ds)
|
|
||||||
{
|
|
||||||
if (unpack_info.swap_bytes)
|
|
||||||
{
|
|
||||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<true>>();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<false>>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (unpack_info.swap_bytes)
|
|
||||||
{
|
|
||||||
switch (unpack_info.size)
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_16>();
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_32>();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fmt::throw_exception("Unsupported format");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!src_is_ds)
|
|
||||||
{
|
|
||||||
if (src_transform == dst_transform)
|
|
||||||
{
|
|
||||||
src_transform = dst_transform = nullptr;
|
|
||||||
}
|
|
||||||
else if (src_transform)
|
|
||||||
{
|
|
||||||
src_transform = gl::get_compute_task<gl::cs_shuffle_32_16>();
|
|
||||||
dst_transform = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (src_transform)
|
|
||||||
{
|
|
||||||
const auto image_size = src->pitch() * src->height();
|
|
||||||
src_transform->run(&g_typeless_transfer_buffer, image_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dst_transform)
|
|
||||||
{
|
|
||||||
const auto image_size = dst->pitch() * dst->height();
|
|
||||||
dst_transform->run(&g_typeless_transfer_buffer, image_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||||
|
|
||||||
|
// Start unpack operation
|
||||||
pixel_unpack_settings unpack_settings{};
|
pixel_unpack_settings unpack_settings{};
|
||||||
|
|
||||||
|
if (LIKELY(caps.ARB_compute_shader_supported))
|
||||||
|
{
|
||||||
|
auto src_transform = get_pixel_transform_job(pack_info);
|
||||||
|
auto dst_transform = get_pixel_transform_job(unpack_info);
|
||||||
|
|
||||||
|
if (src->aspect() == gl::image_aspect::color && dst->aspect() == gl::image_aspect::color)
|
||||||
|
{
|
||||||
|
if (src_transform == dst_transform)
|
||||||
|
{
|
||||||
|
src_transform = dst_transform = nullptr;
|
||||||
|
}
|
||||||
|
else if (src_transform && dst_transform)
|
||||||
|
{
|
||||||
|
src_transform = gl::get_compute_task<cs_shuffle_32_16>();
|
||||||
|
dst_transform = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto job_length = std::min(src_mem, dst_mem);
|
||||||
|
if (src_transform)
|
||||||
|
{
|
||||||
|
src_transform->run(&g_typeless_transfer_buffer, job_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst_transform)
|
||||||
|
{
|
||||||
|
dst_transform->run(&g_typeless_transfer_buffer, job_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: glBindBufferRange also binds the buffer to the old-school target.
|
||||||
|
// Unbind it to avoid glitching later
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unpack_settings.swap_bytes(unpack_info.swap_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
|
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
|
||||||
dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, unpack_settings);
|
dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, unpack_settings);
|
||||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue