mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 03:55:32 +00:00
gl: Use compute shaders for typeless texture decode
This commit is contained in:
parent
7a6e2e716f
commit
105d4b51e6
7 changed files with 218 additions and 186 deletions
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "Utilities/StrUtil.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "GLHelpers.h"
|
||||
|
||||
namespace gl
|
||||
|
@ -16,6 +17,28 @@ namespace gl
|
|||
bool unroll_loops = true;
|
||||
u32 optimal_group_size = 1;
|
||||
u32 optimal_kernel_size = 1;
|
||||
u32 max_invocations_x = 65535;
|
||||
|
||||
void initialize()
|
||||
{
|
||||
// Set up optimal kernel size
|
||||
const auto& caps = gl::get_driver_caps();
|
||||
if (caps.vendor_AMD || caps.vendor_MESA)
|
||||
{
|
||||
optimal_group_size = 64;
|
||||
unroll_loops = false;
|
||||
}
|
||||
else if (caps.vendor_NVIDIA)
|
||||
{
|
||||
optimal_group_size = 32;
|
||||
}
|
||||
else
|
||||
{
|
||||
optimal_group_size = 128;
|
||||
}
|
||||
|
||||
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, (GLint*)&max_invocations_x);
|
||||
}
|
||||
|
||||
void create()
|
||||
{
|
||||
|
@ -52,6 +75,7 @@ namespace gl
|
|||
GLint old_program;
|
||||
glGetIntegerv(GL_CURRENT_PROGRAM, &old_program);
|
||||
|
||||
bind_resources();
|
||||
m_program.use();
|
||||
glDispatchCompute(invocations_x, invocations_y, 1);
|
||||
|
||||
|
@ -60,7 +84,23 @@ namespace gl
|
|||
|
||||
// Dispatches a grid that covers at least `num_invocations` entries.
//
// A request that fits within max_invocations_x is issued as a single row.
// Larger requests are folded into a near-square X*Y grid and handed to the
// two-argument run() overload. The grid may overshoot the request by fewer
// than invocations_x entries, so kernels are expected to tolerate the excess
// (NOTE(review): confirm each kernel bounds-checks its linear index).
void run(u32 num_invocations)
{
	u32 invocations_x, invocations_y;

	if (LIKELY(num_invocations <= max_invocations_x))
	{
		invocations_x = num_invocations;
		invocations_y = 1;
	}
	else
	{
		// Since all the invocations will run, the optimal distribution is sqrt(count).
		// num_invocations is at least 1 on this path, so invocations_x is never 0.
		invocations_x = static_cast<u32>(std::floor(std::sqrt(static_cast<double>(num_invocations))));

		// Round the row count up so that x * y >= num_invocations for every input.
		// Bumping a square grid by a single row only when the remainder is
		// non-zero falls short whenever num_invocations >= x * (x + 1), e.g.
		// 65792 would be covered by only 256 * 256 = 65536 entries.
		// The division/remainder form avoids overflowing u32 for huge requests.
		invocations_y = num_invocations / invocations_x;
		if (num_invocations % invocations_x)
		{
			invocations_y++;
		}
	}

	run(invocations_x, invocations_y);
}
|
||||
};
|
||||
|
||||
|
@ -89,7 +129,7 @@ namespace gl
|
|||
void build(const char* function_name, u32 _kernel_size = 0)
|
||||
{
|
||||
// Initialize to allow detecting optimal settings
|
||||
create();
|
||||
initialize();
|
||||
|
||||
kernel_size = _kernel_size? _kernel_size : optimal_kernel_size;
|
||||
|
||||
|
@ -107,15 +147,21 @@ namespace gl
|
|||
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
|
||||
"\n"
|
||||
"// Depth format conversions\n"
|
||||
"#define d24_to_f32(bits) floatBitsToUint(float(bits) / 16777215.f)\n"
|
||||
"#define f32_to_d24(bits) uint(uintBitsToFloat(bits) * 16777215.f)\n"
|
||||
"#define d24x8_to_f32(bits) d24_to_f32(bits >> 8)\n"
|
||||
"#define d24x8_to_d24x8_swapped(bits) (bits & 0xFF00) | (bits & 0xFF0000) >> 16 | (bits & 0xFF) << 16\n"
|
||||
"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(f32_to_d24(bits))\n"
|
||||
"#define d24x8_to_x8d24(bits) (bits << 8) | (bits >> 24)\n"
|
||||
"#define d24x8_to_x8d24_swapped(bits) bswap_u32(d24x8_to_x8d24(bits))\n"
|
||||
"#define x8d24_to_d24x8(bits) (bits >> 8) | (bits << 24)\n"
|
||||
"#define x8d24_to_d24x8_swapped(bits) x8d24_to_d24x8(bswap_u32(bits))\n"
|
||||
"\n"
|
||||
"uint linear_invocation_id()\n"
|
||||
"{\n"
|
||||
" uint size_in_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);\n"
|
||||
" return (gl_GlobalInvocationID.y * size_in_x) + gl_GlobalInvocationID.x;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" uint index = gl_GlobalInvocationID.x * KERNEL_SIZE;\n"
|
||||
" uint invocation_id = linear_invocation_id();\n"
|
||||
" uint index = invocation_id * KERNEL_SIZE;\n"
|
||||
" uint value;\n"
|
||||
" %vars"
|
||||
"\n";
|
||||
|
@ -169,7 +215,7 @@ namespace gl
|
|||
|
||||
void bind_resources() override
|
||||
{
|
||||
m_data->bind_range(GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_data_length);
|
||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_data_length);
|
||||
}
|
||||
|
||||
void run(const gl::buffer* data, u32 data_length, u32 data_offset = 0)
|
||||
|
@ -220,156 +266,35 @@ namespace gl
|
|||
}
|
||||
};
|
||||
|
||||
struct cs_shuffle_d24x8_f32 : cs_shuffle_base
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_shuffle_d24x8_to_x8d24 : cs_shuffle_base
|
||||
{
|
||||
// convert d24x8 to f32
|
||||
cs_shuffle_d24x8_f32()
|
||||
cs_shuffle_d24x8_to_x8d24()
|
||||
{
|
||||
cs_shuffle_base::build("d24x8_to_f32");
|
||||
if constexpr (_SwapBytes)
|
||||
{
|
||||
cs_shuffle_base::build("d24x8_to_x8d24_swapped");
|
||||
}
|
||||
else
|
||||
{
|
||||
cs_shuffle_base::build("d24x8_to_x8d24");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct cs_shuffle_se_f32_d24x8 : cs_shuffle_base
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_shuffle_x8d24_to_d24x8 : cs_shuffle_base
|
||||
{
|
||||
// convert f32 to d24x8 and swap endianness
|
||||
cs_shuffle_se_f32_d24x8()
|
||||
cs_shuffle_x8d24_to_d24x8()
|
||||
{
|
||||
cs_shuffle_base::build("f32_to_d24x8_swapped");
|
||||
}
|
||||
};
|
||||
|
||||
struct cs_shuffle_se_d24x8 : cs_shuffle_base
|
||||
{
|
||||
// swap endianness of d24x8
|
||||
cs_shuffle_se_d24x8()
|
||||
{
|
||||
cs_shuffle_base::build("d24x8_to_d24x8_swapped");
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE: D24S8 layout has the stencil in the MSB! Its actually S8|D24|S8|D24 starting at offset 0
|
||||
struct cs_interleave_task : cs_shuffle_base
|
||||
{
|
||||
cs_interleave_task()
|
||||
{
|
||||
uniforms =
|
||||
" uniform uint block_length;\n"
|
||||
" uniform uint z_offset;\n"
|
||||
" uniform uint s_offset;\n";
|
||||
|
||||
variables =
|
||||
" uint depth;\n"
|
||||
" uint stencil;\n"
|
||||
" uint stencil_shift;\n"
|
||||
" uint stencil_offset;\n";
|
||||
}
|
||||
|
||||
void run(const gl::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset)
|
||||
{
|
||||
m_program.uniforms["block_length"] = data_length;
|
||||
m_program.uniforms["z_offset"] = zeta_offset - data_offset;
|
||||
m_program.uniforms["s_offset"] = stencil_offset - data_offset;
|
||||
cs_shuffle_base::run(data, data_length, data_offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_gather_d24x8 : cs_interleave_task
|
||||
{
|
||||
cs_gather_d24x8()
|
||||
{
|
||||
work_kernel =
|
||||
" if (index >= block_length)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" depth = data[index + z_offset] & 0x00FFFFFF;\n"
|
||||
" stencil_offset = (index / 4);\n"
|
||||
" stencil_shift = (index % 4) * 8;\n"
|
||||
" stencil = data[stencil_offset + s_offset];\n"
|
||||
" stencil = (stencil >> stencil_shift) & 0xFF;\n"
|
||||
" value = (depth << 8) | stencil;\n";
|
||||
|
||||
if constexpr (!_SwapBytes)
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = value;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = bswap_u32(value);\n";
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
};
|
||||
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_gather_d32x8 : cs_interleave_task
|
||||
{
|
||||
cs_gather_d32x8()
|
||||
{
|
||||
work_kernel =
|
||||
" if (index >= block_length)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" depth = f32_to_d24(data[index + z_offset]);\n"
|
||||
" stencil_offset = (index / 4);\n"
|
||||
" stencil_shift = (index % 4) * 8;\n"
|
||||
" stencil = data[stencil_offset + s_offset];\n"
|
||||
" stencil = (stencil >> stencil_shift) & 0xFF;\n"
|
||||
" value = (depth << 8) | stencil;\n";
|
||||
|
||||
if constexpr (!_SwapBytes)
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = value;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
work_kernel +=
|
||||
" data[index] = bswap_u32(value);\n";
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
};
|
||||
|
||||
struct cs_scatter_d24x8 : cs_interleave_task
|
||||
{
|
||||
cs_scatter_d24x8()
|
||||
{
|
||||
work_kernel =
|
||||
" if (index >= block_length)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" value = data[index];\n"
|
||||
" data[index + z_offset] = (value >> 8);\n"
|
||||
" stencil_offset = (index / 4);\n"
|
||||
" stencil_shift = (index % 4) * 8;\n"
|
||||
" stencil = (value & 0xFF) << stencil_shift;\n"
|
||||
" data[stencil_offset + s_offset] |= stencil;\n";
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
};
|
||||
|
||||
struct cs_scatter_d32x8 : cs_interleave_task
|
||||
{
|
||||
cs_scatter_d32x8()
|
||||
{
|
||||
work_kernel =
|
||||
" if (index >= block_length)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" value = data[index];\n"
|
||||
" data[index + z_offset] = d24_to_f32(value >> 8);\n"
|
||||
" stencil_offset = (index / 4);\n"
|
||||
" stencil_shift = (index % 4) * 8;\n"
|
||||
" stencil = (value & 0xFF) << stencil_shift;\n"
|
||||
" data[stencil_offset + s_offset] |= stencil;\n";
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
if constexpr (_SwapBytes)
|
||||
{
|
||||
cs_shuffle_base::build("x8d24_to_d24x8_swapped");
|
||||
}
|
||||
else
|
||||
{
|
||||
cs_shuffle_base::build("x8d24_to_d24x8");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -390,4 +315,6 @@ namespace gl
|
|||
|
||||
return static_cast<T*>(e.get());
|
||||
}
|
||||
|
||||
void destroy_compute_tasks();
|
||||
}
|
|
@ -2,6 +2,7 @@
|
|||
#include "Emu/Memory/vm.h"
|
||||
#include "Emu/System.h"
|
||||
#include "GLGSRender.h"
|
||||
#include "GLCompute.h"
|
||||
#include "GLVertexProgram.h"
|
||||
#include "../rsx_methods.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
|
@ -965,6 +966,8 @@ void GLGSRender::on_init_thread()
|
|||
|
||||
void GLGSRender::on_exit()
|
||||
{
|
||||
gl::destroy_compute_tasks();
|
||||
|
||||
zcull_ctrl.release();
|
||||
|
||||
m_prog_buffer.clear();
|
||||
|
|
|
@ -30,6 +30,16 @@ namespace gl
|
|||
}
|
||||
}
|
||||
|
||||
void destroy_compute_tasks()
|
||||
{
|
||||
for (auto& [key, prog] : g_compute_tasks)
|
||||
{
|
||||
prog->destroy();
|
||||
}
|
||||
|
||||
g_compute_tasks.clear();
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
void APIENTRY dbgFunc(GLenum source, GLenum type, GLuint id,
|
||||
GLenum severity, GLsizei lenght, const GLchar* message,
|
||||
|
|
|
@ -2454,8 +2454,8 @@ public:
|
|||
case type::fragment:
|
||||
base_name = "shaderlog/FragmentProgram";
|
||||
break;
|
||||
case type::geometry:
|
||||
base_name = "shaderlog/GeometryProgram";
|
||||
case type::compute:
|
||||
base_name = "shaderlog/ComputeProgram";
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
#include "GLTexture.h"
|
||||
#include "GLCompute.h"
|
||||
#include "../GCM.h"
|
||||
#include "../RSXThread.h"
|
||||
#include "../RSXTexture.h"
|
||||
|
@ -90,43 +91,43 @@ namespace gl
|
|||
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
|
||||
}
|
||||
|
||||
std::tuple<GLenum, GLenum, bool> get_format_type(texture::internal_format format)
|
||||
pixel_buffer_layout get_format_type(texture::internal_format format)
|
||||
{
|
||||
switch (format)
|
||||
{
|
||||
case texture::internal_format::compressed_rgba_s3tc_dxt1:
|
||||
case texture::internal_format::compressed_rgba_s3tc_dxt3:
|
||||
case texture::internal_format::compressed_rgba_s3tc_dxt5:
|
||||
return std::make_tuple(GL_RGBA, GL_UNSIGNED_BYTE, false);
|
||||
return { GL_RGBA, GL_UNSIGNED_BYTE, 1, false };
|
||||
case texture::internal_format::r8:
|
||||
return std::make_tuple(GL_RED, GL_UNSIGNED_BYTE, false);
|
||||
return { GL_RED, GL_UNSIGNED_BYTE, 1, false };
|
||||
case texture::internal_format::r16:
|
||||
return std::make_tuple(GL_RED, GL_UNSIGNED_SHORT, true);
|
||||
return { GL_RED, GL_UNSIGNED_SHORT, 2, true };
|
||||
case texture::internal_format::r32f:
|
||||
return std::make_tuple(GL_RED, GL_FLOAT, true);
|
||||
return { GL_RED, GL_FLOAT, 4, true };
|
||||
case texture::internal_format::rg8:
|
||||
return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE, false);
|
||||
return { GL_RG, GL_UNSIGNED_BYTE, 1, false };
|
||||
case texture::internal_format::rg16:
|
||||
return std::make_tuple(GL_RG, GL_UNSIGNED_SHORT, true);
|
||||
return { GL_RG, GL_UNSIGNED_SHORT, 2, true };
|
||||
case texture::internal_format::rg16f:
|
||||
return std::make_tuple(GL_RG, GL_HALF_FLOAT, true);
|
||||
return { GL_RG, GL_HALF_FLOAT, 2, true };
|
||||
case texture::internal_format::rgb565:
|
||||
return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, true);
|
||||
return { GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 2, true };
|
||||
case texture::internal_format::rgb5a1:
|
||||
return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, true);
|
||||
return { GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, 2, true };
|
||||
case texture::internal_format::rgba4:
|
||||
return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4, false);
|
||||
return { GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4, 2, false };
|
||||
case texture::internal_format::rgba8:
|
||||
return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, false);
|
||||
return { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 4, false };
|
||||
case texture::internal_format::rgba16f:
|
||||
return std::make_tuple(GL_RGBA, GL_HALF_FLOAT, true);
|
||||
return { GL_RGBA, GL_HALF_FLOAT, 2, true };
|
||||
case texture::internal_format::rgba32f:
|
||||
return std::make_tuple(GL_RGBA, GL_FLOAT, true);
|
||||
return { GL_RGBA, GL_FLOAT, 4, true };
|
||||
case texture::internal_format::depth16:
|
||||
return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, true);
|
||||
return { GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 2, true };
|
||||
case texture::internal_format::depth24_stencil8:
|
||||
case texture::internal_format::depth32f_stencil8:
|
||||
return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, true);
|
||||
return { GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 4, true };
|
||||
default:
|
||||
fmt::throw_exception("Unexpected internal format 0x%X" HERE, (u32)format);
|
||||
}
|
||||
|
@ -742,30 +743,113 @@ namespace gl
|
|||
GLsizeiptr src_mem = src->width() * src->height();
|
||||
GLsizeiptr dst_mem = dst->width() * dst->height();
|
||||
|
||||
GLenum buffer_copy_flag = GL_STATIC_COPY;
|
||||
if (gl::get_driver_caps().vendor_MESA) buffer_copy_flag = GL_STREAM_COPY;
|
||||
// NOTE: Mesa lacks acceleration for PBO unpacking and is currently fastest with GL_STREAM_COPY
|
||||
// See https://bugs.freedesktop.org/show_bug.cgi?id=111043
|
||||
|
||||
auto max_mem = std::max(src_mem, dst_mem) * 16;
|
||||
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
|
||||
{
|
||||
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
|
||||
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, buffer_copy_flag);
|
||||
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
}
|
||||
|
||||
auto format_type = get_format_type(src->get_internal_format());
|
||||
const auto pack_info = get_format_type(src->get_internal_format());
|
||||
const auto unpack_info = get_format_type(dst->get_internal_format());
|
||||
|
||||
pixel_pack_settings pack_settings{};
|
||||
pack_settings.swap_bytes(std::get<2>(format_type));
|
||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
|
||||
src->copy_to(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), pack_settings);
|
||||
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
format_type = get_format_type(dst->get_internal_format());
|
||||
const bool src_is_ds = !!(src->aspect() & gl::image_aspect::stencil);
|
||||
const bool dst_is_ds = !!(src->aspect() & gl::image_aspect::stencil);
|
||||
|
||||
if (pack_info.swap_bytes || unpack_info.swap_bytes || src_is_ds || dst_is_ds)
|
||||
{
|
||||
gl::cs_shuffle_base *src_transform = nullptr, *dst_transform = nullptr;
|
||||
|
||||
if (src_is_ds)
|
||||
{
|
||||
if (pack_info.swap_bytes)
|
||||
{
|
||||
src_transform = gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
||||
}
|
||||
else
|
||||
{
|
||||
src_transform = gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
|
||||
}
|
||||
}
|
||||
else if (pack_info.swap_bytes)
|
||||
{
|
||||
switch (pack_info.size)
|
||||
{
|
||||
case 1:
|
||||
break;
|
||||
case 2:
|
||||
src_transform = gl::get_compute_task<gl::cs_shuffle_16>();
|
||||
break;
|
||||
case 4:
|
||||
src_transform = gl::get_compute_task<gl::cs_shuffle_32>();
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unsupported format");
|
||||
}
|
||||
}
|
||||
|
||||
if (dst_is_ds)
|
||||
{
|
||||
if (unpack_info.swap_bytes)
|
||||
{
|
||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<true>>();
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<false>>();
|
||||
}
|
||||
}
|
||||
else if (unpack_info.swap_bytes)
|
||||
{
|
||||
switch (unpack_info.size)
|
||||
{
|
||||
case 1:
|
||||
break;
|
||||
case 2:
|
||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_16>();
|
||||
break;
|
||||
case 4:
|
||||
dst_transform = gl::get_compute_task<gl::cs_shuffle_32>();
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unsupported format");
|
||||
}
|
||||
|
||||
if (!src_is_ds)
|
||||
{
|
||||
if (src_transform == dst_transform)
|
||||
{
|
||||
src_transform = dst_transform = nullptr;
|
||||
}
|
||||
else if (src_transform)
|
||||
{
|
||||
src_transform = gl::get_compute_task<gl::cs_shuffle_32_16>();
|
||||
dst_transform = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (src_transform)
|
||||
{
|
||||
const auto image_size = src->pitch() * src->height();
|
||||
src_transform->run(&g_typeless_transfer_buffer, image_size);
|
||||
}
|
||||
|
||||
if (dst_transform)
|
||||
{
|
||||
const auto image_size = dst->pitch() * dst->height();
|
||||
dst_transform->run(&g_typeless_transfer_buffer, image_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pixel_unpack_settings unpack_settings{};
|
||||
unpack_settings.swap_bytes(std::get<2>(format_type));
|
||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
|
||||
dst->copy_from(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), unpack_settings);
|
||||
dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, unpack_settings);
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,10 +13,18 @@ namespace rsx
|
|||
|
||||
namespace gl
|
||||
{
|
||||
struct pixel_buffer_layout
|
||||
{
|
||||
GLenum format;
|
||||
GLenum type;
|
||||
u8 size;
|
||||
bool swap_bytes;
|
||||
};
|
||||
|
||||
GLenum get_target(rsx::texture_dimension_extended type);
|
||||
GLenum get_sized_internal_format(u32 texture_format);
|
||||
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
|
||||
std::tuple<GLenum, GLenum, bool> get_format_type(texture::internal_format format);
|
||||
pixel_buffer_layout get_format_type(texture::internal_format format);
|
||||
GLenum wrap_mode(rsx::texture_wrap_mode wrap);
|
||||
float max_aniso(rsx::texture_max_anisotropy aniso);
|
||||
std::array<GLenum, 4> get_swizzle_remap(u32 texture_format);
|
||||
|
|
|
@ -163,12 +163,12 @@ namespace gl
|
|||
{
|
||||
// Determine unpack config dynamically
|
||||
const auto format_info = gl::get_format_type(src->get_internal_format());
|
||||
format = static_cast<gl::texture::format>(std::get<0>(format_info));
|
||||
type = static_cast<gl::texture::type>(std::get<1>(format_info));
|
||||
format = static_cast<gl::texture::format>(format_info.format);
|
||||
type = static_cast<gl::texture::type>(format_info.type);
|
||||
|
||||
if ((src->aspect() & gl::image_aspect::stencil) == 0)
|
||||
{
|
||||
pack_unpack_swap_bytes = std::get<2>(format_info);
|
||||
pack_unpack_swap_bytes = format_info.swap_bytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue