diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index e1a4c7eb76..1f516ea94b 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -114,6 +114,7 @@ namespace gl bool NV_texture_barrier_supported = false; bool NV_gpu_shader5_supported = false; bool AMD_gpu_shader_half_float_supported = false; + bool ARB_compute_shader_supported = false; bool initialized = false; bool vendor_INTEL = false; // has broken GLSL compiler bool vendor_AMD = false; // has broken ARB_multidraw @@ -133,7 +134,7 @@ namespace gl void initialize() { - int find_count = 10; + int find_count = 11; int ext_count = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count); @@ -212,6 +213,13 @@ namespace gl find_count--; continue; } + + if (check(ext_name, "GL_ARB_compute_shader")) + { + ARB_compute_shader_supported = true; + find_count--; + continue; + } } // Workaround for intel drivers which have terrible capability reporting diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 194496b94b..b652484907 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -738,116 +738,117 @@ namespace gl return false; } + cs_shuffle_base* get_pixel_transform_job(const pixel_buffer_layout& pack_info) + { + const bool is_depth_stencil = (pack_info.type == GL_UNSIGNED_INT_24_8); + if (LIKELY(!is_depth_stencil)) + { + if (!pack_info.swap_bytes) + { + return nullptr; + } + + switch (pack_info.size) + { + case 1: + return nullptr; + case 2: + return gl::get_compute_task(); + break; + case 4: + return gl::get_compute_task(); + break; + default: + fmt::throw_exception("Unsupported format"); + } + } + else + { + if (pack_info.swap_bytes) + { + return gl::get_compute_task>(); + } + else + { + return gl::get_compute_task>(); + } + } + } + void copy_typeless(texture * dst, const texture * src) { - GLsizeiptr src_mem = src->width() * src->height(); - GLsizeiptr dst_mem = dst->width() * dst->height(); + GLsizeiptr src_mem = src->pitch() * src->height(); + GLsizeiptr dst_mem = dst->pitch() * dst->height(); - auto max_mem = std::max(src_mem, dst_mem) * 16; + auto max_mem = std::max(src_mem, dst_mem); if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size()) { if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove(); g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY); } + const auto& caps = gl::get_driver_caps(); const auto pack_info = get_format_type(src->get_internal_format()); const auto unpack_info = get_format_type(dst->get_internal_format()); - pixel_pack_settings pack_settings{}; + // Start pack operation g_typeless_transfer_buffer.bind(buffer::target::pixel_pack); - src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings); - glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); - const bool src_is_ds = !!(src->aspect() & gl::image_aspect::stencil); - const bool dst_is_ds = !!(src->aspect() & gl::image_aspect::stencil); - - if (pack_info.swap_bytes || unpack_info.swap_bytes || src_is_ds || dst_is_ds) + if (LIKELY(caps.ARB_compute_shader_supported)) { - gl::cs_shuffle_base *src_transform = nullptr, *dst_transform = nullptr; - - if (src_is_ds) - { - if (pack_info.swap_bytes) - { - src_transform = gl::get_compute_task>(); - } - else - { - src_transform = gl::get_compute_task>(); - } - } - else if (pack_info.swap_bytes) - { - switch (pack_info.size) - { - case 1: - break; - case 2: - src_transform = gl::get_compute_task(); - break; - case 4: - src_transform = gl::get_compute_task(); - break; - default: - fmt::throw_exception("Unsupported format"); - } - } - - if (dst_is_ds) - { - if (unpack_info.swap_bytes) - { - dst_transform = gl::get_compute_task>(); - } - else - { - dst_transform = gl::get_compute_task>(); - } - } - else if (unpack_info.swap_bytes) - { - switch (unpack_info.size) - { - case 1: - break; - case 2: - dst_transform = gl::get_compute_task(); - break; - case 4: - dst_transform = gl::get_compute_task(); - break; - default: - fmt::throw_exception("Unsupported format"); - } - - if (!src_is_ds) - { - if (src_transform == dst_transform) - { - src_transform = dst_transform = nullptr; - } - else if (src_transform) - { - src_transform = gl::get_compute_task(); - dst_transform = nullptr; - } - } - - if (src_transform) - { - const auto image_size = src->pitch() * src->height(); - src_transform->run(&g_typeless_transfer_buffer, image_size); - } - - if (dst_transform) - { - const auto image_size = dst->pitch() * dst->height(); - dst_transform->run(&g_typeless_transfer_buffer, image_size); - } - } + // Raw copy + src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type); + } + else + { + pixel_pack_settings pack_settings{}; + pack_settings.swap_bytes(pack_info.swap_bytes); + src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings); } + glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); + + // Start unpack operation pixel_unpack_settings unpack_settings{}; + + if (LIKELY(caps.ARB_compute_shader_supported)) + { + auto src_transform = get_pixel_transform_job(pack_info); + auto dst_transform = get_pixel_transform_job(unpack_info); + + if (src->aspect() == gl::image_aspect::color && dst->aspect() == gl::image_aspect::color) + { + if (src_transform == dst_transform) + { + src_transform = dst_transform = nullptr; + } + else if (src_transform && dst_transform) + { + src_transform = gl::get_compute_task(); + dst_transform = nullptr; + } + } + + const auto job_length = std::min(src_mem, dst_mem); + if (src_transform) + { + src_transform->run(&g_typeless_transfer_buffer, job_length); + } + + if (dst_transform) + { + dst_transform->run(&g_typeless_transfer_buffer, job_length); + } + + // NOTE: glBindBufferRange also binds the buffer to the old-school target. + // Unbind it to avoid glitching later + glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE); + } + else + { + unpack_settings.swap_bytes(unpack_info.swap_bytes); + } + g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack); dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, unpack_settings); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);