mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-21 03:55:32 +00:00
rsx: Fix texture readback
- gl: Fix up the calculation for internal image pitch - vk: Implement GPU-side resizing for read back textures (fixes WCB zoom)
This commit is contained in:
parent
278cb52f19
commit
f45dcfe18a
3 changed files with 183 additions and 69 deletions
|
@ -1469,6 +1469,8 @@ namespace gl
|
|||
GLuint m_height = 0;
|
||||
GLuint m_depth = 0;
|
||||
GLuint m_mipmaps = 0;
|
||||
GLuint m_pitch = 0;
|
||||
GLuint m_compressed = GL_FALSE;
|
||||
|
||||
target m_target = target::texture2D;
|
||||
internal_format m_internal_format = internal_format::rgba8;
|
||||
|
@ -1553,6 +1555,26 @@ namespace gl
|
|||
m_height = height;
|
||||
m_depth = depth;
|
||||
m_mipmaps = mipmaps;
|
||||
|
||||
GLenum query_target = (target == GL_TEXTURE_CUBE_MAP) ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : target;
|
||||
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_COMPRESSED, (GLint*)&m_compressed);
|
||||
|
||||
if (m_compressed)
|
||||
{
|
||||
GLint compressed_size;
|
||||
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_COMPRESSED_IMAGE_SIZE, &compressed_size);
|
||||
m_pitch = compressed_size / height;
|
||||
}
|
||||
else
|
||||
{
|
||||
GLint r, g, b, a;
|
||||
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_RED_SIZE, &r);
|
||||
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_GREEN_SIZE, &g);
|
||||
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_BLUE_SIZE, &b);
|
||||
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_ALPHA_SIZE, &a);
|
||||
|
||||
m_pitch = width * (r + g + b + a) / 8;
|
||||
}
|
||||
}
|
||||
|
||||
m_target = static_cast<texture::target>(target);
|
||||
|
@ -1622,6 +1644,16 @@ namespace gl
|
|||
return m_mipmaps;
|
||||
}
|
||||
|
||||
// Returns the cached row pitch of the texture in bytes (m_pitch).
// In the visible initialisation code the value is derived from mip level 0:
// compressed image size / height for compressed formats, otherwise
// width * (red + green + blue + alpha bits) / 8.
// NOTE(review): only the colour channel sizes are summed, so this looks like it
// yields 0 for depth/stencil formats - confirm before relying on a non-zero pitch.
GLuint pitch() const
|
||||
{
|
||||
return m_pitch;
|
||||
}
|
||||
|
||||
// Returns the cached compression flag (m_compressed), which defaults to GL_FALSE
// and is filled in from a GL_TEXTURE_COMPRESSED query on mip level 0.
// The member is stored as GLuint and converted to GLboolean on return.
GLboolean compressed() const
|
||||
{
|
||||
return m_compressed;
|
||||
}
|
||||
|
||||
sizei size2D() const
|
||||
{
|
||||
return{ (int)m_width, (int)m_height };
|
||||
|
|
|
@ -296,6 +296,7 @@ namespace gl
|
|||
this->width = w;
|
||||
this->height = h;
|
||||
this->rsx_pitch = rsx_pitch;
|
||||
this->real_pitch = 0;
|
||||
this->depth = depth;
|
||||
this->mipmaps = mipmaps;
|
||||
|
||||
|
@ -330,7 +331,6 @@ namespace gl
|
|||
this->width = width;
|
||||
this->height = height;
|
||||
rsx_pitch = pitch;
|
||||
real_pitch = width * get_pixel_size(format, type);
|
||||
}
|
||||
|
||||
void set_format(texture::format gl_format, texture::type gl_type, bool swap_bytes)
|
||||
|
@ -351,8 +351,6 @@ namespace gl
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
real_pitch = width * get_pixel_size(format, type);
|
||||
}
|
||||
|
||||
void set_depth_flag(bool is_depth_fmt)
|
||||
|
@ -374,7 +372,7 @@ namespace gl
|
|||
|
||||
gl::texture* target_texture = vram_texture;
|
||||
if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) ||
|
||||
(real_pitch != rsx_pitch))
|
||||
(vram_texture->pitch() != rsx_pitch))
|
||||
{
|
||||
u32 real_width = width;
|
||||
u32 real_height = height;
|
||||
|
@ -435,6 +433,7 @@ namespace gl
|
|||
pack_settings.swap_bytes(pack_unpack_swap_bytes);
|
||||
|
||||
target_texture->copy_to(nullptr, format, type, pack_settings);
|
||||
real_pitch = target_texture->pitch();
|
||||
|
||||
if (auto error = glGetError())
|
||||
{
|
||||
|
@ -495,6 +494,8 @@ namespace gl
|
|||
result = false;
|
||||
}
|
||||
|
||||
verify(HERE), real_pitch > 0;
|
||||
|
||||
m_fence.wait_for_signal();
|
||||
flushed = true;
|
||||
|
||||
|
@ -514,7 +515,7 @@ namespace gl
|
|||
require_manual_shuffle = true;
|
||||
}
|
||||
|
||||
if (real_pitch >= rsx_pitch || scaled_texture != 0 || valid_range.second <= rsx_pitch)
|
||||
if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
|
||||
{
|
||||
memcpy(dst, src, valid_range.second);
|
||||
}
|
||||
|
|
|
@ -66,8 +66,6 @@ namespace vk
|
|||
else
|
||||
this->rsx_pitch = cpu_address_range / height;
|
||||
|
||||
real_pitch = vk::get_format_texel_width(image->info.format) * width;
|
||||
|
||||
//Even if we are managing the same vram section, we cannot guarantee contents are static
|
||||
//The create method is only invoked when a new managed session is required
|
||||
synchronized = false;
|
||||
|
@ -166,25 +164,61 @@ namespace vk
|
|||
cmd.begin();
|
||||
}
|
||||
|
||||
const u16 internal_width = (context != rsx::texture_upload_context::framebuffer_storage? width : std::min(width, rsx::apply_resolution_scale(width, true)));
|
||||
const u16 internal_height = (context != rsx::texture_upload_context::framebuffer_storage? height : std::min(height, rsx::apply_resolution_scale(height, true)));
|
||||
vk::image *target = vram_texture;
|
||||
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();
|
||||
|
||||
VkImageAspectFlags aspect_flag = vk::get_aspect_flags(vram_texture->info.format);
|
||||
|
||||
//TODO: Read back stencil values (is this really necessary?)
|
||||
VkBufferImageCopy copyRegion = {};
|
||||
copyRegion.bufferOffset = 0;
|
||||
copyRegion.bufferRowLength = internal_width;
|
||||
copyRegion.bufferImageHeight = internal_height;
|
||||
copyRegion.imageSubresource = {aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1};
|
||||
copyRegion.imageOffset = {};
|
||||
copyRegion.imageExtent = {internal_width, internal_height, 1};
|
||||
|
||||
VkImageSubresourceRange subresource_range = { aspect_flag, 0, 1, 0, 1 };
|
||||
|
||||
VkImageLayout layout = vram_texture->current_layout;
|
||||
u32 transfer_width = width;
|
||||
u32 transfer_height = height;
|
||||
|
||||
VkImageLayout old_layout = vram_texture->current_layout;
|
||||
change_image_layout(cmd, vram_texture, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
|
||||
vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &copyRegion);
|
||||
change_image_layout(cmd, vram_texture, layout, subresource_range);
|
||||
|
||||
if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) ||
|
||||
(real_pitch != rsx_pitch))
|
||||
{
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
switch (static_cast<vk::render_target*>(vram_texture)->read_aa_mode)
|
||||
{
|
||||
case rsx::surface_antialiasing::center_1_sample:
|
||||
break;
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
transfer_width *= 2;
|
||||
break;
|
||||
default:
|
||||
transfer_width *= 2;
|
||||
transfer_height *= 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (transfer_width != vram_texture->width() || transfer_height != vram_texture->height())
|
||||
{
|
||||
// TODO: Synchronize access to typeless textures
|
||||
target = vk::get_typeless_helper(vram_texture->info.format);
|
||||
vk::copy_scaled_image(cmd, vram_texture->value, target->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, target->current_layout,
|
||||
0, 0, vram_texture->width(), vram_texture->height(), 0, 0, transfer_width, transfer_height, 1, aspect_flag, true, VK_FILTER_NEAREST,
|
||||
vram_texture->info.format, target->info.format);
|
||||
}
|
||||
}
|
||||
|
||||
if (target->current_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||
{
|
||||
// Using a scaled intermediary
|
||||
verify(HERE), target != vram_texture;
|
||||
change_image_layout(cmd, target, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
|
||||
}
|
||||
|
||||
// TODO: Read back stencil values (is this really necessary?)
|
||||
VkBufferImageCopy region = {};
|
||||
region.imageSubresource = {aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1};
|
||||
region.imageExtent = {transfer_width, transfer_height, 1};
|
||||
vkCmdCopyImageToBuffer(cmd, target->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &region);
|
||||
|
||||
change_image_layout(cmd, vram_texture, old_layout, subresource_range);
|
||||
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * transfer_width;
|
||||
|
||||
if (manage_cb_lifetime)
|
||||
{
|
||||
|
@ -205,7 +239,7 @@ namespace vk
|
|||
}
|
||||
|
||||
template<typename T, bool swapped>
|
||||
void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 max_length)
|
||||
void do_memory_transfer_packed(void *pixels_dst, const void *pixels_src, u32 max_length)
|
||||
{
|
||||
if (sizeof(T) == 1 || !swapped)
|
||||
{
|
||||
|
@ -222,6 +256,38 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
// Row-by-row copy between two buffers whose rows are spaced differently.
// Reads num_rows rows from pixels_src (rows src_pitch bytes apart) and writes them to
// pixels_dst (rows dst_pitch bytes apart); src_pitch bytes are transferred per row.
// T is the element type. When swapped is true and sizeof(T) > 1, each element is
// assigned through be_t<T> instead of being copied with memcpy.
// NOTE(review): nothing here checks that src_pitch <= dst_pitch or that either buffer
// is large enough - presumably guaranteed by the caller; confirm.
template<typename T, bool swapped>
|
||||
void do_memory_transfer_padded(void *pixels_dst, const void *pixels_src, u32 src_pitch, u32 dst_pitch, u32 num_rows)
|
||||
{
|
||||
auto src = (char*)pixels_src;
|
||||
auto dst = (char*)pixels_dst;
|
||||
|
||||
if (sizeof(T) == 1 || !swapped) // single-byte elements or no swap requested: plain byte copy per row
|
||||
{
|
||||
for (u32 y = 0; y < num_rows; ++y)
|
||||
{
|
||||
memcpy(dst, src, src_pitch); // only src_pitch bytes are written; the rest of the destination row is left untouched
|
||||
src += src_pitch;
|
||||
dst += dst_pitch;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 block_size = src_pitch / sizeof(T); // whole elements per row; a trailing partial element is not copied
|
||||
for (u32 y = 0; y < num_rows; ++y)
|
||||
{
|
||||
auto typed_dst = (be_t<T> *)dst;
|
||||
auto typed_src = (T *)src;
|
||||
|
||||
for (u32 px = 0; px < block_size; ++px)
|
||||
typed_dst[px] = typed_src[px]; // presumably be_t<T> stores the value in big-endian byte order - confirm
|
||||
|
||||
src += src_pitch;
|
||||
dst += dst_pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
|
||||
{
|
||||
if (flushed) return true;
|
||||
|
@ -241,6 +307,7 @@ namespace vk
|
|||
result = false;
|
||||
}
|
||||
|
||||
verify(HERE), real_pitch > 0;
|
||||
flushed = true;
|
||||
|
||||
const auto valid_range = get_confirmed_range();
|
||||
|
@ -250,67 +317,81 @@ namespace vk
|
|||
const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
|
||||
const auto elem_size = texel_layout.first;
|
||||
|
||||
//We have to do our own byte swapping since the driver doesn't do it for us
|
||||
if (real_pitch == rsx_pitch)
|
||||
auto memory_transfer_packed = [=]()
|
||||
{
|
||||
switch (elem_size)
|
||||
{
|
||||
default:
|
||||
LOG_ERROR(RSX, "Invalid element width %d", elem_size);
|
||||
case 1:
|
||||
do_memory_transfer_packed<u8, false>(pixels_dst, pixels_src, valid_range.second);
|
||||
break;
|
||||
case 2:
|
||||
if (pack_unpack_swap_bytes)
|
||||
do_memory_transfer_packed<u16, true>(pixels_dst, pixels_src, valid_range.second);
|
||||
else
|
||||
do_memory_transfer_packed<u16, false>(pixels_dst, pixels_src, valid_range.second);
|
||||
break;
|
||||
case 4:
|
||||
if (pack_unpack_swap_bytes)
|
||||
do_memory_transfer_packed<u32, true>(pixels_dst, pixels_src, valid_range.second);
|
||||
else
|
||||
do_memory_transfer_packed<u32, false>(pixels_dst, pixels_src, valid_range.second);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
auto memory_transfer_padded = [=]()
|
||||
{
|
||||
const u32 num_rows = valid_range.second / rsx_pitch;
|
||||
switch (elem_size)
|
||||
{
|
||||
default:
|
||||
LOG_ERROR(RSX, "Invalid element width %d", elem_size);
|
||||
case 1:
|
||||
do_memory_transfer_padded<u8, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
|
||||
break;
|
||||
case 2:
|
||||
if (pack_unpack_swap_bytes)
|
||||
do_memory_transfer_padded<u16, true>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
|
||||
else
|
||||
do_memory_transfer_padded<u16, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
|
||||
break;
|
||||
case 4:
|
||||
if (pack_unpack_swap_bytes)
|
||||
do_memory_transfer_padded<u32, true>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
|
||||
else
|
||||
do_memory_transfer_padded<u32, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE: We have to do our own byte swapping since the driver doesn't do it for us
|
||||
// TODO: Replace the cpu-side transformations with trivial compute pipelines
|
||||
if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
|
||||
{
|
||||
bool is_depth_format = true;
|
||||
switch (vram_texture->info.format)
|
||||
{
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
{
|
||||
rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, valid_range.second >> 2, 1);
|
||||
break;
|
||||
}
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
{
|
||||
rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, valid_range.second >> 2, 1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
is_depth_format = false;
|
||||
{
|
||||
memory_transfer_packed();
|
||||
break;
|
||||
}
|
||||
|
||||
if (!is_depth_format)
|
||||
{
|
||||
switch (elem_size)
|
||||
{
|
||||
default:
|
||||
LOG_ERROR(RSX, "Invalid element width %d", elem_size);
|
||||
case 1:
|
||||
do_memory_transfer<u8, false>(pixels_dst, pixels_src, valid_range.second);
|
||||
break;
|
||||
case 2:
|
||||
if (pack_unpack_swap_bytes)
|
||||
do_memory_transfer<u16, true>(pixels_dst, pixels_src, valid_range.second);
|
||||
else
|
||||
do_memory_transfer<u16, false>(pixels_dst, pixels_src, valid_range.second);
|
||||
break;
|
||||
case 4:
|
||||
if (pack_unpack_swap_bytes)
|
||||
do_memory_transfer<u32, true>(pixels_dst, pixels_src, valid_range.second);
|
||||
else
|
||||
do_memory_transfer<u32, false>(pixels_dst, pixels_src, valid_range.second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//Scale image to fit
|
||||
//usually we can just get away with nearest filtering
|
||||
u8 samples_u = 1, samples_v = 1;
|
||||
switch (static_cast<vk::render_target*>(vram_texture)->read_aa_mode)
|
||||
{
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
samples_u = 2;
|
||||
break;
|
||||
case rsx::surface_antialiasing::square_centered_4_samples:
|
||||
case rsx::surface_antialiasing::square_rotated_4_samples:
|
||||
samples_u = 2;
|
||||
samples_v = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
const u16 row_length = u16(width * texel_layout.second);
|
||||
const u16 usable_height = (valid_range.second / rsx_pitch) / samples_v;
|
||||
rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, usable_height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes);
|
||||
memory_transfer_padded();
|
||||
|
||||
switch (vram_texture->info.format)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue