rsx: Allow GPU-accelerated stream manipulation when doing texture uploads

This commit is contained in:
kd-11 2019-08-27 17:01:36 +03:00 committed by kd-11
parent e0a7912d7c
commit 99fb6d6a5d
5 changed files with 244 additions and 131 deletions

View file

@ -469,48 +469,51 @@ std::vector<rsx_subresource_layout> get_subresources_layout(const rsx::vertex_te
return get_subresources_layout_impl(texture);
}
void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, bool vtc_support, size_t dst_row_pitch_multiple_of)
texture_memory_info upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps)
{
u16 w = src_layout.width_in_block;
u16 h = src_layout.height_in_block;
u16 depth = src_layout.depth;
u32 pitch = src_layout.pitch_in_block;
texture_memory_info result{};
// Ignore when texture width > pitch
if (w > pitch)
return;
return result;
// Check if we can use a fast path
int word_size = 0;
int words_per_block;
u32 dst_pitch_in_block;
// NOTE: Avoid block optimization for formats that can be modified internally by the GPU itself
// Since the gpu code does not attempt to do wide translations (e.g WZYX32->XYZW32), only perform, per-channel transform and use proper swizzles to get the proper output
switch (format)
{
case CELL_GCM_TEXTURE_B8:
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u8>(w, dst_row_pitch_multiple_of));
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u8>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
word_size = words_per_block = 1;
dst_pitch_in_block = get_row_pitch_in_block<u8>(w, caps.alignment);
break;
}
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
{
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
{
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u16>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u16>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
case CELL_GCM_TEXTURE_R6G5B5:
{
if (is_swizzled)
copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of));
copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment));
else
copy_rgb655_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_rgb655_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
@ -526,10 +529,9 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_G8B8:
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of));
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
word_size = 2;
words_per_block = 1;
dst_pitch_in_block = get_row_pitch_in_block<u16>(w, caps.alignment);
break;
}
@ -538,10 +540,9 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of));
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
word_size = 4;
words_per_block = 1;
dst_pitch_in_block = get_row_pitch_in_block<u32>(w, caps.alignment);
break;
}
@ -554,13 +555,9 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
{
const u16 block_size = get_format_block_size_in_bytes(format);
const u16 words_per_block = block_size / 2;
const auto dst_pitch_in_block = get_row_pitch_in_block(block_size, w, dst_row_pitch_multiple_of);
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
word_size = 2;
words_per_block = block_size / 2;
dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment);
break;
}
@ -568,28 +565,24 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
{
const u16 block_size = get_format_block_size_in_bytes(format);
const u16 words_per_block = block_size / 4;
const auto dst_pitch_in_block = get_row_pitch_in_block(block_size, w, dst_row_pitch_multiple_of);
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
word_size = 4;
words_per_block = block_size / 4;
dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment);
break;
}
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
{
if (depth > 1 && !vtc_support)
if (depth > 1 && !caps.supports_vtc_decoding)
{
// PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
// This is only supported using Nvidia OpenGL.
// Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
}
break;
}
@ -597,16 +590,16 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
{
if (depth > 1 && !vtc_support)
if (depth > 1 && !caps.supports_vtc_decoding)
{
// PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
// This is only supported using Nvidia OpenGL.
// Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
}
break;
}
@ -614,6 +607,56 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
default:
fmt::throw_exception("Wrong format 0x%x" HERE, format);
}
if (word_size)
{
if (word_size == 1)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (caps.supports_byteswap)
{
result.require_swap = true;
result.element_size = word_size;
if (word_size == 2)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
else
{
if (word_size == 2)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
}
return result;
}
/**

View file

@ -110,6 +110,19 @@ struct rsx_subresource_layout
u32 pitch_in_block;
};
struct texture_memory_info
{
int element_size;
bool require_swap;
};
struct texture_uploader_capabilities
{
bool supports_byteswap;
bool supports_vtc_decoding;
size_t alignment;
};
/**
* Get size to store texture in a linear fashion.
* Storage is assumed to use a rowPitchAlignment boundary for every row of texture.
@ -125,7 +138,7 @@ size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_
std::vector<rsx_subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture);
std::vector<rsx_subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture);
void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, bool vtc_support, size_t dst_row_pitch_multiple_of);
texture_memory_info upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps);
u8 get_format_block_size_in_bytes(int format);
u8 get_format_block_size_in_texel(int format);

View file

@ -115,7 +115,8 @@ namespace {
size_t offset_in_buffer = 0;
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(mapped_buffer.subspan(offset_in_buffer), layout, format, is_swizzled, false, 256);
texture_uploader_capabilities caps{ false, false, 256 };
upload_texture_subresource(mapped_buffer.subspan(offset_in_buffer), layout, format, is_swizzled, caps);
UINT row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)mip_level), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(),

View file

@ -63,7 +63,7 @@ namespace gl
case CELL_GCM_TEXTURE_A1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
case CELL_GCM_TEXTURE_A4R4G4B4: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4);
case CELL_GCM_TEXTURE_R5G6B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8);
case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
@ -370,7 +370,7 @@ namespace gl
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R6G5B5:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_A8R8G8B8: // TODO
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
@ -458,107 +458,122 @@ namespace gl
const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<gsl::byte>& staging_buffer)
{
int mip_level = 0;
bool vtc_support = gl::get_driver_caps().vendor_NVIDIA;
texture_uploader_capabilities caps{ true, false, 4 };
if (is_compressed_format(format))
pixel_unpack_settings unpack_settings;
unpack_settings.row_length(0).alignment(4);
if (LIKELY(is_compressed_format(format)))
{
//Compressed formats have a 4-byte alignment
//TODO: Verify that samplers are not affected by the padding
width = align(width, 4);
height = align(height, 4);
}
if (dim == rsx::texture_dimension_extended::texture_dimension_1d)
{
if (!is_compressed_format(format))
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
unpack_settings.apply();
for (const rsx_subresource_layout& layout : input_layouts)
{
for (const rsx_subresource_layout &layout : input_layouts)
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
switch (dim)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
case rsx::texture_dimension_extended::texture_dimension_1d:
{
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, gl_format, size, staging_buffer.data());
break;
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_2d)
{
if (!is_compressed_format(format))
{
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
case rsx::texture_dimension_extended::texture_dimension_2d:
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
break;
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap)
{
// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
// mip_level % mipmap_per_layer will always be equal to mip_level
if (!is_compressed_format(format))
{
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
mip_level++;
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
case rsx::texture_dimension_extended::texture_dimension_cubemap:
{
// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
// mip_level % mipmap_per_layer will always be equal to mip_level
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
mip_level++;
break;
}
}
return;
}
if (dim == rsx::texture_dimension_extended::texture_dimension_3d)
{
if (!is_compressed_format(format))
{
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
}
}
else
{
for (const rsx_subresource_layout &layout : input_layouts)
case rsx::texture_dimension_extended::texture_dimension_3d:
{
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, gl_format, size, staging_buffer.data());
break;
}
default:
{
ASSUME(0);
fmt::throw_exception("Unreachable" HERE);
}
}
}
}
else
{
bool apply_settings = true;
switch (gl_type)
{
case GL_UNSIGNED_INT_8_8_8_8:
// NOTE: GL_UNSIGNED_INT_8_8_8_8 is already a swapped type
// TODO: Remove reliance on format and type checks when compute acceleration is implemented
apply_settings = false;
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
// Multi-channel format uploaded one byte at a time. This is due to poor driver support for formats like GL_UNSIGNED SHORT_8_8
// Do byteswapping in software for now until compute acceleration is available
apply_settings = (gl_format == GL_RED);
caps.supports_byteswap = apply_settings;
break;
default:
break;
}
if (!apply_settings)
{
unpack_settings.apply();
}
for (const rsx_subresource_layout& layout : input_layouts)
{
auto op = upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
if (apply_settings)
{
unpack_settings.swap_bytes(op.require_swap);
unpack_settings.apply();
apply_settings = false;
}
switch (dim)
{
case rsx::texture_dimension_extended::texture_dimension_1d:
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
// mip_level % mipmap_per_layer will always be equal to mip_level
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
mip_level++;
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
break;
default:
ASSUME(0);
fmt::throw_exception("Unreachable" HERE);
}
}
return;
}
}
@ -615,9 +630,6 @@ namespace gl
}
glBindTexture(target, id);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
// The rest of sampler state is now handled by sampler state objects
@ -627,6 +639,7 @@ namespace gl
size_t texture_data_sz = depth * height * aligned_pitch;
std::vector<gsl::byte> data_upload_buf(texture_data_sz);
// TODO: GL drivers support byteswapping and this should be used instead of doing so manually
const auto format_type = get_format_type(gcm_format);
const GLenum gl_format = std::get<0>(format_type);
const GLenum gl_type = std::get<1>(format_type);

View file

@ -516,19 +516,26 @@ namespace vk
u32 block_in_pixel = get_format_block_size_in_texel(format);
u8 block_size_in_bytes = get_format_block_size_in_bytes(format);
texture_uploader_capabilities caps{ true, false, heap_align };
vk::buffer* scratch_buf = nullptr;
u32 scratch_offset = 0;
for (const rsx_subresource_layout &layout : subresource_layout)
{
u32 row_pitch = (((layout.width_in_block * block_size_in_bytes) + heap_align - 1) / heap_align) * heap_align;
if (heap_align != 256) verify(HERE), row_pitch == heap_align;
u32 image_linear_size = row_pitch * layout.height_in_block * layout.depth;
//Map with extra padding bytes in case of realignment
// Map with extra padding bytes in case of realignment
size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
VkBuffer buffer_handle = upload_heap.heap->value;
// Only do GPU-side conversion if occupancy is good
caps.supports_byteswap = (image_linear_size >= 1024);
gsl::span<gsl::byte> mapped{ (gsl::byte*)mapped_buffer, ::narrow<int>(image_linear_size) };
upload_texture_subresource(mapped, layout, format, is_swizzled, false, heap_align);
auto opt = upload_texture_subresource(mapped, layout, format, is_swizzled, caps);
upload_heap.unmap();
VkBufferImageCopy copy_info = {};
@ -542,25 +549,61 @@ namespace vk
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
if (opt.require_swap || dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
{
// Executing GPU tasks on host_visible RAM is awful, copy to device-local buffer instead
auto scratch_buf = vk::get_scratch_buffer();
if (!scratch_buf)
{
scratch_buf = vk::get_scratch_buffer();
}
else if ((scratch_offset + image_linear_size) > scratch_buf->size())
{
scratch_offset = 0;
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_buf->size(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
}
VkBufferCopy copy = {};
copy.srcOffset = offset_in_buffer;
copy.dstOffset = 0;
copy.dstOffset = scratch_offset;
copy.size = image_linear_size;
vkCmdCopyBuffer(cmd, buffer_handle, scratch_buf->value, 1, &copy);
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
insert_buffer_memory_barrier(cmd, scratch_buf->value, scratch_offset, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
}
copy_info.bufferOffset = 0;
if (opt.require_swap)
{
if (opt.element_size == 4)
{
vk::get_compute_task<vk::cs_shuffle_32>()->run(cmd, scratch_buf, image_linear_size, scratch_offset);
}
else if (opt.element_size == 2)
{
vk::get_compute_task<vk::cs_shuffle_16>()->run(cmd, scratch_buf, image_linear_size, scratch_offset);
}
else
{
fmt::throw_exception("Unreachable" HERE);
}
}
if (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
{
copy_info.bufferOffset = scratch_offset;
scratch_offset = align(scratch_offset + image_linear_size, 512);
vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, copy_info);
}
else if (opt.require_swap)
{
insert_buffer_memory_barrier(cmd, scratch_buf->value, scratch_offset, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
copy_info.bufferOffset = scratch_offset;
scratch_offset = align(scratch_offset + image_linear_size, 512);
vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
}
else
{
vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);