rsx: Allow GPU-accelerated stream manipulation when doing texture uploads
This commit is contained in:
parent e0a7912d7c
commit 99fb6d6a5d
5 changed files with 244 additions and 131 deletions
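In short: callers now describe what their backend can do in a texture_uploader_capabilities struct, and upload_texture_subresource() reports back through texture_memory_info whether the staged data still needs a byte swap that can be offloaded to the GPU. A minimal caller-side sketch of that contract (not code from this commit; every name other than the two structs and upload_texture_subresource() is a hypothetical placeholder):

	// Illustrative caller of the new interface; map_staging_buffer() and
	// dispatch_gpu_byteswap() are hypothetical helpers, not RPCS3 functions.
	texture_uploader_capabilities caps{ /*supports_byteswap*/ true, /*supports_vtc_decoding*/ false, /*alignment*/ 256 };
	gsl::span<gsl::byte> staging = map_staging_buffer();
	texture_memory_info info = upload_texture_subresource(staging, layout, format, is_swizzled, caps);
	if (info.require_swap)
	{
		// The CPU copy skipped endian conversion; run a 16- or 32-bit byte shuffle
		// on the GPU (element_size selects the word size) before the image copy.
		dispatch_gpu_byteswap(staging, info.element_size);
	}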
@@ -469,48 +469,51 @@ std::vector<rsx_subresource_layout> get_subresources_layout(const rsx::vertex_te
 	return get_subresources_layout_impl(texture);
 }
 
-void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, bool vtc_support, size_t dst_row_pitch_multiple_of)
+texture_memory_info upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps)
 {
 	u16 w = src_layout.width_in_block;
 	u16 h = src_layout.height_in_block;
 	u16 depth = src_layout.depth;
 	u32 pitch = src_layout.pitch_in_block;
 
+	texture_memory_info result{};
+
 	// Ignore when texture width > pitch
 	if (w > pitch)
-		return;
+		return result;
+
+	// Check if we can use a fast path
+	int word_size = 0;
+	int words_per_block;
+	u32 dst_pitch_in_block;
 
+	// NOTE: Avoid block optimization for formats that can be modified internally by the GPU itself
+	// Since the gpu code does not attempt to do wide translations (e.g WZYX32->XYZW32), only perform, per-channel transform and use proper swizzles to get the proper output
 	switch (format)
 	{
 	case CELL_GCM_TEXTURE_B8:
 	{
-		if (is_swizzled)
-			copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u8>(w, dst_row_pitch_multiple_of));
-		else
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u8>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+		word_size = words_per_block = 1;
+		dst_pitch_in_block = get_row_pitch_in_block<u8>(w, caps.alignment);
 		break;
 	}
 
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
 	{
-		copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+		copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
 		break;
 	}
 
 	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
 	{
-		copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u16>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+		copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u16>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
 		break;
 	}
 
 	case CELL_GCM_TEXTURE_R6G5B5:
 	{
 		if (is_swizzled)
-			copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of));
+			copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment));
 		else
-			copy_rgb655_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+			copy_rgb655_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment), src_layout.pitch_in_block);
 		break;
 	}
 
@@ -526,10 +529,9 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
 	case CELL_GCM_TEXTURE_R5G6B5:
 	case CELL_GCM_TEXTURE_G8B8:
 	{
-		if (is_swizzled)
-			copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of));
-		else
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+		word_size = 2;
+		words_per_block = 1;
+		dst_pitch_in_block = get_row_pitch_in_block<u16>(w, caps.alignment);
 		break;
 	}
 
@@ -538,10 +540,9 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
 	case CELL_GCM_TEXTURE_DEPTH24_D8:
 	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested
 	{
-		if (is_swizzled)
-			copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of));
-		else
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), 1, w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+		word_size = 4;
+		words_per_block = 1;
+		dst_pitch_in_block = get_row_pitch_in_block<u32>(w, caps.alignment);
 		break;
 	}
 
@@ -554,13 +555,9 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
 	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
 	{
 		const u16 block_size = get_format_block_size_in_bytes(format);
-		const u16 words_per_block = block_size / 2;
-		const auto dst_pitch_in_block = get_row_pitch_in_block(block_size, w, dst_row_pitch_multiple_of);
-
-		if (is_swizzled)
-			copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
-		else
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+		word_size = 2;
+		words_per_block = block_size / 2;
+		dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment);
 		break;
 	}
 
@@ -568,28 +565,24 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
 	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
 	{
 		const u16 block_size = get_format_block_size_in_bytes(format);
-		const u16 words_per_block = block_size / 4;
-		const auto dst_pitch_in_block = get_row_pitch_in_block(block_size, w, dst_row_pitch_multiple_of);
-
-		if (is_swizzled)
-			copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
-		else
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+		word_size = 4;
+		words_per_block = block_size / 4;
+		dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment);
 		break;
 	}
 
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
 	{
-		if (depth > 1 && !vtc_support)
+		if (depth > 1 && !caps.supports_vtc_decoding)
 		{
 			// PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
 			// This is only supported using Nvidia OpenGL.
 			// Remove the VTC tiling to support ATI and Vulkan.
-			copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+			copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
 		}
 		else
 		{
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
 		}
 		break;
 	}
 
@@ -597,16 +590,16 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
 	{
-		if (depth > 1 && !vtc_support)
+		if (depth > 1 && !caps.supports_vtc_decoding)
 		{
 			// PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
 			// This is only supported using Nvidia OpenGL.
 			// Remove the VTC tiling to support ATI and Vulkan.
-			copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+			copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
 		}
 		else
 		{
-			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block);
+			copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
 		}
 		break;
 	}
 
@@ -614,6 +607,56 @@ void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subre
 	default:
 		fmt::throw_exception("Wrong format 0x%x" HERE, format);
 	}
+
+	if (word_size)
+	{
+		if (word_size == 1)
+		{
+			if (is_swizzled)
+				copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
+			else
+				copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+		}
+		else if (caps.supports_byteswap)
+		{
+			result.require_swap = true;
+			result.element_size = word_size;
+
+			if (word_size == 2)
+			{
+				if (is_swizzled)
+					copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
+				else
+					copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+			}
+			else if (word_size == 4)
+			{
+				if (is_swizzled)
+					copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
+				else
+					copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+			}
+		}
+		else
+		{
+			if (word_size == 2)
+			{
+				if (is_swizzled)
+					copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
+				else
+					copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+			}
+			else if (word_size == 4)
+			{
+				if (is_swizzled)
+					copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
+				else
+					copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
+			}
+		}
+	}
+
+	return result;
 }
 
 /**
@@ -110,6 +110,19 @@ struct rsx_subresource_layout
 	u32 pitch_in_block;
 };
 
+struct texture_memory_info
+{
+	int element_size;
+	bool require_swap;
+};
+
+struct texture_uploader_capabilities
+{
+	bool supports_byteswap;
+	bool supports_vtc_decoding;
+	size_t alignment;
+};
+
 /**
  * Get size to store texture in a linear fashion.
  * Storage is assumed to use a rowPitchAlignment boundary for every row of texture.
@@ -125,7 +138,7 @@ size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_
 std::vector<rsx_subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture);
 std::vector<rsx_subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture);
 
-void upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, bool vtc_support, size_t dst_row_pitch_multiple_of);
+texture_memory_info upload_texture_subresource(gsl::span<gsl::byte> dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps);
 
 u8 get_format_block_size_in_bytes(int format);
 u8 get_format_block_size_in_texel(int format);
@@ -115,7 +115,8 @@ namespace {
 		size_t offset_in_buffer = 0;
 		for (const rsx_subresource_layout &layout : input_layouts)
 		{
-			upload_texture_subresource(mapped_buffer.subspan(offset_in_buffer), layout, format, is_swizzled, false, 256);
+			texture_uploader_capabilities caps{ false, false, 256 };
+			upload_texture_subresource(mapped_buffer.subspan(offset_in_buffer), layout, format, is_swizzled, caps);
 			UINT row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
 			command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)mip_level), 0, 0, 0,
 				&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(),
@@ -63,7 +63,7 @@ namespace gl
 	case CELL_GCM_TEXTURE_A1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
 	case CELL_GCM_TEXTURE_A4R4G4B4: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4);
 	case CELL_GCM_TEXTURE_R5G6B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
-	case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_BYTE);
+	case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8);
 	case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE);
 	case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
 	case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
@@ -370,7 +370,7 @@ namespace gl
 	case CELL_GCM_TEXTURE_R5G5B5A1:
 	case CELL_GCM_TEXTURE_R6G5B5:
 	case CELL_GCM_TEXTURE_R5G6B5:
-	case CELL_GCM_TEXTURE_A8R8G8B8: // TODO
+	case CELL_GCM_TEXTURE_A8R8G8B8:
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
@@ -458,107 +458,122 @@ namespace gl
 		const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<gsl::byte>& staging_buffer)
 	{
 		int mip_level = 0;
-		bool vtc_support = gl::get_driver_caps().vendor_NVIDIA;
+		texture_uploader_capabilities caps{ true, false, 4 };
 
-		if (is_compressed_format(format))
+		pixel_unpack_settings unpack_settings;
+		unpack_settings.row_length(0).alignment(4);
+
+		if (LIKELY(is_compressed_format(format)))
 		{
 			//Compressed formats have a 4-byte alignment
 			//TODO: Verify that samplers are not affected by the padding
 			width = align(width, 4);
 			height = align(height, 4);
-		}
 
-		if (dim == rsx::texture_dimension_extended::texture_dimension_1d)
-		{
-			if (!is_compressed_format(format))
+			caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
+			unpack_settings.apply();
+
+			for (const rsx_subresource_layout& layout : input_layouts)
 			{
-				for (const rsx_subresource_layout &layout : input_layouts)
+				upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
+
+				switch (dim)
 				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
-					glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
-				}
-			}
-			else
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
+				case rsx::texture_dimension_extended::texture_dimension_1d:
 				{
 					u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, gl_format, size, staging_buffer.data());
+					break;
 				}
-			}
-			return;
-		}
-
-		if (dim == rsx::texture_dimension_extended::texture_dimension_2d)
-		{
-			if (!is_compressed_format(format))
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
-				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
-					glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
-				}
-			}
-			else
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
+				case rsx::texture_dimension_extended::texture_dimension_2d:
 				{
 					u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
+					break;
 				}
-			}
-			return;
-		}
-
-		if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap)
-		{
-			// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
-			// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
-			// mip_level % mipmap_per_layer will always be equal to mip_level
-			if (!is_compressed_format(format))
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
-				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
-					glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
-					mip_level++;
-				}
-			}
-			else
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
+				case rsx::texture_dimension_extended::texture_dimension_cubemap:
 				{
+					// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
+					// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
+					// mip_level % mipmap_per_layer will always be equal to mip_level
 					u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
 					mip_level++;
+					break;
 				}
-			}
-			return;
-		}
-
-		if (dim == rsx::texture_dimension_extended::texture_dimension_3d)
-		{
-			if (!is_compressed_format(format))
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
-				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
-					glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
-				}
-			}
-			else
-			{
-				for (const rsx_subresource_layout &layout : input_layouts)
+				case rsx::texture_dimension_extended::texture_dimension_3d:
 				{
 					u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, gl_format, size, staging_buffer.data());
+					break;
 				}
+				default:
+				{
+					ASSUME(0);
+					fmt::throw_exception("Unreachable" HERE);
+				}
 				}
+			}
+		}
+		else
+		{
+			bool apply_settings = true;
+			switch (gl_type)
+			{
+			case GL_UNSIGNED_INT_8_8_8_8:
+				// NOTE: GL_UNSIGNED_INT_8_8_8_8 is already a swapped type
+				// TODO: Remove reliance on format and type checks when compute acceleration is implemented
+				apply_settings = false;
+				break;
+			case GL_BYTE:
+			case GL_UNSIGNED_BYTE:
+				// Multi-channel format uploaded one byte at a time. This is due to poor driver support for formats like GL_UNSIGNED SHORT_8_8
+				// Do byteswapping in software for now until compute acceleration is available
+				apply_settings = (gl_format == GL_RED);
+				caps.supports_byteswap = apply_settings;
+				break;
+			default:
+				break;
+			}
+
+			if (!apply_settings)
+			{
+				unpack_settings.apply();
+			}
+
+			for (const rsx_subresource_layout& layout : input_layouts)
+			{
+				auto op = upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
+				if (apply_settings)
+				{
+					unpack_settings.swap_bytes(op.require_swap);
+					unpack_settings.apply();
+					apply_settings = false;
+				}
+
+				switch (dim)
+				{
+				case rsx::texture_dimension_extended::texture_dimension_1d:
+					glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
+					break;
+				case rsx::texture_dimension_extended::texture_dimension_2d:
+					glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
+					break;
+				case rsx::texture_dimension_extended::texture_dimension_cubemap:
+					// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
+					// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
+					// mip_level % mipmap_per_layer will always be equal to mip_level
+					glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
+					mip_level++;
+					break;
+				case rsx::texture_dimension_extended::texture_dimension_3d:
+					glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
+					break;
+				default:
+					ASSUME(0);
+					fmt::throw_exception("Unreachable" HERE);
+				}
+			}
 			return;
 		}
 	}
@@ -615,9 +630,6 @@ namespace gl
 	}
 
 	glBindTexture(target, id);
-	glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
-	glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-	glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
 	glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
 	glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
 	// The rest of sampler state is now handled by sampler state objects
@@ -627,6 +639,7 @@ namespace gl
 	size_t texture_data_sz = depth * height * aligned_pitch;
 	std::vector<gsl::byte> data_upload_buf(texture_data_sz);
 
+	// TODO: GL drivers support byteswapping and this should be used instead of doing so manually
 	const auto format_type = get_format_type(gcm_format);
 	const GLenum gl_format = std::get<0>(format_type);
 	const GLenum gl_type = std::get<1>(format_type);
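The OpenGL path above leans on the driver's pixel unpack state for the byte swap rather than a compute pass. A bare-bones illustration of that mechanism with raw GL calls (independent of the pixel_unpack_settings wrapper used in the patch; need_swap, level, w, h and data are placeholders, not names from this commit):

	// Let the driver swap bytes while unpacking the client-side buffer.
	glPixelStorei(GL_UNPACK_SWAP_BYTES, need_swap ? GL_TRUE : GL_FALSE);
	glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
	glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
	glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, data);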
@@ -516,19 +516,26 @@ namespace vk
 		u32 block_in_pixel = get_format_block_size_in_texel(format);
 		u8 block_size_in_bytes = get_format_block_size_in_bytes(format);
 
+		texture_uploader_capabilities caps{ true, false, heap_align };
+		vk::buffer* scratch_buf = nullptr;
+		u32 scratch_offset = 0;
+
 		for (const rsx_subresource_layout &layout : subresource_layout)
 		{
 			u32 row_pitch = (((layout.width_in_block * block_size_in_bytes) + heap_align - 1) / heap_align) * heap_align;
 			if (heap_align != 256) verify(HERE), row_pitch == heap_align;
 			u32 image_linear_size = row_pitch * layout.height_in_block * layout.depth;
 
-			//Map with extra padding bytes in case of realignment
+			// Map with extra padding bytes in case of realignment
 			size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
 			void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
 			VkBuffer buffer_handle = upload_heap.heap->value;
 
+			// Only do GPU-side conversion if occupancy is good
+			caps.supports_byteswap = (image_linear_size >= 1024);
+
 			gsl::span<gsl::byte> mapped{ (gsl::byte*)mapped_buffer, ::narrow<int>(image_linear_size) };
-			upload_texture_subresource(mapped, layout, format, is_swizzled, false, heap_align);
+			auto opt = upload_texture_subresource(mapped, layout, format, is_swizzled, caps);
 			upload_heap.unmap();
 
 			VkBufferImageCopy copy_info = {};
@@ -542,25 +549,61 @@ namespace vk
 			copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
 			copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
 
-			if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
-				dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+			if (opt.require_swap || dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
 			{
 				// Executing GPU tasks on host_visible RAM is awful, copy to device-local buffer instead
-				auto scratch_buf = vk::get_scratch_buffer();
+				if (!scratch_buf)
+				{
+					scratch_buf = vk::get_scratch_buffer();
+				}
+				else if ((scratch_offset + image_linear_size) > scratch_buf->size())
+				{
+					scratch_offset = 0;
+					insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_buf->size(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+						VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
+				}
 
 				VkBufferCopy copy = {};
 				copy.srcOffset = offset_in_buffer;
-				copy.dstOffset = 0;
+				copy.dstOffset = scratch_offset;
 				copy.size = image_linear_size;
 
 				vkCmdCopyBuffer(cmd, buffer_handle, scratch_buf->value, 1, &copy);
 
-				insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+				insert_buffer_memory_barrier(cmd, scratch_buf->value, scratch_offset, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
+			}
 
-				copy_info.bufferOffset = 0;
+			if (opt.require_swap)
+			{
+				if (opt.element_size == 4)
+				{
+					vk::get_compute_task<vk::cs_shuffle_32>()->run(cmd, scratch_buf, image_linear_size, scratch_offset);
+				}
+				else if (opt.element_size == 2)
+				{
+					vk::get_compute_task<vk::cs_shuffle_16>()->run(cmd, scratch_buf, image_linear_size, scratch_offset);
+				}
+				else
+				{
+					fmt::throw_exception("Unreachable" HERE);
+				}
+			}
+
+			if (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
+			{
+				copy_info.bufferOffset = scratch_offset;
+				scratch_offset = align(scratch_offset + image_linear_size, 512);
 				vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, copy_info);
 			}
+			else if (opt.require_swap)
+			{
+				insert_buffer_memory_barrier(cmd, scratch_buf->value, scratch_offset, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+
+				copy_info.bufferOffset = scratch_offset;
+				scratch_offset = align(scratch_offset + image_linear_size, 512);
+				vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
+			}
 			else
 			{
 				vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
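For context, the cs_shuffle_16 and cs_shuffle_32 compute passes invoked above reorder the bytes inside every 16-bit or 32-bit word of the scratch buffer. An equivalent CPU reference of that transform, shown here only to illustrate the behaviour selected by texture_memory_info::element_size (this is not the shader code from the repository):

	#include <cstdint>
	#include <cstddef>
	#include <utility>

	// Swap bytes within each 2- or 4-byte word of a packed buffer.
	void byteswap_words(std::uint8_t* bytes, std::size_t size_in_bytes, int element_size)
	{
		if (element_size == 2)
		{
			for (std::size_t i = 0; i + 1 < size_in_bytes; i += 2)
				std::swap(bytes[i], bytes[i + 1]);
		}
		else if (element_size == 4)
		{
			for (std::size_t i = 0; i + 3 < size_in_bytes; i += 4)
			{
				std::swap(bytes[i], bytes[i + 3]);
				std::swap(bytes[i + 1], bytes[i + 2]);
			}
		}
	}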