mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-08-12 02:59:51 +00:00
vk: Enable gpu deswizzling
This commit is contained in:
parent
9cd3530c98
commit
1266b63135
5 changed files with 159 additions and 42 deletions
|
@ -668,13 +668,19 @@ texture_memory_info upload_texture_subresource(gsl::span<gsl::byte> dst_buffer,
|
||||||
{
|
{
|
||||||
result.require_swap = true;
|
result.require_swap = true;
|
||||||
result.element_size = word_size;
|
result.element_size = word_size;
|
||||||
|
result.block_length = words_per_block;
|
||||||
|
|
||||||
if (word_size == 2)
|
if (word_size == 2)
|
||||||
{
|
{
|
||||||
const bool skip_swizzle = ((word_size * words_per_block) & 3) == 0 && caps.supports_hw_deswizzle;
|
if (is_swizzled)
|
||||||
if (is_swizzled && skip_swizzle) result.require_deswizzle = true;
|
{
|
||||||
|
if (((word_size * words_per_block) & 3) == 0 && caps.supports_hw_deswizzle)
|
||||||
|
{
|
||||||
|
result.require_deswizzle = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (is_swizzled && !skip_swizzle)
|
if (is_swizzled && !result.require_deswizzle)
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
else
|
else
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
|
|
@ -104,6 +104,7 @@ struct rsx_subresource_layout
|
||||||
struct texture_memory_info
|
struct texture_memory_info
|
||||||
{
|
{
|
||||||
int element_size;
|
int element_size;
|
||||||
|
int block_length;
|
||||||
bool require_swap;
|
bool require_swap;
|
||||||
bool require_deswizzle;
|
bool require_deswizzle;
|
||||||
};
|
};
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
#include "VKHelpers.h"
|
#include "VKHelpers.h"
|
||||||
#include "Utilities/StrUtil.h"
|
#include "Utilities/StrUtil.h"
|
||||||
|
|
||||||
#define VK_MAX_COMPUTE_TASKS 1024 // Max number of jobs per frame
|
#define VK_MAX_COMPUTE_TASKS 32768 // Max number of jobs per frame
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
{
|
{
|
||||||
|
@ -22,7 +22,9 @@ namespace vk
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
bool unroll_loops = true;
|
bool unroll_loops = true;
|
||||||
bool uniform_inputs = false;
|
bool uniform_inputs = false;
|
||||||
|
bool use_push_constants = false;
|
||||||
u32 ssbo_count = 1;
|
u32 ssbo_count = 1;
|
||||||
|
u32 push_constants_size = 0;
|
||||||
u32 optimal_group_size = 1;
|
u32 optimal_group_size = 1;
|
||||||
u32 optimal_kernel_size = 1;
|
u32 optimal_kernel_size = 1;
|
||||||
|
|
||||||
|
@ -77,6 +79,16 @@ namespace vk
|
||||||
layout_info.setLayoutCount = 1;
|
layout_info.setLayoutCount = 1;
|
||||||
layout_info.pSetLayouts = &m_descriptor_layout;
|
layout_info.pSetLayouts = &m_descriptor_layout;
|
||||||
|
|
||||||
|
VkPushConstantRange push_constants{};
|
||||||
|
if (use_push_constants)
|
||||||
|
{
|
||||||
|
push_constants.size = push_constants_size;
|
||||||
|
push_constants.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||||
|
|
||||||
|
layout_info.pushConstantRangeCount = 1;
|
||||||
|
layout_info.pPushConstantRanges = &push_constants;
|
||||||
|
}
|
||||||
|
|
||||||
CHECK_RESULT(vkCreatePipelineLayout(*get_current_renderer(), &layout_info, nullptr, &m_pipeline_layout));
|
CHECK_RESULT(vkCreatePipelineLayout(*get_current_renderer(), &layout_info, nullptr, &m_pipeline_layout));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,7 +270,7 @@ namespace vk
|
||||||
"\n"
|
"\n"
|
||||||
"void main()\n"
|
"void main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" uint index = %idx;\n"
|
" uint index = gl_GlobalInvocationID.x;\n"
|
||||||
" uint value;\n"
|
" uint value;\n"
|
||||||
" %vars"
|
" %vars"
|
||||||
"\n";
|
"\n";
|
||||||
|
@ -550,19 +562,26 @@ namespace vk
|
||||||
};
|
};
|
||||||
|
|
||||||
// Reverse morton-order block arrangement
|
// Reverse morton-order block arrangement
|
||||||
|
struct cs_deswizzle_base : compute_task
|
||||||
|
{
|
||||||
|
virtual void run(VkCommandBuffer cmd, const vk::buffer* dst, u32 out_offset, const vk::buffer* src, u32 in_offset, u32 width, u32 height, u32 depth) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
template <typename _BlockType, typename _BaseType, bool _SwapBytes>
|
template <typename _BlockType, typename _BaseType, bool _SwapBytes>
|
||||||
struct cs_deswizzle_3d : compute_task
|
struct cs_deswizzle_3d : cs_deswizzle_base
|
||||||
{
|
{
|
||||||
union params_t
|
union params_t
|
||||||
{
|
{
|
||||||
u32 data[4];
|
u32 data[6];
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
u32 width;
|
u32 width;
|
||||||
u32 height;
|
u32 height;
|
||||||
|
u32 depth;
|
||||||
u32 logw;
|
u32 logw;
|
||||||
u32 logh;
|
u32 logh;
|
||||||
|
u32 logd;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
params;
|
params;
|
||||||
|
@ -578,21 +597,25 @@ namespace vk
|
||||||
verify("Unsupported block type" HERE), (sizeof(_BlockType) & 3) == 0;
|
verify("Unsupported block type" HERE), (sizeof(_BlockType) & 3) == 0;
|
||||||
|
|
||||||
ssbo_count = 2;
|
ssbo_count = 2;
|
||||||
uniform_inputs = true;
|
use_push_constants = true;
|
||||||
|
push_constants_size = 24;
|
||||||
|
|
||||||
create();
|
create();
|
||||||
|
|
||||||
m_src =
|
m_src =
|
||||||
"#version 450\n"
|
"#version 450\n"
|
||||||
"layout(local_size_x = 8, local_size_y = 8, local_size_z = 1)\n\n"
|
"layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;\n\n"
|
||||||
|
|
||||||
"layout(set=0, binding=0, std430) buffer ssbo{ uint data_in[]; }\n"
|
"layout(set=0, binding=0, std430) buffer ssbo0{ uint data_in[]; };\n"
|
||||||
"layout(set=0, binding=1, std430) buffer ssbo{ uint data_out[]; }\n"
|
"layout(set=0, binding=1, std430) buffer ssbo1{ uint data_out[]; };\n"
|
||||||
"layout(set=0, binding=2, std140) uniform buffer parameters\n"
|
"layout(push_constant) uniform parameters\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" uint image_width;\n"
|
" uint image_width;\n"
|
||||||
" uint image_height;\n"
|
" uint image_height;\n"
|
||||||
|
" uint image_depth;\n"
|
||||||
" uint image_logw;\n"
|
" uint image_logw;\n"
|
||||||
" uint image_logh;\n"
|
" uint image_logh;\n"
|
||||||
|
" uint image_logd;\n"
|
||||||
"};\n\n"
|
"};\n\n"
|
||||||
|
|
||||||
"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
|
"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
|
||||||
|
@ -629,24 +652,27 @@ namespace vk
|
||||||
" log2d--;\n"
|
" log2d--;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" while(x > 0 || y > 0 || z > 0)\n"
|
" while(x > 0 || y > 0 || z > 0);\n"
|
||||||
"\n"
|
"\n"
|
||||||
" return offset;\n"
|
" return offset;\n"
|
||||||
"}\n\n"
|
"}\n\n"
|
||||||
|
|
||||||
"void main()\n"
|
"void main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" if (gl_GlobalInvocationID.x >= image_width || gl_GlobalInvocationID.y >= image_height)\n"
|
" if (any(greaterThanEqual(gl_GlobalInvocationID, uvec3(image_width, image_height, image_depth))))\n"
|
||||||
" return;\n\n"
|
" return;\n\n"
|
||||||
|
|
||||||
" uint texel_id = (gl_GlobalInvocationID.y * image_width) + gl_GlobalInvocationID.x"
|
" uint texel_id = (gl_GlobalInvocationID.z * image_width * image_height) + (gl_GlobalInvocationID.y * image_width) + gl_GlobalInvocationID.x;\n"
|
||||||
" uint word_count = %_wordcount;\n"
|
" uint word_count = %_wordcount;\n"
|
||||||
" uint dst_id = (index * word_count);\n\n"
|
" uint dst_id = (texel_id * word_count);\n\n"
|
||||||
|
|
||||||
|
" uint src_id = get_z_index(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y, gl_GlobalInvocationID.z, image_logw, image_logh, image_logd);\n"
|
||||||
|
" src_id *= word_count;\n\n"
|
||||||
|
|
||||||
" uint src_id = get_z_index(gl_GlobalInvocationID.x, gl_GlobalInvocation.y, 0, image_logw, image_logh, 0);\n"
|
|
||||||
" for (uint i = 0; i < word_count; ++i)\n"
|
" for (uint i = 0; i < word_count; ++i)\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" data_out[dst_id++] = %f(data_in[src_id++]);\n"
|
" uint value = data_in[src_id++];\n"
|
||||||
|
" data_out[dst_id++] = %f(value);\n"
|
||||||
" }\n\n"
|
" }\n\n"
|
||||||
|
|
||||||
"}\n";
|
"}\n";
|
||||||
|
@ -681,24 +707,14 @@ namespace vk
|
||||||
{
|
{
|
||||||
m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
|
m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
|
||||||
m_program->bind_buffer({ dst_buffer->value, out_offset, block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
|
m_program->bind_buffer({ dst_buffer->value, out_offset, block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
|
||||||
m_program->bind_buffer({ m_param_buffer->value, 0, 16 }, 2, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_parameters(VkCommandBuffer cmd)
|
void set_parameters(VkCommandBuffer cmd)
|
||||||
{
|
{
|
||||||
verify(HERE), uniform_inputs;
|
vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, params.data);
|
||||||
|
|
||||||
if (!m_param_buffer)
|
|
||||||
{
|
|
||||||
auto pdev = vk::get_current_renderer();
|
|
||||||
m_param_buffer = std::make_unique<vk::buffer>(*pdev, 256, pdev->get_memory_mapping().host_visible_coherent,
|
|
||||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vkCmdUpdateBuffer(cmd, m_param_buffer->value, 0, 16, params.data);
|
void run(VkCommandBuffer cmd, const vk::buffer* dst, u32 out_offset, const vk::buffer* src, u32 in_offset, u32 width, u32 height, u32 depth) override
|
||||||
}
|
|
||||||
|
|
||||||
void run(VkCommandBuffer cmd, const vk::buffer* dst, u32 out_offset, const vk::buffer* src, u32 in_offset, u32 width, u32 height, u32 depth)
|
|
||||||
{
|
{
|
||||||
dst_buffer = dst;
|
dst_buffer = dst;
|
||||||
src_buffer = src;
|
src_buffer = src;
|
||||||
|
@ -708,14 +724,16 @@ namespace vk
|
||||||
this->block_length = sizeof(_BlockType) * width * height * depth;
|
this->block_length = sizeof(_BlockType) * width * height * depth;
|
||||||
|
|
||||||
params.width = width;
|
params.width = width;
|
||||||
params.height = height * depth;
|
params.height = height;
|
||||||
|
params.depth = depth;
|
||||||
params.logw = rsx::ceil_log2(width);
|
params.logw = rsx::ceil_log2(width);
|
||||||
params.logh = rsx::ceil_log2(height);
|
params.logh = rsx::ceil_log2(height);
|
||||||
set_parameters();
|
params.logd = rsx::ceil_log2(depth);
|
||||||
|
set_parameters(cmd);
|
||||||
|
|
||||||
const u32 invocations_x = align(params.width, 8) / 8;
|
const u32 invocations_x = align(params.width, 8) / 8;
|
||||||
const u32 invocations_y = align(params.height, 8) / 8;
|
const u32 invocations_y = align(params.height, 8) / 8;
|
||||||
compute_task::run(cmd, invocations_x, invocations_y, 1);
|
compute_task::run(cmd, invocations_x, invocations_y, depth);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -3299,6 +3299,7 @@ public:
|
||||||
std::string shader_type = type == ::glsl::program_domain::glsl_vertex_program ? "vertex" :
|
std::string shader_type = type == ::glsl::program_domain::glsl_vertex_program ? "vertex" :
|
||||||
type == ::glsl::program_domain::glsl_fragment_program ? "fragment" : "compute";
|
type == ::glsl::program_domain::glsl_fragment_program ? "fragment" : "compute";
|
||||||
|
|
||||||
|
LOG_NOTICE(RSX, "%s", m_source);
|
||||||
fmt::throw_exception("Failed to compile %s shader" HERE, shader_type);
|
fmt::throw_exception("Failed to compile %s shader" HERE, shader_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -538,6 +538,90 @@ namespace vk
|
||||||
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void gpu_deswizzle_sections_impl(VkCommandBuffer cmd, vk::buffer* scratch_buf, u32 dst_offset, int word_size, int word_count, bool swap_bytes, std::vector<VkBufferImageCopy>& sections)
|
||||||
|
{
|
||||||
|
// NOTE: This has to be done individually for every LOD
|
||||||
|
vk::cs_deswizzle_base* job = nullptr;
|
||||||
|
const auto block_size = (word_size * word_count);
|
||||||
|
|
||||||
|
verify(HERE), word_size == 4 || word_size == 2;
|
||||||
|
|
||||||
|
if (!swap_bytes)
|
||||||
|
{
|
||||||
|
if (word_size == 4)
|
||||||
|
{
|
||||||
|
switch (block_size)
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u32, u32, false>>();
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u64, u32, false>>();
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u128, u32, false>>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (block_size)
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u32, u16, false>>();
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u64, u16, false>>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (word_size == 4)
|
||||||
|
{
|
||||||
|
switch (block_size)
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u32, u32, true>>();
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u64, u32, true>>();
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u128, u32, true>>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (block_size)
|
||||||
|
{
|
||||||
|
case 4:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u32, u16, true>>();
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
job = vk::get_compute_task<cs_deswizzle_3d<u64, u16, true>>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(HERE), job;
|
||||||
|
|
||||||
|
for (auto &section : sections)
|
||||||
|
{
|
||||||
|
job->run(cmd, scratch_buf, dst_offset, scratch_buf, section.bufferOffset,
|
||||||
|
section.imageExtent.width, section.imageExtent.height, section.imageExtent.depth);
|
||||||
|
|
||||||
|
const u32 packed_size = section.imageExtent.width * section.imageExtent.height * section.imageExtent.depth * block_size;
|
||||||
|
section.bufferOffset = dst_offset;
|
||||||
|
dst_offset += packed_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(HERE), dst_offset <= scratch_buf->size();
|
||||||
|
}
|
||||||
|
|
||||||
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
|
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
|
||||||
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
|
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
|
||||||
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align)
|
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align)
|
||||||
|
@ -600,7 +684,7 @@ namespace vk
|
||||||
copy_info.imageSubresource.mipLevel = layout.level;
|
copy_info.imageSubresource.mipLevel = layout.level;
|
||||||
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
|
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
|
||||||
|
|
||||||
if (opt.require_swap || dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
|
if (opt.require_swap || opt.require_deswizzle || dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||||
{
|
{
|
||||||
if (!scratch_buf)
|
if (!scratch_buf)
|
||||||
{
|
{
|
||||||
|
@ -623,7 +707,7 @@ namespace vk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt.require_swap || dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
|
if (opt.require_swap || opt.require_deswizzle || dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||||
{
|
{
|
||||||
verify(HERE), scratch_buf;
|
verify(HERE), scratch_buf;
|
||||||
vkCmdCopyBuffer(cmd, upload_heap.heap->value, scratch_buf->value, (u32)buffer_copies.size(), buffer_copies.data());
|
vkCmdCopyBuffer(cmd, upload_heap.heap->value, scratch_buf->value, (u32)buffer_copies.size(), buffer_copies.data());
|
||||||
|
@ -632,8 +716,12 @@ namespace vk
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Swap if requested
|
// Swap and swizzle if requested
|
||||||
if (opt.require_swap)
|
if (opt.require_deswizzle)
|
||||||
|
{
|
||||||
|
gpu_deswizzle_sections_impl(cmd, scratch_buf, scratch_offset, opt.element_size, opt.block_length, opt.require_swap, copy_regions);
|
||||||
|
}
|
||||||
|
else if (opt.require_swap)
|
||||||
{
|
{
|
||||||
if (opt.element_size == 4)
|
if (opt.element_size == 4)
|
||||||
{
|
{
|
||||||
|
@ -658,9 +746,12 @@ namespace vk
|
||||||
vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, *rIt);
|
vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, *rIt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (opt.require_swap)
|
else if (scratch_buf)
|
||||||
{
|
{
|
||||||
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
verify(HERE), opt.require_deswizzle || opt.require_swap;
|
||||||
|
|
||||||
|
const auto block_start = copy_regions.front().bufferOffset;
|
||||||
|
insert_buffer_memory_barrier(cmd, scratch_buf->value, block_start, scratch_offset, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||||
|
|
||||||
vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, (u32)copy_regions.size(), copy_regions.data());
|
vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, (u32)copy_regions.size(), copy_regions.data());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue