vk: Add support for queue family migration and refactor some texture upload code

This commit is contained in:
kd-11 2021-02-25 21:57:10 +03:00 committed by kd-11
parent 67378c7dea
commit be1238cc95
10 changed files with 165 additions and 99 deletions

View file

@ -14,6 +14,7 @@
#include "vkutils/chip_class.h"
#include "Utilities/geometry.h"
#include "Emu/RSX/Common/TextureUtils.h"
#include "Emu/RSX/rsx_utils.h"
#define DESCRIPTOR_MAX_DRAW_CALLS 16384
#define OCCLUSION_MAX_POOL_SIZE DESCRIPTOR_MAX_DRAW_CALLS
@ -63,14 +64,20 @@ namespace vk
void destroy_global_resources();
void reset_global_resources();
/**
* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
* Then copy all layers into dst_image.
* dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer must have the TRANSFER_SRC_BIT usage flag.
*/
void copy_mipmaped_image_using_buffer(const vk::command_buffer& cmd, vk::image* dst_image,
// Flags controlling how upload_image() records the transfer.
// The first three are bit flags tested with (flags & flag) — their values
// must be distinct powers of two so the tests remain unambiguous.
// BUG FIX: preserve_image_layout was 3 (== upload_contents_async | initialize_image_layout),
// which made (flags & preserve_image_layout) true whenever either other flag
// was set, so layout preservation could never be distinguished from them.
enum image_upload_options
{
	upload_contents_async = 1,     // Record on the dedicated transfer queue instead of the primary cb
	initialize_image_layout = 2,   // Transition dst_image to TRANSFER_DST_OPTIMAL before copying
	preserve_image_layout = 4,     // Do not touch the image layout even on the async path

	// meta-flags (zero values; named defaults for readability at call sites)
	upload_contents_inline = 0,
	upload_heap_align_default = 0
};
void upload_image(const vk::command_buffer& cmd, vk::image* dst_image,
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align = 0);
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags);
//Other texture management helpers
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes = false);

View file

@ -247,7 +247,7 @@ namespace vk
if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]]
{
push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
vk::copy_mipmaped_image_using_buffer(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch);
vk::upload_image(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline);
pop_layout(cmd);
}
else
@ -272,7 +272,7 @@ namespace vk
}
// Load Cell data into temp buffer
vk::copy_mipmaped_image_using_buffer(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch);
vk::upload_image(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline);
// Write into final image
if (content != final_dst)

View file

@ -16,6 +16,22 @@
namespace vk
{
// Dispatches the byte-swap compute kernel matching the element width.
// Only 16-bit and 32-bit elements are supported; anything else is a caller bug.
static void gpu_swap_bytes_impl(const vk::command_buffer& cmd, vk::buffer* buf, u32 element_size, u32 data_offset, u32 data_length)
{
	switch (element_size)
	{
	case 4:
		vk::get_compute_task<vk::cs_shuffle_32>()->run(cmd, buf, data_length, data_offset);
		break;
	case 2:
		vk::get_compute_task<vk::cs_shuffle_16>()->run(cmd, buf, data_length, data_offset);
		break;
	default:
		fmt::throw_exception("Unreachable");
	}
}
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes)
{
// Always validate
@ -671,7 +687,23 @@ namespace vk
if (src != dst) dst->pop_layout(cmd);
}
void gpu_deswizzle_sections_impl(VkCommandBuffer cmd, vk::buffer* scratch_buf, u32 dst_offset, int word_size, int word_count, bool swap_bytes, std::vector<VkBufferImageCopy>& sections)
// Returns the cached deswizzle compute task specialized for the given
// block size (bytes per texel block), word type and byte-swap requirement.
template <typename WordType, bool SwapBytes>
cs_deswizzle_base* get_deswizzle_transformation(u32 block_size)
{
	if (block_size == 4)
	{
		return vk::get_compute_task<cs_deswizzle_3d<u32, WordType, SwapBytes>>();
	}

	if (block_size == 8)
	{
		return vk::get_compute_task<cs_deswizzle_3d<u64, WordType, SwapBytes>>();
	}

	if (block_size == 16)
	{
		return vk::get_compute_task<cs_deswizzle_3d<u128, WordType, SwapBytes>>();
	}

	fmt::throw_exception("Unreachable");
}
static void gpu_deswizzle_sections_impl(VkCommandBuffer cmd, vk::buffer* scratch_buf, u32 dst_offset, int word_size, int word_count, bool swap_bytes, std::vector<VkBufferImageCopy>& sections)
{
// NOTE: This has to be done individually for every LOD
vk::cs_deswizzle_base* job = nullptr;
@ -683,60 +715,22 @@ namespace vk
{
if (word_size == 4)
{
switch (block_size)
{
case 4:
job = vk::get_compute_task<cs_deswizzle_3d<u32, u32, false>>();
break;
case 8:
job = vk::get_compute_task<cs_deswizzle_3d<u64, u32, false>>();
break;
case 16:
job = vk::get_compute_task<cs_deswizzle_3d<u128, u32, false>>();
break;
}
job = get_deswizzle_transformation<u32, false>(block_size);
}
else
{
switch (block_size)
{
case 4:
job = vk::get_compute_task<cs_deswizzle_3d<u32, u16, false>>();
break;
case 8:
job = vk::get_compute_task<cs_deswizzle_3d<u64, u16, false>>();
break;
}
job = get_deswizzle_transformation<u16, false>(block_size);
}
}
else
{
if (word_size == 4)
{
switch (block_size)
{
case 4:
job = vk::get_compute_task<cs_deswizzle_3d<u32, u32, true>>();
break;
case 8:
job = vk::get_compute_task<cs_deswizzle_3d<u64, u32, true>>();
break;
case 16:
job = vk::get_compute_task<cs_deswizzle_3d<u128, u32, true>>();
break;
}
job = get_deswizzle_transformation<u32, true>(block_size);
}
else
{
switch (block_size)
{
case 4:
job = vk::get_compute_task<cs_deswizzle_3d<u32, u16, true>>();
break;
case 8:
job = vk::get_compute_task<cs_deswizzle_3d<u64, u16, true>>();
break;
}
job = get_deswizzle_transformation<u16, true>(block_size);
}
}
@ -803,9 +797,45 @@ namespace vk
ensure(dst_offset <= scratch_buf->size());
}
void copy_mipmaped_image_using_buffer(const vk::command_buffer& cmd, vk::image* dst_image,
// Selects and prepares the command buffer that an image upload should be
// recorded into, and optionally transitions dst_image for transfer.
// Currently always returns primary_cb; the async-transfer branch is compiled
// out (#if 0), presumably pending full queue-family migration support — TODO confirm.
// May mutate 'flags' (adds initialize_image_layout on the async path).
static const vk::command_buffer& prepare_for_transfer(const vk::command_buffer& primary_cb, vk::image* dst_image, rsx::flags32_t& flags)
{
	const vk::command_buffer* pcmd = nullptr;
#if 0
	if (flags & image_upload_options::upload_contents_async)
	{
		// Record on the dedicated transfer queue's command buffer instead
		auto cb = vk::async_transfer_get_current();
		cb->begin();
		pcmd = cb;

		// A different queue family owns the image now; force a layout
		// initialization unless the caller explicitly asked to preserve it
		if (!(flags & image_upload_options::preserve_image_layout))
		{
			flags |= image_upload_options::initialize_image_layout;
		}
	}
	else
#endif
	{
		// Transfer commands cannot be recorded while a renderpass is open
		if (vk::is_renderpass_open(primary_cb))
		{
			vk::end_renderpass(primary_cb);
		}

		pcmd = &primary_cb;
	}

	ensure(pcmd);

	if (flags & image_upload_options::initialize_image_layout)
	{
		// Transition to TRANSFER_DST_OPTIMAL and acquire ownership for the
		// queue family the chosen command buffer submits to
		dst_image->change_layout(*pcmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, pcmd->get_queue_family());
	}

	return *pcmd;
}
void upload_image(const vk::command_buffer& cmd, vk::image* dst_image,
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align)
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
{
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
@ -827,11 +857,6 @@ namespace vk
std::vector<std::pair<VkBuffer, u32>> upload_commands;
copy_regions.reserve(subresource_layout.size());
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
for (const rsx::subresource_layout &layout : subresource_layout)
{
if (!heap_align) [[likely]]
@ -974,6 +999,7 @@ namespace vk
}
ensure(upload_buffer);
auto& cmd2 = prepare_for_transfer(cmd, dst_image, image_setup_flags);
if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
{
@ -984,38 +1010,27 @@ namespace vk
auto range_ptr = buffer_copies.data();
for (const auto& op : upload_commands)
{
vkCmdCopyBuffer(cmd, op.first, scratch_buf->value, op.second, range_ptr);
vkCmdCopyBuffer(cmd2, op.first, scratch_buf->value, op.second, range_ptr);
range_ptr += op.second;
}
}
else
{
vkCmdCopyBuffer(cmd, upload_buffer->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
vkCmdCopyBuffer(cmd2, upload_buffer->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
}
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
insert_buffer_memory_barrier(cmd2, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
}
// Swap and swizzle if requested
// Swap and deswizzle if requested
if (opt.require_deswizzle)
{
gpu_deswizzle_sections_impl(cmd, scratch_buf, scratch_offset, opt.element_size, opt.block_length, opt.require_swap, copy_regions);
gpu_deswizzle_sections_impl(cmd2, scratch_buf, scratch_offset, opt.element_size, opt.block_length, opt.require_swap, copy_regions);
}
else if (opt.require_swap)
{
if (opt.element_size == 4)
{
vk::get_compute_task<vk::cs_shuffle_32>()->run(cmd, scratch_buf, scratch_offset);
}
else if (opt.element_size == 2)
{
vk::get_compute_task<vk::cs_shuffle_16>()->run(cmd, scratch_buf, scratch_offset);
}
else
{
fmt::throw_exception("Unreachable");
}
gpu_swap_bytes_impl(cmd2, scratch_buf, opt.element_size, 0, scratch_offset);
}
// CopyBufferToImage routines
@ -1024,7 +1039,7 @@ namespace vk
// Upload in reverse to avoid polluting data in lower space
for (auto rIt = copy_regions.crbegin(); rIt != copy_regions.crend(); ++rIt)
{
vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, *rIt);
vk::copy_buffer_to_image(cmd2, scratch_buf, dst_image, *rIt);
}
}
else if (scratch_buf)
@ -1032,23 +1047,23 @@ namespace vk
ensure(opt.require_deswizzle || opt.require_swap);
const auto block_start = copy_regions.front().bufferOffset;
insert_buffer_memory_barrier(cmd, scratch_buf->value, block_start, scratch_offset, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
insert_buffer_memory_barrier(cmd2, scratch_buf->value, block_start, scratch_offset, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
vkCmdCopyBufferToImage(cmd2, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
}
else if (upload_commands.size() > 1)
{
auto region_ptr = copy_regions.data();
for (const auto& op : upload_commands)
{
vkCmdCopyBufferToImage(cmd, op.first, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, op.second, region_ptr);
vkCmdCopyBufferToImage(cmd2, op.first, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, op.second, region_ptr);
region_ptr += op.second;
}
}
else
{
vkCmdCopyBufferToImage(cmd, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
vkCmdCopyBufferToImage(cmd2, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
}
}

View file

@ -898,7 +898,7 @@ namespace vk
return &region;
}
cached_texture_section* create_nul_section(vk::command_buffer& cmd, const utils::address_range& rsx_range, bool memory_load) override
cached_texture_section* create_nul_section(vk::command_buffer& /*cmd*/, const utils::address_range& rsx_range, bool memory_load) override
{
auto& region = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false);
ensure(!region.is_locked());
@ -950,8 +950,8 @@ namespace vk
input_swizzled = false;
}
vk::copy_mipmaped_image_using_buffer(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask,
*m_texture_upload_heap);
vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask,
*m_texture_upload_heap, upload_heap_align_default, upload_contents_inline);
vk::leave_uninterruptible();

View file

@ -8,12 +8,15 @@ namespace vk
// This queue flushing method to be implemented by the backend as behavior depends on config
void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush = VK_FALSE);
void command_pool::create(vk::render_device& dev)
// Creates the command pool on 'dev' for the given queue family.
// Buffers allocated from it are transient and individually resettable.
void command_pool::create(vk::render_device& dev, u32 queue_family)
{
	owner = &dev;

	// BUG FIX: the parameter shadows the member of the same name, so the
	// original `queue_family = queue_family;` was a no-op self-assignment
	// and the member was left at 0. Qualify with this-> to store it.
	this->queue_family = queue_family;

	VkCommandPoolCreateInfo infos = {};
	infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
	infos.queueFamilyIndex = queue_family;

	CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool));
}
@ -27,12 +30,17 @@ namespace vk
pool = nullptr;
}
vk::render_device& command_pool::get_owner()
// Device this pool was created on.
vk::render_device& command_pool::get_owner() const
{
	return *owner;
}
command_pool::operator VkCommandPool()
// Queue family index this pool's command buffers are allocated for.
u32 command_pool::get_queue_family() const
{
	return queue_family;
}
// Implicit conversion to the underlying Vulkan handle for API calls.
command_pool::operator VkCommandPool() const
{
	return pool;
}

View file

@ -10,17 +10,19 @@ namespace vk
{
vk::render_device* owner = nullptr;
VkCommandPool pool = nullptr;
u32 queue_family = 0;
public:
command_pool() = default;
~command_pool() = default;
void create(vk::render_device& dev);
void create(vk::render_device& dev, u32 queue_family = 0);
void destroy();
vk::render_device& get_owner();
vk::render_device& get_owner() const;
u32 get_queue_family() const;
operator VkCommandPool();
operator VkCommandPool() const;
};
class command_buffer
@ -70,6 +72,11 @@ namespace vk
return *pool;
}
// Queue family of the pool this command buffer was allocated from.
u32 get_queue_family() const
{
	return pool->get_queue_family();
}
void clear_flags()
{
flags = 0;

View file

@ -182,7 +182,7 @@ namespace vk
change_image_layout(cmd, this, layout);
}
void image::change_layout(command_buffer& cmd, VkImageLayout new_layout)
void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout)
{
if (current_layout == new_layout)
return;
@ -191,6 +191,31 @@ namespace vk
change_image_layout(cmd, this, new_layout);
}
// Transitions the image to new_layout and transfers ownership to
// new_queue_family in a single barrier.
// FIX: dropped the redundant leading change_image_layout(cmd, this, new_layout)
// call — it recorded an extra barrier without queue-family indices, while the
// explicit barrier below already performs both the layout transition and the
// queue family ownership transfer.
void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout, u32 new_queue_family)
{
	if (current_layout == new_layout && current_queue_family == new_queue_family)
	{
		// Nothing to do
		return;
	}

	// Cannot migrate while temporary layout overrides are pushed
	ensure(m_layout_stack.empty());

	u32 dst_queue = new_queue_family;
	if (current_queue_family == VK_QUEUE_FAMILY_IGNORED)
	{
		// Implicit acquisition: no previous owner, so no release/acquire pair is needed
		dst_queue = VK_QUEUE_FAMILY_IGNORED;
	}

	// One barrier handles both the layout change and the ownership transfer
	VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
	change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue);

	current_layout = new_layout;
	current_queue_family = new_queue_family;
}
image_view::image_view(VkDevice dev, VkImage image, VkImageViewType view_type, VkFormat format, VkComponentMapping mapping, VkImageSubresourceRange range)
: m_device(dev)
{

View file

@ -39,6 +39,7 @@ namespace vk
VkImage value = VK_NULL_HANDLE;
VkComponentMapping native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
VkImageLayout current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
u32 current_queue_family = VK_QUEUE_FAMILY_IGNORED;
VkImageCreateInfo info = {};
std::shared_ptr<vk::memory_block> memory;
@ -77,7 +78,8 @@ namespace vk
void push_layout(VkCommandBuffer cmd, VkImageLayout layout);
void push_barrier(VkCommandBuffer cmd, VkImageLayout layout);
void pop_layout(VkCommandBuffer cmd);
void change_layout(command_buffer& cmd, VkImageLayout new_layout);
void change_layout(const command_buffer& cmd, VkImageLayout new_layout);
void change_layout(const command_buffer& cmd, VkImageLayout new_layout, u32 new_queue_family);
private:
VkDevice m_device;

View file

@ -55,7 +55,8 @@ namespace vk
return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] };
}
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range)
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range,
u32 src_queue_family, u32 dst_queue_family)
{
if (vk::is_renderpass_open(cmd))
{
@ -70,8 +71,8 @@ namespace vk
barrier.image = image;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = 0;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = src_queue_family;
barrier.dstQueueFamilyIndex = dst_queue_family;
barrier.subresourceRange = range;
VkPipelineStageFlags src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

View file

@ -9,7 +9,8 @@ namespace vk
VkImageAspectFlags get_aspect_flags(VkFormat format);
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector);
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range);
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range,
u32 src_queue_family = VK_QUEUE_FAMILY_IGNORED, u32 dst_queue_family = VK_QUEUE_FAMILY_IGNORED);
void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout);
}