diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index ab1bf0408b..f5883598a2 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -14,6 +14,7 @@ #include "vkutils/chip_class.h" #include "Utilities/geometry.h" #include "Emu/RSX/Common/TextureUtils.h" +#include "Emu/RSX/rsx_utils.h" #define DESCRIPTOR_MAX_DRAW_CALLS 16384 #define OCCLUSION_MAX_POOL_SIZE DESCRIPTOR_MAX_DRAW_CALLS @@ -63,14 +64,20 @@ namespace vk void destroy_global_resources(); void reset_global_resources(); - /** - * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. - * Then copy all layers into dst_image. - * dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag. - */ - void copy_mipmaped_image_using_buffer(const vk::command_buffer& cmd, vk::image* dst_image, + enum image_upload_options + { + upload_contents_async = 1, + initialize_image_layout = 2, + preserve_image_layout = 4, /* own bit: 3 would alias upload_contents_async | initialize_image_layout in mask tests */ + + // meta-flags + upload_contents_inline = 0, + upload_heap_align_default = 0 + }; + + void upload_image(const vk::command_buffer& cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align = 0); + VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags); //Other texture management helpers void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes = false); diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index d6b1d3fe94..c0ab48ac50 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -247,7 +247,7 @@ namespace vk if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]] { push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - 
vk::copy_mipmaped_image_using_buffer(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch); + vk::upload_image(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline); pop_layout(cmd); } else @@ -272,7 +272,7 @@ namespace vk } // Load Cell data into temp buffer - vk::copy_mipmaped_image_using_buffer(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch); + vk::upload_image(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline); // Write into final image if (content != final_dst) diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 84702a9d94..50a5043254 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -16,6 +16,22 @@ namespace vk { + static void gpu_swap_bytes_impl(const vk::command_buffer& cmd, vk::buffer* buf, u32 element_size, u32 data_offset, u32 data_length) + { + if (element_size == 4) + { + vk::get_compute_task()->run(cmd, buf, data_length, data_offset); + } + else if (element_size == 2) + { + vk::get_compute_task()->run(cmd, buf, data_length, data_offset); + } + else + { + fmt::throw_exception("Unreachable"); + } + } + void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes) { // Always validate @@ -671,7 +687,23 @@ namespace vk if (src != dst) dst->pop_layout(cmd); } - void gpu_deswizzle_sections_impl(VkCommandBuffer cmd, vk::buffer* scratch_buf, u32 dst_offset, int word_size, int word_count, bool swap_bytes, std::vector& sections) + template + cs_deswizzle_base* get_deswizzle_transformation(u32 block_size) + { + switch (block_size) + { + case 4: + return vk::get_compute_task>(); + case 8: + return vk::get_compute_task>(); + case 16: + return vk::get_compute_task>(); + default: + 
fmt::throw_exception("Unreachable"); + } + } + + static void gpu_deswizzle_sections_impl(VkCommandBuffer cmd, vk::buffer* scratch_buf, u32 dst_offset, int word_size, int word_count, bool swap_bytes, std::vector& sections) { // NOTE: This has to be done individually for every LOD vk::cs_deswizzle_base* job = nullptr; @@ -683,60 +715,22 @@ namespace vk { if (word_size == 4) { - switch (block_size) - { - case 4: - job = vk::get_compute_task>(); - break; - case 8: - job = vk::get_compute_task>(); - break; - case 16: - job = vk::get_compute_task>(); - break; - } + job = get_deswizzle_transformation(block_size); } else { - switch (block_size) - { - case 4: - job = vk::get_compute_task>(); - break; - case 8: - job = vk::get_compute_task>(); - break; - } + job = get_deswizzle_transformation(block_size); } } else { if (word_size == 4) { - switch (block_size) - { - case 4: - job = vk::get_compute_task>(); - break; - case 8: - job = vk::get_compute_task>(); - break; - case 16: - job = vk::get_compute_task>(); - break; - } + job = get_deswizzle_transformation(block_size); } else { - switch (block_size) - { - case 4: - job = vk::get_compute_task>(); - break; - case 8: - job = vk::get_compute_task>(); - break; - } + job = get_deswizzle_transformation(block_size); } } @@ -803,9 +797,45 @@ namespace vk ensure(dst_offset <= scratch_buf->size()); } - void copy_mipmaped_image_using_buffer(const vk::command_buffer& cmd, vk::image* dst_image, + static const vk::command_buffer& prepare_for_transfer(const vk::command_buffer& primary_cb, vk::image* dst_image, rsx::flags32_t& flags) + { + const vk::command_buffer* pcmd = nullptr; +#if 0 + if (flags & image_upload_options::upload_contents_async) + { + auto cb = vk::async_transfer_get_current(); + cb->begin(); + pcmd = cb; + + if (!(flags & image_upload_options::preserve_image_layout)) + { + flags |= image_upload_options::initialize_image_layout; + } + } + else +#endif + { + if (vk::is_renderpass_open(primary_cb)) + { + 
vk::end_renderpass(primary_cb); + } + + pcmd = &primary_cb; + } + + ensure(pcmd); + + if (flags & image_upload_options::initialize_image_layout) + { + dst_image->change_layout(*pcmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, pcmd->get_queue_family()); + } + + return *pcmd; + } + + void upload_image(const vk::command_buffer& cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align) + VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags) { const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT); u32 block_in_pixel = rsx::get_format_block_size_in_texel(format); @@ -827,11 +857,6 @@ namespace vk std::vector> upload_commands; copy_regions.reserve(subresource_layout.size()); - if (vk::is_renderpass_open(cmd)) - { - vk::end_renderpass(cmd); - } - for (const rsx::subresource_layout &layout : subresource_layout) { if (!heap_align) [[likely]] @@ -974,6 +999,7 @@ namespace vk } ensure(upload_buffer); + auto& cmd2 = prepare_for_transfer(cmd, dst_image, image_setup_flags); if (opt.require_swap || opt.require_deswizzle || requires_depth_processing) { @@ -984,38 +1010,27 @@ namespace vk auto range_ptr = buffer_copies.data(); for (const auto& op : upload_commands) { - vkCmdCopyBuffer(cmd, op.first, scratch_buf->value, op.second, range_ptr); + vkCmdCopyBuffer(cmd2, op.first, scratch_buf->value, op.second, range_ptr); range_ptr += op.second; } } else { - vkCmdCopyBuffer(cmd, upload_buffer->value, scratch_buf->value, static_cast(buffer_copies.size()), buffer_copies.data()); + vkCmdCopyBuffer(cmd2, upload_buffer->value, scratch_buf->value, static_cast(buffer_copies.size()), buffer_copies.data()); } - insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + insert_buffer_memory_barrier(cmd2, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); } - // Swap and swizzle if requested + // Swap and deswizzle if requested if (opt.require_deswizzle) { - gpu_deswizzle_sections_impl(cmd, scratch_buf, scratch_offset, opt.element_size, opt.block_length, opt.require_swap, copy_regions); + gpu_deswizzle_sections_impl(cmd2, scratch_buf, scratch_offset, opt.element_size, opt.block_length, opt.require_swap, copy_regions); } else if (opt.require_swap) { - if (opt.element_size == 4) - { - vk::get_compute_task()->run(cmd, scratch_buf, scratch_offset); - } - else if (opt.element_size == 2) - { - vk::get_compute_task()->run(cmd, scratch_buf, scratch_offset); - } - else - { - fmt::throw_exception("Unreachable"); - } + gpu_swap_bytes_impl(cmd2, scratch_buf, opt.element_size, 0, scratch_offset); } // CopyBufferToImage routines @@ -1024,7 +1039,7 @@ namespace vk // Upload in reverse to avoid polluting data in lower space for (auto rIt = copy_regions.crbegin(); rIt != copy_regions.crend(); ++rIt) { - vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, *rIt); + vk::copy_buffer_to_image(cmd2, scratch_buf, dst_image, *rIt); } } else if (scratch_buf) @@ -1032,23 +1047,23 @@ namespace vk ensure(opt.require_deswizzle || opt.require_swap); const auto block_start = copy_regions.front().bufferOffset; - insert_buffer_memory_barrier(cmd, scratch_buf->value, block_start, scratch_offset, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + insert_buffer_memory_barrier(cmd2, scratch_buf->value, block_start, scratch_offset, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); - vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); + vkCmdCopyBufferToImage(cmd2, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); } else if (upload_commands.size() > 1) { auto region_ptr = copy_regions.data(); for (const auto& op : upload_commands) { - vkCmdCopyBufferToImage(cmd, op.first, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, op.second, region_ptr); + vkCmdCopyBufferToImage(cmd2, op.first, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, op.second, region_ptr); region_ptr += op.second; } } else { - vkCmdCopyBufferToImage(cmd, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); + vkCmdCopyBufferToImage(cmd2, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index b00ae1b3e2..62aa0b11f9 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -898,7 +898,7 @@ namespace vk return &region; } - cached_texture_section* create_nul_section(vk::command_buffer& cmd, const utils::address_range& rsx_range, bool memory_load) override + cached_texture_section* create_nul_section(vk::command_buffer& /*cmd*/, const utils::address_range& rsx_range, bool memory_load) override { auto& region = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false); ensure(!region.is_locked()); @@ -950,8 +950,8 @@ namespace vk input_swizzled = false; } - vk::copy_mipmaped_image_using_buffer(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask, - *m_texture_upload_heap); + vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask, + 
*m_texture_upload_heap, upload_heap_align_default, upload_contents_inline); vk::leave_uninterruptible(); diff --git a/rpcs3/Emu/RSX/VK/vkutils/commands.cpp b/rpcs3/Emu/RSX/VK/vkutils/commands.cpp index f68e0eae45..45cabab322 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/commands.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/commands.cpp @@ -8,12 +8,15 @@ namespace vk // This queue flushing method to be implemented by the backend as behavior depends on config void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush = VK_FALSE); - void command_pool::create(vk::render_device& dev) + void command_pool::create(vk::render_device& dev, u32 queue_family) { - owner = &dev; + owner = &dev; + this->queue_family = queue_family; /* the parameter shadows the member; unqualified, this was a self-assignment and get_queue_family() kept returning the default */ + VkCommandPoolCreateInfo infos = {}; - infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + infos.queueFamilyIndex = queue_family; CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool)); } @@ -27,12 +30,17 @@ namespace vk pool = nullptr; } - vk::render_device& command_pool::get_owner() + vk::render_device& command_pool::get_owner() const { return (*owner); } - command_pool::operator VkCommandPool() + u32 command_pool::get_queue_family() const + { + return queue_family; + } + + command_pool::operator VkCommandPool() const { return pool; } diff --git a/rpcs3/Emu/RSX/VK/vkutils/commands.h b/rpcs3/Emu/RSX/VK/vkutils/commands.h index 24dbae1986..d116535518 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/commands.h +++ b/rpcs3/Emu/RSX/VK/vkutils/commands.h @@ -10,17 +10,19 @@ namespace vk { vk::render_device* owner = nullptr; VkCommandPool pool = nullptr; + u32 queue_family = 0; public: command_pool() = default; ~command_pool() = default; - void create(vk::render_device& dev); + void 
create(vk::render_device& dev, u32 queue_family = 0); void destroy(); - vk::render_device& get_owner(); + vk::render_device& get_owner() const; + u32 get_queue_family() const; - operator VkCommandPool(); + operator VkCommandPool() const; }; class command_buffer @@ -70,6 +72,11 @@ namespace vk return *pool; } + u32 get_queue_family() const + { + return pool->get_queue_family(); + } + void clear_flags() { flags = 0; diff --git a/rpcs3/Emu/RSX/VK/vkutils/image.cpp b/rpcs3/Emu/RSX/VK/vkutils/image.cpp index dfd22daaaf..06d901cd20 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/image.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/image.cpp @@ -182,7 +182,7 @@ namespace vk change_image_layout(cmd, this, layout); } - void image::change_layout(command_buffer& cmd, VkImageLayout new_layout) + void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout) { if (current_layout == new_layout) return; @@ -191,6 +191,31 @@ namespace vk change_image_layout(cmd, this, new_layout); } + void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout, u32 new_queue_family) + { + if (current_layout == new_layout && current_queue_family == new_queue_family) + { + // Nothing to do + return; + } + + ensure(m_layout_stack.empty()); + change_image_layout(cmd, this, new_layout); + + u32 dst_queue = new_queue_family; + if (current_queue_family == VK_QUEUE_FAMILY_IGNORED) + { + // Implicit acquisition + dst_queue = VK_QUEUE_FAMILY_IGNORED; + } + + VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() }; + change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue); + + current_layout = new_layout; + current_queue_family = new_queue_family; + } + image_view::image_view(VkDevice dev, VkImage image, VkImageViewType view_type, VkFormat format, VkComponentMapping mapping, VkImageSubresourceRange range) : m_device(dev) { diff --git a/rpcs3/Emu/RSX/VK/vkutils/image.h b/rpcs3/Emu/RSX/VK/vkutils/image.h index 0e585d45a4..6490114982 100644 
--- a/rpcs3/Emu/RSX/VK/vkutils/image.h +++ b/rpcs3/Emu/RSX/VK/vkutils/image.h @@ -39,6 +39,7 @@ namespace vk VkImage value = VK_NULL_HANDLE; VkComponentMapping native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; VkImageLayout current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + u32 current_queue_family = VK_QUEUE_FAMILY_IGNORED; VkImageCreateInfo info = {}; std::shared_ptr memory; @@ -77,7 +78,8 @@ namespace vk void push_layout(VkCommandBuffer cmd, VkImageLayout layout); void push_barrier(VkCommandBuffer cmd, VkImageLayout layout); void pop_layout(VkCommandBuffer cmd); - void change_layout(command_buffer& cmd, VkImageLayout new_layout); + void change_layout(const command_buffer& cmd, VkImageLayout new_layout); + void change_layout(const command_buffer& cmd, VkImageLayout new_layout, u32 new_queue_family); private: VkDevice m_device; diff --git a/rpcs3/Emu/RSX/VK/vkutils/image_helpers.cpp b/rpcs3/Emu/RSX/VK/vkutils/image_helpers.cpp index d5870fad95..b6e2a8aafa 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/image_helpers.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/image_helpers.cpp @@ -55,7 +55,8 @@ namespace vk return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] }; } - void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range) + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range, + u32 src_queue_family, u32 dst_queue_family) { if (vk::is_renderpass_open(cmd)) { @@ -70,8 +71,8 @@ namespace vk barrier.image = image; barrier.srcAccessMask = 0; barrier.dstAccessMask = 0; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcQueueFamilyIndex = src_queue_family; + barrier.dstQueueFamilyIndex = dst_queue_family; 
barrier.subresourceRange = range; VkPipelineStageFlags src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; diff --git a/rpcs3/Emu/RSX/VK/vkutils/image_helpers.h b/rpcs3/Emu/RSX/VK/vkutils/image_helpers.h index 466a3e03fe..639ed0070a 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/image_helpers.h +++ b/rpcs3/Emu/RSX/VK/vkutils/image_helpers.h @@ -9,7 +9,8 @@ namespace vk VkImageAspectFlags get_aspect_flags(VkFormat format); VkComponentMapping apply_swizzle_remap(const std::array& base_remap, const std::pair, std::array>& remap_vector); - void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range); + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range, + u32 src_queue_family = VK_QUEUE_FAMILY_IGNORED, u32 dst_queue_family = VK_QUEUE_FAMILY_IGNORED); void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout, const VkImageSubresourceRange& range); void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout); }