diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 4530f690e..4fa7c6bdb 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -607,7 +607,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     }
     if (!copies.empty()) {
         scheduler.EndRendering();
-        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
+        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
         const auto cmdbuf = scheduler.CommandBuffer();
         cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
                                  copies);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index d019ff034..d7954bf79 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
 
-    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf);
+    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {},
+                  cmdbuf);
 
     const std::array pre_barrier{
         vk::ImageMemoryBarrier{
@@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
 
     // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
     cmdbuf.blitImage(
-        image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
+        image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
        MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
        vk::Filter::eLinear);
 
@@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) {
 
     auto& scheduler = present_scheduler;
     const auto cmdbuf = scheduler.CommandBuffer();
+
+    ImGui::Core::Render(cmdbuf, frame);
+
     {
         auto* profiler_ctx = instance.GetProfilerContext();
         TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",
@@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) {
             },
         };
 
-        ImGui::Core::Render(cmdbuf, frame);
-
         cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
                                vk::PipelineStageFlagBits::eTransfer,
                                vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index d9296b501..54f354969 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -220,7 +220,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
             VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
             const auto& image_view = texture_cache.FindTexture(image_info, view_info);
             const auto& image = texture_cache.GetImage(image_view.image_id);
-            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
+            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
+                                     image.last_state.layout);
         } else if (instance.IsNullDescriptorSupported()) {
             image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         } else {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index dc311a7c6..7a851386a 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -453,7 +453,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
             VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
             const auto& image_view = texture_cache.FindTexture(image_info, view_info);
             const auto& image = texture_cache.GetImage(image_view.image_id);
-            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
+            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
+                                     image.last_state.layout);
         } else if (instance.IsNullDescriptorSupported()) {
             image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         } else {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 23f60da13..9d8b42523 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -240,7 +240,7 @@ void Rasterizer::BeginRendering() {
         state.depth_image = image.image;
         state.depth_attachment = {
             .imageView = *image_view.image_view,
-            .imageLayout = image.layout,
+            .imageLayout = image.last_state.layout,
             .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
             .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
             .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 9e8c38f0d..bec11ea1d 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #define VULKAN_HPP_NO_EXCEPTIONS
+#include <ranges>
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
@@ -179,52 +180,128 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                       info.guest_size_bytes);
 }
 
-void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
-                    vk::CommandBuffer cmdbuf) {
-    if (dst_layout == layout && dst_mask == access_mask) {
-        return;
+boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
+    vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+    vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range) {
+    const bool needs_partial_transition =
+        subres_range &&
+        (subres_range->base != SubresourceBase{} || subres_range->extent != info.resources);
+    const bool partially_transited = !subresource_states.empty();
+
+    boost::container::small_vector<vk::ImageMemoryBarrier2, 32> barriers{};
+    if (needs_partial_transition || partially_transited) {
+        if (!partially_transited) {
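+            // First partial transition on this image: begin tracking per-subresource state,
+            // seeding every entry from the current whole-image state.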
+            subresource_states.resize(info.resources.levels * info.resources.layers);
+            std::fill(subresource_states.begin(), subresource_states.end(), last_state);
+        }
+
+        // In case of partial transition, we need to change the specified subresources only.
+        // Otherwise all subresources need to be set to the same state so we can use a full
+        // resource transition for the next time.
+        const auto mips =
+            needs_partial_transition
+                ? std::ranges::views::iota(subres_range->base.level, subres_range->extent.levels)
+                : std::views::iota(0u, info.resources.levels);
+        const auto layers =
+            needs_partial_transition
+                ? std::ranges::views::iota(subres_range->base.layer, subres_range->extent.layers)
+                : std::views::iota(0u, info.resources.layers);
+
+        for (u32 mip : mips) {
+            for (u32 layer : layers) {
+                // NOTE: these loops may produce a lot of small barriers.
+                // If this becomes a problem, we can optimize it by merging adjacent barriers.
+                auto& state = subresource_states[mip * info.resources.layers + layer];
+
+                if (state.layout != dst_layout || state.access_mask != dst_mask) {
+                    barriers.emplace_back(vk::ImageMemoryBarrier2{
+                        .srcStageMask = state.pl_stage,
+                        .srcAccessMask = state.access_mask,
+                        .dstStageMask = dst_stage,
+                        .dstAccessMask = dst_mask,
+                        .oldLayout = state.layout,
+                        .newLayout = dst_layout,
+                        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                        .image = image,
+                        .subresourceRange{
+                            .aspectMask = aspect_mask,
+                            .baseMipLevel = mip,
+                            .levelCount = 1,
+                            .baseArrayLayer = layer,
+                            .layerCount = 1,
+                        },
+                    });
+                    state.layout = dst_layout;
+                    state.access_mask = dst_mask;
+                    state.pl_stage = dst_stage;
+                }
+            }
+        }
+
+        if (!needs_partial_transition) {
+            subresource_states.clear();
+        }
+    } else { // Full resource transition
+        if (last_state.layout == dst_layout && last_state.access_mask == dst_mask) {
+            return {};
+        }
+
+        barriers.emplace_back(vk::ImageMemoryBarrier2{
+            .srcStageMask = last_state.pl_stage,
+            .srcAccessMask = last_state.access_mask,
+            .dstStageMask = dst_stage,
+            .dstAccessMask = dst_mask,
+            .oldLayout = last_state.layout,
+            .newLayout = dst_layout,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = image,
+            .subresourceRange{
+                .aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        });
     }
 
-    const vk::ImageMemoryBarrier barrier = {
-        .srcAccessMask = access_mask,
-        .dstAccessMask = dst_mask,
-        .oldLayout = layout,
-        .newLayout = dst_layout,
-        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
-    };
+    last_state.layout = dst_layout;
+    last_state.access_mask = dst_mask;
+    last_state.pl_stage = dst_stage;
+
+    return barriers;
+}
+
+void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+                    std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf /*= {}*/) {
     // Adjust pipeline stage
-    const vk::PipelineStageFlags dst_pl_stage =
-        (dst_mask == vk::AccessFlagBits::eTransferRead ||
-         dst_mask == vk::AccessFlagBits::eTransferWrite)
-            ? vk::PipelineStageFlagBits::eTransfer
-            : vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;
+    const vk::PipelineStageFlags2 dst_pl_stage =
+        (dst_mask == vk::AccessFlagBits2::eTransferRead ||
+         dst_mask == vk::AccessFlagBits2::eTransferWrite)
+            ? vk::PipelineStageFlagBits2::eTransfer
+            : vk::PipelineStageFlagBits2::eAllGraphics | vk::PipelineStageFlagBits2::eComputeShader;
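+    // NOTE: this is a conservative heuristic; without per-stage usage tracking, any
+    // non-transfer access synchronizes against all graphics and compute stages.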
+
+    const auto barriers = GetBarriers(dst_layout, dst_mask, dst_pl_stage, range);
+    if (barriers.empty()) {
+        return;
+    }
 
     if (!cmdbuf) {
         // When using external cmdbuf you are responsible for ending rp.
         scheduler->EndRendering();
         cmdbuf = scheduler->CommandBuffer();
     }
-    cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
-                           barrier);
-
-    layout = dst_layout;
-    access_mask = dst_mask;
-    pl_stage = dst_pl_stage;
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
+        .pImageMemoryBarriers = barriers.data(),
+    });
 }
 
 void Image::Upload(vk::Buffer buffer, u64 offset) {
     scheduler->EndRendering();
-    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
 
     // Copy to the image.
     const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
@@ -248,12 +325,12 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
     cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);
 
     Transit(vk::ImageLayout::eGeneral,
-            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
+            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }
 
 void Image::CopyImage(const Image& image) {
     scheduler->EndRendering();
-    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
 
     auto cmdbuf = scheduler->CommandBuffer();
@@ -279,15 +356,16 @@ void Image::CopyImage(const Image& image) {
             .extent = {mip_w, mip_h, mip_d},
         });
     }
-    cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
+    cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
+                     image_copy);
 
     Transit(vk::ImageLayout::eGeneral,
-            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
+            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }
 
 void Image::CopyMip(const Image& image, u32 mip) {
     scheduler->EndRendering();
-    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
 
     auto cmdbuf = scheduler->CommandBuffer();
@@ -313,10 +391,11 @@ void Image::CopyMip(const Image& image, u32 mip) {
         },
         .extent = {mip_w, mip_h, mip_d},
     };
-    cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
+    cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
+                     image_copy);
 
     Transit(vk::ImageLayout::eGeneral,
-            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
+            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }
 
 Image::~Image() = default;
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index 1bbb975ba..e644b81c0 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -91,8 +91,11 @@ struct Image {
         return image_view_ids[std::distance(image_view_infos.begin(), it)];
     }
 
-    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
-                 vk::CommandBuffer cmdbuf = {});
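+    // Builds the sync2 barriers required to bring the image (or just the given subresource
+    // range) to dst_layout/dst_mask, updating the tracked state along the way.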
+    boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
+        vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+        vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
+    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+                 std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
     void Upload(vk::Buffer buffer, u64 offset);
 
     void CopyImage(const Image& image);
@@ -111,10 +114,16 @@ struct Image {
 
     // Resource state tracking
     vk::ImageUsageFlags usage;
-    vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
-    vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
-    vk::ImageLayout layout = vk::ImageLayout::eUndefined;
-    boost::container::small_vector<u64, 14> mip_hashes;
+    struct State {
+        u32 mip_level{};
+        u32 layer{};
+        vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
+        vk::Flags<vk::AccessFlagBits2> access_mask = vk::AccessFlagBits2::eNone;
+        vk::ImageLayout layout = vk::ImageLayout::eUndefined;
+    };
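+    // last_state tracks the whole image; while subresource_states is non-empty the
+    // per-subresource entries take precedence, until a full transition realigns and clears them.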
+    State last_state{};
+    std::vector<State> subresource_states{};
+    boost::container::small_vector<u64, 14> mip_hashes{};
     u64 tick_accessed_last{0};
 };
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 8621e95f5..f7b89fac9 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -154,7 +154,7 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_
 
         if (tex_cache_image.info.IsMipOf(image_info)) {
             tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
-                                    vk::AccessFlagBits::eTransferRead);
+                                    vk::AccessFlagBits2::eTransferRead, {});
 
             const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
             ASSERT(num_mips_to_copy == 1);
@@ -176,7 +176,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
     auto& src_image = slot_images[image_id];
     auto& new_image = slot_images[new_image_id];
 
-    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
+    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
     new_image.CopyImage(src_image);
 
     FreeImage(image_id);
@@ -263,13 +263,14 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo&
 
     if (view_info.is_storage) {
         image.Transit(vk::ImageLayout::eGeneral,
-                      vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
+                      vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
+                      view_info.range);
         usage.storage = true;
     } else {
         const auto new_layout = image.info.IsDepthStencil()
                                     ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
                                     : vk::ImageLayout::eShaderReadOnlyOptimal;
-        image.Transit(new_layout, vk::AccessFlagBits::eShaderRead);
+        image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range);
         usage.texture = true;
     }
 
@@ -284,8 +285,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
     UpdateImage(image_id);
 
     image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
-                  vk::AccessFlagBits::eColorAttachmentWrite |
-                      vk::AccessFlagBits::eColorAttachmentRead);
+                  vk::AccessFlagBits2::eColorAttachmentWrite |
+                      vk::AccessFlagBits2::eColorAttachmentRead,
+                  view_info.range);
 
     // Register meta data for this color buffer
     if (!(image.flags & ImageFlagBits::MetaRegistered)) {
@@ -330,8 +332,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
                          : vk::ImageLayout::eDepthAttachmentOptimal
              : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
                            : vk::ImageLayout::eDepthReadOnlyOptimal;
-    image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite |
-                                  vk::AccessFlagBits::eDepthStencilAttachmentRead);
+    image.Transit(new_layout,
+                  vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
+                      vk::AccessFlagBits2::eDepthStencilAttachmentRead,
+                  view_info.range);
 
     // Register meta data for this depth buffer
     if (!(image.flags & ImageFlagBits::MetaRegistered)) {
@@ -404,7 +408,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     sched_ptr->EndRendering();
 
     const auto cmdbuf = sched_ptr->CommandBuffer();
-    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
+    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {},
+                  cmdbuf);
 
     const VAddr image_addr = image.info.guest_address;
     const size_t image_size = image.info.guest_size_bytes;