video_core: texture: image subresources state tracking

psucien 2024-09-11 22:29:06 +02:00
parent 913a46173a
commit e387c22199
8 changed files with 159 additions and 62 deletions


@@ -607,7 +607,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     }
     if (!copies.empty()) {
         scheduler.EndRendering();
-        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
+        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
         const auto cmdbuf = scheduler.CommandBuffer();
         cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
                                  copies);
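
The new third parameter is a std::optional<SubresourceRange>; passing {} (std::nullopt) keeps the previous whole-image behaviour, while a concrete range restricts the barrier to specific mips/layers. A minimal sketch of the two call shapes, using the SubresourceRange type introduced in image.h below (the partial call mirrors the texture cache call sites further down):

    // Full-resource transition, as in the call above.
    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});

    // Partial transition: only the subresources covered by the view's range are barriered.
    image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits2::eShaderRead, view_info.range);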


@@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf);
+    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {},
+                  cmdbuf);

     const std::array pre_barrier{
         vk::ImageMemoryBarrier{
@@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
     // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
     cmdbuf.blitImage(
-        image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
+        image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
         MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
         vk::Filter::eLinear);
@@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) {
     auto& scheduler = present_scheduler;
     const auto cmdbuf = scheduler.CommandBuffer();
+
+    ImGui::Core::Render(cmdbuf, frame);
+
     {
         auto* profiler_ctx = instance.GetProfilerContext();
         TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",
@@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) {
         },
     };

-    ImGui::Core::Render(cmdbuf, frame);
-
     cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
                            vk::PipelineStageFlagBits::eTransfer,
                            vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);


@@ -220,7 +220,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
             VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
             const auto& image_view = texture_cache.FindTexture(image_info, view_info);
             const auto& image = texture_cache.GetImage(image_view.image_id);
-            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
+            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
+                                     image.last_state.layout);
         } else if (instance.IsNullDescriptorSupported()) {
             image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         } else {
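
For reference, the three emplace_back arguments populate a vk::DescriptorImageInfo (sampler, imageView, imageLayout), and validation layers require the recorded layout to match the image's actual layout when the descriptor is consumed; hence the tracked last_state.layout rather than a hard-coded constant. Spelled out as a sketch:

    const vk::DescriptorImageInfo desc_info{
        VK_NULL_HANDLE,           // sampler: bound separately
        *image_view.image_view,   // view resolved by the texture cache
        image.last_state.layout,  // layout the image is expected to be in at draw time
    };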


@@ -453,7 +453,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
             VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
             const auto& image_view = texture_cache.FindTexture(image_info, view_info);
             const auto& image = texture_cache.GetImage(image_view.image_id);
-            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
+            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
+                                     image.last_state.layout);
         } else if (instance.IsNullDescriptorSupported()) {
             image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         } else {


@@ -240,7 +240,7 @@ void Rasterizer::BeginRendering() {
         state.depth_image = image.image;
         state.depth_attachment = {
             .imageView = *image_view.image_view,
-            .imageLayout = image.layout,
+            .imageLayout = image.last_state.layout,
             .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
             .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
             .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,


@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #define VULKAN_HPP_NO_EXCEPTIONS
+#include <ranges>
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
@@ -179,52 +180,128 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                           info.guest_size_bytes);
 }

-void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
-                    vk::CommandBuffer cmdbuf) {
-    if (dst_layout == layout && dst_mask == access_mask) {
-        return;
-    }
+boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
+    vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+    vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range) {
+    const bool needs_partial_transition =
+        subres_range &&
+        (subres_range->base != SubresourceBase{} || subres_range->extent != info.resources);
+    const bool partially_transited = !subresource_states.empty();
+    boost::container::small_vector<vk::ImageMemoryBarrier2, 32> barriers{};
+    if (needs_partial_transition || partially_transited) {
+        if (!partially_transited) {
+            subresource_states.resize(info.resources.levels * info.resources.layers);
+            std::fill(subresource_states.begin(), subresource_states.end(), last_state);
+        }
+        // In case of partial transition, we need to change the specified subresources only.
+        // Otherwise all subresources need to be set to the same state so we can use a full
+        // resource transition for the next time.
+        const auto mips =
+            needs_partial_transition
+                ? std::ranges::views::iota(subres_range->base.level, subres_range->extent.levels)
+                : std::views::iota(0u, info.resources.levels);
+        const auto layers =
+            needs_partial_transition
+                ? std::ranges::views::iota(subres_range->base.layer, subres_range->extent.layers)
+                : std::views::iota(0u, info.resources.layers);
+        for (u32 mip : mips) {
+            for (u32 layer : layers) {
+                // NOTE: these loops may produce a lot of small barriers.
+                // If this becomes a problem, we can optimize it by merging adjacent barriers.
+                auto& state = subresource_states[mip * info.resources.layers + layer];
+                if (state.layout != dst_layout || state.access_mask != dst_mask) {
+                    barriers.emplace_back(vk::ImageMemoryBarrier2{
+                        .srcStageMask = state.pl_stage,
+                        .srcAccessMask = state.access_mask,
+                        .dstStageMask = dst_stage,
+                        .dstAccessMask = dst_mask,
+                        .oldLayout = state.layout,
+                        .newLayout = dst_layout,
+                        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                        .image = image,
+                        .subresourceRange{
+                            .aspectMask = aspect_mask,
+                            .baseMipLevel = mip,
+                            .levelCount = 1,
+                            .baseArrayLayer = layer,
+                            .layerCount = 1,
+                        },
+                    });
+                    state.layout = dst_layout;
+                    state.access_mask = dst_mask;
+                    state.pl_stage = dst_stage;
+                }
+            }
+        }
+        if (!needs_partial_transition) {
+            subresource_states.clear();
+        }
+    } else { // Full resource transition
+        if (last_state.layout == dst_layout && last_state.access_mask == dst_mask) {
+            return {};
+        }
+        barriers.emplace_back(vk::ImageMemoryBarrier2{
+            .srcStageMask = last_state.pl_stage,
+            .srcAccessMask = last_state.access_mask,
+            .dstStageMask = dst_stage,
+            .dstAccessMask = dst_mask,
+            .oldLayout = last_state.layout,
+            .newLayout = dst_layout,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = image,
+            .subresourceRange{
+                .aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        });
+    }

-    const vk::ImageMemoryBarrier barrier = {
-        .srcAccessMask = access_mask,
-        .dstAccessMask = dst_mask,
-        .oldLayout = layout,
-        .newLayout = dst_layout,
-        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
-    };
+    last_state.layout = dst_layout;
+    last_state.access_mask = dst_mask;
+    last_state.pl_stage = dst_stage;
+    return barriers;
+}

+void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+                    std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf /*= {}*/) {
     // Adjust pipeline stage
-    const vk::PipelineStageFlags dst_pl_stage =
-        (dst_mask == vk::AccessFlagBits::eTransferRead ||
-         dst_mask == vk::AccessFlagBits::eTransferWrite)
-            ? vk::PipelineStageFlagBits::eTransfer
-            : vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;
+    const vk::PipelineStageFlags2 dst_pl_stage =
+        (dst_mask == vk::AccessFlagBits2::eTransferRead ||
+         dst_mask == vk::AccessFlagBits2::eTransferWrite)
+            ? vk::PipelineStageFlagBits2::eTransfer
+            : vk::PipelineStageFlagBits2::eAllGraphics | vk::PipelineStageFlagBits2::eComputeShader;
+    const auto barriers = GetBarriers(dst_layout, dst_mask, dst_pl_stage, range);
+    if (barriers.empty()) {
+        return;
+    }

     if (!cmdbuf) {
         // When using external cmdbuf you are responsible for ending rp.
         scheduler->EndRendering();
         cmdbuf = scheduler->CommandBuffer();
     }
-    cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
-                           barrier);
-
-    layout = dst_layout;
-    access_mask = dst_mask;
-    pl_stage = dst_pl_stage;
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
+        .pImageMemoryBarriers = barriers.data(),
+    });
 }

 void Image::Upload(vk::Buffer buffer, u64 offset) {
     scheduler->EndRendering();
-    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});

     // Copy to the image.
     const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
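
The per-subresource bookkeeping above is a flat vector in mip-major order, sized levels * layers and indexed as mip * layers + layer. For example, with 3 mip levels and 2 array layers:

    // index = mip * info.resources.layers + layer
    // [0] mip0/layer0  [1] mip0/layer1
    // [2] mip1/layer0  [3] mip1/layer1
    // [4] mip2/layer0  [5] mip2/layer1
    auto& state = subresource_states[mip * info.resources.layers + layer];

As the NOTE in the loop says, one vk::ImageMemoryBarrier2 is emitted per diverging subresource; merging neighbouring subresources that share the same source state into a single barrier with levelCount/layerCount greater than 1 is left as a future optimization.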
@@ -248,12 +325,12 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
     cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);

     Transit(vk::ImageLayout::eGeneral,
-            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
+            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }

 void Image::CopyImage(const Image& image) {
     scheduler->EndRendering();
-    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});

     auto cmdbuf = scheduler->CommandBuffer();
@@ -279,15 +356,16 @@ void Image::CopyImage(const Image& image) {
             .extent = {mip_w, mip_h, mip_d},
         });
     }
-    cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
+    cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
+                     image_copy);

     Transit(vk::ImageLayout::eGeneral,
-            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
+            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }

 void Image::CopyMip(const Image& image, u32 mip) {
     scheduler->EndRendering();
-    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});

     auto cmdbuf = scheduler->CommandBuffer();
@@ -313,10 +391,11 @@ void Image::CopyMip(const Image& image, u32 mip) {
         },
         .extent = {mip_w, mip_h, mip_d},
     };
-    cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
+    cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
+                     image_copy);

     Transit(vk::ImageLayout::eGeneral,
-            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
+            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }

 Image::~Image() = default;
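
Upload, CopyImage and CopyMip all follow the same recording pattern around the tracked state: transition the destination to eTransferDstOptimal, record the copy (reading the source image's current last_state.layout), then transition back to eGeneral for shader access. Condensed:

    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
    // ... cmdbuf.copyBufferToImage(...) / cmdbuf.copyImage(...) ...
    Transit(vk::ImageLayout::eGeneral,
            vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});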


@@ -91,8 +91,11 @@ struct Image {
         return image_view_ids[std::distance(image_view_infos.begin(), it)];
     }

-    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
-                 vk::CommandBuffer cmdbuf = {});
+    boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
+        vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+        vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
+    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
+                 std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
     void Upload(vk::Buffer buffer, u64 offset);

     void CopyImage(const Image& image);
@@ -111,10 +114,16 @@ struct Image {
     // Resource state tracking
     vk::ImageUsageFlags usage;
-    vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
-    vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
-    vk::ImageLayout layout = vk::ImageLayout::eUndefined;
-    boost::container::small_vector<u64, 14> mip_hashes;
+    struct State {
+        u32 mip_level{};
+        u32 layer{};
+        vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
+        vk::Flags<vk::AccessFlagBits2> access_mask = vk::AccessFlagBits2::eNone;
+        vk::ImageLayout layout = vk::ImageLayout::eUndefined;
+    };
+    State last_state{};
+    std::vector<State> subresource_states{};
+    boost::container::small_vector<u64, 14> mip_hashes{};
     u64 tick_accessed_last{0};
 };
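
The invariant maintained by GetBarriers: either subresource_states is empty and every subresource is in last_state, or it holds exactly levels * layers entries and last_state records the target of the most recent Transit. A hypothetical helper, not part of this commit, that states the first half of that invariant:

    // Hypothetical illustration: true when the whole image shares one state and a
    // full-resource barrier (VK_REMAINING_MIP_LEVELS/..._ARRAY_LAYERS) is valid.
    bool IsStateUniform() const {
        return subresource_states.empty();
    }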


@@ -154,7 +154,7 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_
     if (tex_cache_image.info.IsMipOf(image_info)) {
         tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
-                                vk::AccessFlagBits::eTransferRead);
+                                vk::AccessFlagBits2::eTransferRead, {});

         const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
         ASSERT(num_mips_to_copy == 1);
@@ -176,7 +176,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
     auto& src_image = slot_images[image_id];
     auto& new_image = slot_images[new_image_id];

-    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
+    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
     new_image.CopyImage(src_image);

     FreeImage(image_id);
@@ -263,13 +263,14 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo&
     if (view_info.is_storage) {
         image.Transit(vk::ImageLayout::eGeneral,
-                      vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
+                      vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
+                      view_info.range);
         usage.storage = true;
     } else {
         const auto new_layout = image.info.IsDepthStencil()
                                     ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
                                     : vk::ImageLayout::eShaderReadOnlyOptimal;
-        image.Transit(new_layout, vk::AccessFlagBits::eShaderRead);
+        image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range);
         usage.texture = true;
     }
@@ -284,8 +285,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
     UpdateImage(image_id);

     image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
-                  vk::AccessFlagBits::eColorAttachmentWrite |
-                      vk::AccessFlagBits::eColorAttachmentRead);
+                  vk::AccessFlagBits2::eColorAttachmentWrite |
+                      vk::AccessFlagBits2::eColorAttachmentRead,
+                  view_info.range);

     // Register meta data for this color buffer
     if (!(image.flags & ImageFlagBits::MetaRegistered)) {
@@ -330,8 +332,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
                                        : vk::ImageLayout::eDepthAttachmentOptimal
                          : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
                                        : vk::ImageLayout::eDepthReadOnlyOptimal;
-    image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite |
-                                  vk::AccessFlagBits::eDepthStencilAttachmentRead);
+    image.Transit(new_layout,
+                  vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
+                      vk::AccessFlagBits2::eDepthStencilAttachmentRead,
+                  view_info.range);

     // Register meta data for this depth buffer
     if (!(image.flags & ImageFlagBits::MetaRegistered)) {
@@ -404,7 +408,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     sched_ptr->EndRendering();

     const auto cmdbuf = sched_ptr->CommandBuffer();
-    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
+    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {},
+                  cmdbuf);

     const VAddr image_addr = image.info.guest_address;
     const size_t image_size = image.info.guest_size_bytes;
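
With view_info.range plumbed through FindTexture, FindRenderTarget and FindDepthTarget, only the mips/layers actually referenced by a view are transitioned, so, for example, sampling one mip of an image while another mip remains a render target no longer forces a whole-image layout change (a hypothetical scenario for illustration):

    // view_info.range selects a single mip; Transit() records a barrier for that
    // subresource alone and leaves the other mips in their current state.
    image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal,
                  vk::AccessFlagBits2::eShaderRead, view_info.range);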