video_core: Allow using texture cache to validate texture buffers

This commit is contained in:
IndecisiveTurtle 2024-09-03 16:21:58 +03:00
parent f2b714f6bb
commit 0f2892a93a
8 changed files with 108 additions and 54 deletions

View file

@@ -10,15 +10,19 @@
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCore {
static constexpr size_t NumVertexBuffers = 32;
static constexpr size_t StagingBufferSize = 512_MB;
static constexpr size_t UboStreamBufferSize = 64_MB;
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const AmdGpu::Liverpool* liverpool_, PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, tracker{tracker_},
const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
texture_cache{texture_cache_}, tracker{tracker_},
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
memory_tracker{&tracker} {
@@ -100,9 +104,9 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
return false;
}
std::array<vk::Buffer, NUM_VERTEX_BUFFERS> host_buffers;
std::array<vk::DeviceSize, NUM_VERTEX_BUFFERS> host_offsets;
boost::container::static_vector<AmdGpu::Buffer, NUM_VERTEX_BUFFERS> guest_buffers;
std::array<vk::Buffer, NumVertexBuffers> host_buffers;
std::array<vk::DeviceSize, NumVertexBuffers> host_offsets;
boost::container::static_vector<AmdGpu::Buffer, NumVertexBuffers> guest_buffers;
struct BufferRange {
VAddr base_address;
@@ -117,7 +121,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
// Calculate buffers memory overlaps
bool has_step_rate = false;
boost::container::static_vector<BufferRange, NUM_VERTEX_BUFFERS> ranges{};
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
@@ -152,7 +156,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
return lhv.base_address < rhv.base_address;
});
boost::container::static_vector<BufferRange, NUM_VERTEX_BUFFERS> ranges_merged{ranges[0]};
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges_merged{ranges[0]};
for (auto range : ranges) {
auto& prev_range = ranges_merged.back();
if (prev_range.end_address < range.base_address) {
@@ -241,7 +245,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
const BufferId buffer_id = FindBuffer(device_addr, size);
Buffer& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, device_addr, size);
SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
if (is_written) {
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
}
@@ -459,8 +463,13 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
}
}
bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) {
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
bool is_texel_buffer) {
std::scoped_lock lk{mutex};
if (is_texel_buffer && SynchronizeBufferFromImage(buffer, device_addr, size)) {
return;
}
boost::container::small_vector<vk::BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
@@ -480,7 +489,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size)
// gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy);
});
if (total_size_bytes == 0) {
return true;
return;
}
vk::Buffer src_buffer = staging_buffer.Handle();
if (total_size_bytes < StagingBufferSize) {
@@ -524,7 +533,48 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size)
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
return false;
}
/// Attempts to validate a (texel) buffer's contents from an image in the texture cache
/// instead of re-uploading from guest memory.
/// @param buffer       Destination host buffer that overlaps the image's guest range.
/// @param device_addr  Guest address of the buffer region being synchronized.
/// @param size         Size in bytes of the region being synchronized.
/// @return True if a matching cached image was found and a GPU image->buffer copy was
///         recorded; false if the caller must fall back to a regular CPU upload.
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
    // NoCreate: only reuse an already-cached image, never allocate one here.
    // RelaxDim: the image dimensions need not match; only the backing bytes matter.
    // Exact guest size is still required since RelaxSize is not passed.
    // (Fixed: the previous flags referenced FindFlags::ExactSize, which is not an
    // enumerator of FindFlags; exact-size matching is the default behavior.)
    constexpr FindFlags flags = FindFlags::NoCreate | FindFlags::RelaxDim;
    ImageInfo info{};
    info.guest_address = device_addr;
    info.guest_size_bytes = size;
    const ImageId image_id = texture_cache.FindImage(info, flags);
    if (!image_id) {
        return false;
    }
    Image& image = texture_cache.GetImage(image_id);
    // Build one buffer-image copy region per mip level.
    boost::container::small_vector<vk::BufferImageCopy, 8> copies;
    u32 offset = buffer.Offset(device_addr);
    const u32 num_layers = image.info.resources.layers;
    for (u32 m = 0; m < image.info.resources.levels; m++) {
        const u32 width = std::max(image.info.size.width >> m, 1u);
        const u32 height = std::max(image.info.size.height >> m, 1u);
        const u32 depth =
            image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
        const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
        copies.push_back({
            .bufferOffset = offset,
            .bufferRowLength = static_cast<u32>(mip_pitch),
            .bufferImageHeight = static_cast<u32>(mip_height),
            .imageSubresource{
                // Copy depth only; a combined depth/stencil aspect is not valid for
                // a single VkBufferImageCopy region.
                .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
                .mipLevel = m,
                .baseArrayLayer = 0,
                .layerCount = num_layers,
            },
            .imageOffset = {0, 0, 0},
            .imageExtent = {width, height, depth},
        });
        // Advance past this level for all layers.
        // NOTE(review): assumes mip_ofs is the per-layer byte size/stride of level m
        // in mips_layout — confirm against ImageInfo's mip layout computation.
        offset += mip_ofs * num_layers;
    }
    // Transfer ops cannot be recorded inside a render pass.
    scheduler.EndRendering();
    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
                             copies);
    return true;
}
void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {

View file

@@ -28,7 +28,7 @@ using BufferId = Common::SlotId;
static constexpr BufferId NULL_BUFFER_ID{0};
static constexpr u32 NUM_VERTEX_BUFFERS = 32;
class TextureCache;
class BufferCache {
public:
@@ -53,7 +53,8 @@ public:
public:
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const AmdGpu::Liverpool* liverpool, PageManager& tracker);
const AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
PageManager& tracker);
~BufferCache();
/// Invalidates any buffer in the logical page range.
@@ -116,13 +117,16 @@ private:
template <bool insert>
void ChangeRegister(BufferId buffer_id);
bool SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size);
void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer);
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler;
const AmdGpu::Liverpool* liverpool;
TextureCache& texture_cache;
PageManager& tracker;
StreamBuffer staging_buffer;
StreamBuffer stream_buffer;

View file

@@ -206,7 +206,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
for (const auto& image_desc : info->images) {
const auto tsharp = image_desc.GetSharp(*info);
if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
VideoCore::ImageInfo image_info{tsharp};
VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
const auto& image = texture_cache.GetImage(image_view.image_id);

View file

@@ -17,7 +17,7 @@ namespace Vulkan {
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
buffer_cache{instance, scheduler, liverpool_, page_manager},
buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
if (!Config::nullGpu()) {

View file

@@ -7,7 +7,7 @@
#pragma once
#include <unordered_map>
#include <vulkan/vulkan.h>
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore {
/**
@@ -383,9 +383,10 @@ static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatCl
* @url
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
*/
static bool IsVulkanFormatCompatible(VkFormat lhs, VkFormat rhs) {
if (lhs == rhs)
static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
if (lhs == rhs) {
return true;
return vkFormatClassTable.at(lhs) == vkFormatClassTable.at(rhs);
}
return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs));
}
} // namespace VideoCore

View file

@@ -207,7 +207,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n
if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) {
if (pixel_format == vk::Format::eR32Sfloat) {
pixel_format = vk::Format::eD32SfloatS8Uint;
} else if (pixel_format == vk::Format::eR16Sfloat) {
} else if (pixel_format == vk::Format::eR16Unorm) {
pixel_format = vk::Format::eD16UnormS8Uint;
} else {
UNREACHABLE();

View file

@@ -173,50 +173,37 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
return new_image_id;
}
ImageId TextureCache::FindImage(const ImageInfo& info) {
ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
if (info.guest_address == 0) [[unlikely]] {
return NULL_IMAGE_VIEW_ID;
}
std::unique_lock lock{mutex};
std::scoped_lock lock{mutex};
boost::container::small_vector<ImageId, 8> image_ids;
ForEachImageInRegion(
info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
// Ignore images scheduled for deletion
if (True(image.flags & ImageFlagBits::Deleted)) {
return;
}
// Check if image is fully outside of the region
const auto in_image_cpu_addr = info.guest_address;
const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes;
if (in_image_cpu_addr_end <= image.cpu_addr) {
return;
}
if (in_image_cpu_addr >= image.cpu_addr_end) {
return;
}
image_ids.push_back(image_id);
});
ForEachImageInRegion(info.guest_address, info.guest_size_bytes,
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
ImageId image_id{};
// Check for a perfect match first
for (const auto& cache_id : image_ids) {
auto& cache_image = slot_images[cache_id];
if (cache_image.info.guest_address == info.guest_address &&
cache_image.info.guest_size_bytes == info.guest_size_bytes &&
cache_image.info.size == info.size) {
ASSERT(cache_image.info.type == info.type);
if (IsVulkanFormatCompatible((VkFormat)info.pixel_format,
(VkFormat)cache_image.info.pixel_format)) {
image_id = cache_id;
}
break;
if (cache_image.info.guest_address != info.guest_address) {
continue;
}
if (False(flags & FindFlags::RelaxSize) &&
cache_image.info.guest_size_bytes != info.guest_size_bytes) {
continue;
}
if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) {
continue;
}
if (False(flags & FindFlags::RelaxFmt) &&
!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) {
continue;
}
ASSERT(cache_image.info.type == info.type);
image_id = cache_id;
}
// Try to resolve overlaps (if any)
@@ -227,6 +214,10 @@ ImageId TextureCache::FindImage(const ImageInfo& info) {
}
}
if (True(flags & FindFlags::NoCreate) && !image_id) {
return {};
}
// Create and register a new image
if (!image_id) {
image_id = slot_images.insert(instance, scheduler, info);

View file

@@ -23,6 +23,14 @@ namespace VideoCore {
class BufferCache;
class PageManager;
/// Options controlling how TextureCache::FindImage matches cached images
/// against the requested ImageInfo. Flags may be combined (bitwise-or).
enum class FindFlags {
    NoCreate = 1 << 0,  ///< Do not create an image if searching for one fails.
    RelaxDim = 1 << 1,  ///< Do not check the dimensions of image, only address.
    RelaxSize = 1 << 2, ///< Do not check that the size matches exactly.
    RelaxFmt = 1 << 3,  ///< Do not check that format is compatible.
};
DECLARE_ENUM_FLAG_OPERATORS(FindFlags)
class TextureCache {
struct Traits {
using Entry = boost::container::small_vector<ImageId, 16>;
@@ -44,7 +52,7 @@ public:
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(const ImageInfo& info);
[[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {});
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,