diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index d67e953eb..f8382608d 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -10,15 +10,19 @@ #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/texture_cache.h" namespace VideoCore { +static constexpr size_t NumVertexBuffers = 32; static constexpr size_t StagingBufferSize = 512_MB; static constexpr size_t UboStreamBufferSize = 64_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - const AmdGpu::Liverpool* liverpool_, PageManager& tracker_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, tracker{tracker_}, + const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, + PageManager& tracker_) + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, + texture_cache{texture_cache_}, tracker{tracker_}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, memory_tracker{&tracker} { @@ -100,9 +104,9 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { return false; } - std::array host_buffers; - std::array host_offsets; - boost::container::static_vector guest_buffers; + std::array host_buffers; + std::array host_offsets; + boost::container::static_vector guest_buffers; struct BufferRange { VAddr base_address; @@ -117,7 +121,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { // Calculate buffers memory overlaps bool has_step_rate = false; - boost::container::static_vector ranges{}; + boost::container::static_vector ranges{}; for (const auto& input : vs_info.vs_inputs) { if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { @@ -152,7 +156,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { return lhv.base_address < rhv.base_address; }); - boost::container::static_vector ranges_merged{ranges[0]}; + boost::container::static_vector ranges_merged{ranges[0]}; for (auto range : ranges) { auto& prev_range = ranges_merged.back(); if (prev_range.end_address < range.base_address) { @@ -241,7 +245,7 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b const BufferId buffer_id = FindBuffer(device_addr, size); Buffer& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, device_addr, size); + SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); if (is_written) { memory_tracker.MarkRegionAsGpuModified(device_addr, size); } @@ -459,8 +463,13 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { } } -bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) { +void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, + bool is_texel_buffer) { std::scoped_lock lk{mutex}; + if (is_texel_buffer && SynchronizeBufferFromImage(buffer, device_addr, size)) { + return; + } + boost::container::small_vector copies; u64 total_size_bytes = 0; u64 largest_copy = 0; @@ -480,7 +489,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) // gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy); }); if (total_size_bytes == 0) { - return true; + return; } vk::Buffer src_buffer = staging_buffer.Handle(); if (total_size_bytes < StagingBufferSize) { @@ -524,7 +533,48 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); - return false; +} + +bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { + constexpr FindFlags flags = FindFlags::NoCreate | FindFlags::ExactSize | FindFlags::RelaxDim; + ImageInfo info{}; + info.guest_address = device_addr; + info.guest_size_bytes = size; + const ImageId image_id = texture_cache.FindImage(info, flags); + if (!image_id) { + return false; + } + Image& image = texture_cache.GetImage(image_id); + boost::container::small_vector copies; + u32 offset = buffer.Offset(device_addr); + const u32 num_layers = image.info.resources.layers; + for (u32 m = 0; m < image.info.resources.levels; m++) { + const u32 width = std::max(image.info.size.width >> m, 1u); + const u32 height = std::max(image.info.size.height >> m, 1u); + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + copies.push_back({ + .bufferOffset = offset, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + offset += mip_ofs * num_layers; + } + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); + return true; } void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9be258ab9..b38b00f07 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -28,7 +28,7 @@ using BufferId = Common::SlotId; static constexpr BufferId NULL_BUFFER_ID{0}; -static constexpr u32 NUM_VERTEX_BUFFERS = 32; +class TextureCache; class BufferCache { public: @@ -53,7 +53,8 @@ public: public: explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - const AmdGpu::Liverpool* liverpool, PageManager& tracker); + const AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, + PageManager& tracker); ~BufferCache(); /// Invalidates any buffer in the logical page range. @@ -116,13 +117,16 @@ private: template void ChangeRegister(BufferId buffer_id); - bool SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size); + void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer); + + bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; const AmdGpu::Liverpool* liverpool; + TextureCache& texture_cache; PageManager& tracker; StreamBuffer staging_buffer; StreamBuffer stream_buffer; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index b1a23532d..33e20674d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -206,7 +206,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, for (const auto& image_desc : info->images) { const auto tsharp = image_desc.GetSharp(*info); if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4207c18d6..9f72d0448 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -17,7 +17,7 @@ namespace Vulkan { Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, page_manager{this}, - buffer_cache{instance, scheduler, liverpool_, page_manager}, + buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager}, texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} { if (!Config::nullGpu()) { diff --git a/src/video_core/texture_cache/host_compatibility.h b/src/video_core/texture_cache/host_compatibility.h index 0b4b6764e..a73f7e6be 100644 --- a/src/video_core/texture_cache/host_compatibility.h +++ b/src/video_core/texture_cache/host_compatibility.h @@ -7,7 +7,7 @@ #pragma once #include -#include +#include "video_core/renderer_vulkan/vk_common.h" namespace VideoCore { /** @@ -383,9 +383,10 @@ static const std::unordered_map vkFormatCl * @url * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility */ -static bool IsVulkanFormatCompatible(VkFormat lhs, VkFormat rhs) { - if (lhs == rhs) +static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) { + if (lhs == rhs) { return true; - return vkFormatClassTable.at(lhs) == vkFormatClassTable.at(rhs); + } + return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs)); } } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index bd4671688..c723557db 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -207,7 +207,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) { if (pixel_format == vk::Format::eR32Sfloat) { pixel_format = vk::Format::eD32SfloatS8Uint; - } else if (pixel_format == vk::Format::eR16Sfloat) { + } else if (pixel_format == vk::Format::eR16Unorm) { pixel_format = vk::Format::eD16UnormS8Uint; } else { UNREACHABLE(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index d49b4e337..89b263b78 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -173,50 +173,37 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { return new_image_id; } -ImageId TextureCache::FindImage(const ImageInfo& info) { +ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { if (info.guest_address == 0) [[unlikely]] { return NULL_IMAGE_VIEW_ID; } - std::unique_lock lock{mutex}; + std::scoped_lock lock{mutex}; boost::container::small_vector image_ids; - ForEachImageInRegion( - info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { - // Ignore images scheduled for deletion - if (True(image.flags & ImageFlagBits::Deleted)) { - return; - } - - // Check if image is fully outside of the region - const auto in_image_cpu_addr = info.guest_address; - const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes; - if (in_image_cpu_addr_end <= image.cpu_addr) { - return; - } - if (in_image_cpu_addr >= image.cpu_addr_end) { - return; - } - - image_ids.push_back(image_id); - }); + ForEachImageInRegion(info.guest_address, info.guest_size_bytes, + [&](ImageId image_id, Image& image) { image_ids.push_back(image_id); }); ImageId image_id{}; // Check for a perfect match first for (const auto& cache_id : image_ids) { auto& cache_image = slot_images[cache_id]; - - if (cache_image.info.guest_address == info.guest_address && - cache_image.info.guest_size_bytes == info.guest_size_bytes && - cache_image.info.size == info.size) { - - ASSERT(cache_image.info.type == info.type); - if (IsVulkanFormatCompatible((VkFormat)info.pixel_format, - (VkFormat)cache_image.info.pixel_format)) { - image_id = cache_id; - } - break; + if (cache_image.info.guest_address != info.guest_address) { + continue; } + if (False(flags & FindFlags::RelaxSize) && + cache_image.info.guest_size_bytes != info.guest_size_bytes) { + continue; + } + if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) { + continue; + } + if (False(flags & FindFlags::RelaxFmt) && + !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) { + continue; + } + ASSERT(cache_image.info.type == info.type); + image_id = cache_id; } // Try to resolve overlaps (if any) @@ -227,6 +214,10 @@ ImageId TextureCache::FindImage(const ImageInfo& info) { } } + if (True(flags & FindFlags::NoCreate) && !image_id) { + return {}; + } + // Create and register a new image if (!image_id) { image_id = slot_images.insert(instance, scheduler, info); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6ed1d7b94..8a5e22c41 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -23,6 +23,14 @@ namespace VideoCore { class BufferCache; class PageManager; +enum class FindFlags { + NoCreate = 1 << 0, ///< Do not create an image if searching for one fails. + RelaxDim = 1 << 1, ///< Do not check the dimentions of image, only address. + RelaxSize = 1 << 2, ///< Do not check that the size matches exactly. + RelaxFmt = 1 << 3, ///< Do not check that format is compatible. +}; +DECLARE_ENUM_FLAG_OPERATORS(FindFlags) + class TextureCache { struct Traits { using Entry = boost::container::small_vector; @@ -44,7 +52,7 @@ public: void UnmapMemory(VAddr cpu_addr, size_t size); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(const ImageInfo& info); + [[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {}); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,