mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-22 04:24:44 +00:00
video_core: Allow using texture cache to validate texture buffers
This commit is contained in:
parent
f2b714f6bb
commit
0f2892a93a
8 changed files with 108 additions and 54 deletions
|
@ -10,15 +10,19 @@
|
|||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t NumVertexBuffers = 32;
|
||||
static constexpr size_t StagingBufferSize = 512_MB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
const AmdGpu::Liverpool* liverpool_, PageManager& tracker_)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, tracker{tracker_},
|
||||
const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
||||
PageManager& tracker_)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
||||
texture_cache{texture_cache_}, tracker{tracker_},
|
||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||
memory_tracker{&tracker} {
|
||||
|
@ -100,9 +104,9 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
return false;
|
||||
}
|
||||
|
||||
std::array<vk::Buffer, NUM_VERTEX_BUFFERS> host_buffers;
|
||||
std::array<vk::DeviceSize, NUM_VERTEX_BUFFERS> host_offsets;
|
||||
boost::container::static_vector<AmdGpu::Buffer, NUM_VERTEX_BUFFERS> guest_buffers;
|
||||
std::array<vk::Buffer, NumVertexBuffers> host_buffers;
|
||||
std::array<vk::DeviceSize, NumVertexBuffers> host_offsets;
|
||||
boost::container::static_vector<AmdGpu::Buffer, NumVertexBuffers> guest_buffers;
|
||||
|
||||
struct BufferRange {
|
||||
VAddr base_address;
|
||||
|
@ -117,7 +121,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
|
||||
// Calculate buffers memory overlaps
|
||||
bool has_step_rate = false;
|
||||
boost::container::static_vector<BufferRange, NUM_VERTEX_BUFFERS> ranges{};
|
||||
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
|
||||
for (const auto& input : vs_info.vs_inputs) {
|
||||
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
|
@ -152,7 +156,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
return lhv.base_address < rhv.base_address;
|
||||
});
|
||||
|
||||
boost::container::static_vector<BufferRange, NUM_VERTEX_BUFFERS> ranges_merged{ranges[0]};
|
||||
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges_merged{ranges[0]};
|
||||
for (auto range : ranges) {
|
||||
auto& prev_range = ranges_merged.back();
|
||||
if (prev_range.end_address < range.base_address) {
|
||||
|
@ -241,7 +245,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
|
|||
|
||||
const BufferId buffer_id = FindBuffer(device_addr, size);
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
SynchronizeBuffer(buffer, device_addr, size);
|
||||
SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
|
||||
if (is_written) {
|
||||
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
|
||||
}
|
||||
|
@ -459,8 +463,13 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
|
|||
}
|
||||
}
|
||||
|
||||
bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
bool is_texel_buffer) {
|
||||
std::scoped_lock lk{mutex};
|
||||
if (is_texel_buffer && SynchronizeBufferFromImage(buffer, device_addr, size)) {
|
||||
return;
|
||||
}
|
||||
|
||||
boost::container::small_vector<vk::BufferCopy, 4> copies;
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
|
@ -480,7 +489,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size)
|
|||
// gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy);
|
||||
});
|
||||
if (total_size_bytes == 0) {
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
vk::Buffer src_buffer = staging_buffer.Handle();
|
||||
if (total_size_bytes < StagingBufferSize) {
|
||||
|
@ -524,7 +533,48 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size)
|
|||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||
constexpr FindFlags flags = FindFlags::NoCreate | FindFlags::ExactSize | FindFlags::RelaxDim;
|
||||
ImageInfo info{};
|
||||
info.guest_address = device_addr;
|
||||
info.guest_size_bytes = size;
|
||||
const ImageId image_id = texture_cache.FindImage(info, flags);
|
||||
if (!image_id) {
|
||||
return false;
|
||||
}
|
||||
Image& image = texture_cache.GetImage(image_id);
|
||||
boost::container::small_vector<vk::BufferImageCopy, 8> copies;
|
||||
u32 offset = buffer.Offset(device_addr);
|
||||
const u32 num_layers = image.info.resources.layers;
|
||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth =
|
||||
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
copies.push_back({
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = static_cast<u32>(mip_pitch),
|
||||
.bufferImageHeight = static_cast<u32>(mip_height),
|
||||
.imageSubresource{
|
||||
.aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = num_layers,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, depth},
|
||||
});
|
||||
offset += mip_ofs * num_layers;
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
||||
copies);
|
||||
return true;
|
||||
}
|
||||
|
||||
void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
||||
|
|
|
@ -28,7 +28,7 @@ using BufferId = Common::SlotId;
|
|||
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
|
||||
static constexpr u32 NUM_VERTEX_BUFFERS = 32;
|
||||
class TextureCache;
|
||||
|
||||
class BufferCache {
|
||||
public:
|
||||
|
@ -53,7 +53,8 @@ public:
|
|||
|
||||
public:
|
||||
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
const AmdGpu::Liverpool* liverpool, PageManager& tracker);
|
||||
const AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
|
||||
PageManager& tracker);
|
||||
~BufferCache();
|
||||
|
||||
/// Invalidates any buffer in the logical page range.
|
||||
|
@ -116,13 +117,16 @@ private:
|
|||
template <bool insert>
|
||||
void ChangeRegister(BufferId buffer_id);
|
||||
|
||||
bool SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size);
|
||||
void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer);
|
||||
|
||||
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
|
||||
|
||||
void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
|
||||
|
||||
const Vulkan::Instance& instance;
|
||||
Vulkan::Scheduler& scheduler;
|
||||
const AmdGpu::Liverpool* liverpool;
|
||||
TextureCache& texture_cache;
|
||||
PageManager& tracker;
|
||||
StreamBuffer staging_buffer;
|
||||
StreamBuffer stream_buffer;
|
||||
|
|
|
@ -206,7 +206,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
for (const auto& image_desc : info->images) {
|
||||
const auto tsharp = image_desc.GetSharp(*info);
|
||||
if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
VideoCore::ImageInfo image_info{tsharp};
|
||||
VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
|
|
|
@ -17,7 +17,7 @@ namespace Vulkan {
|
|||
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
|
||||
buffer_cache{instance, scheduler, liverpool_, page_manager},
|
||||
buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
|
||||
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
|
||||
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
|
||||
if (!Config::nullGpu()) {
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore {
|
||||
/**
|
||||
|
@ -383,9 +383,10 @@ static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatCl
|
|||
* @url
|
||||
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
|
||||
*/
|
||||
static bool IsVulkanFormatCompatible(VkFormat lhs, VkFormat rhs) {
|
||||
if (lhs == rhs)
|
||||
static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
|
||||
if (lhs == rhs) {
|
||||
return true;
|
||||
return vkFormatClassTable.at(lhs) == vkFormatClassTable.at(rhs);
|
||||
}
|
||||
return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs));
|
||||
}
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -207,7 +207,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n
|
|||
if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) {
|
||||
if (pixel_format == vk::Format::eR32Sfloat) {
|
||||
pixel_format = vk::Format::eD32SfloatS8Uint;
|
||||
} else if (pixel_format == vk::Format::eR16Sfloat) {
|
||||
} else if (pixel_format == vk::Format::eR16Unorm) {
|
||||
pixel_format = vk::Format::eD16UnormS8Uint;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
|
|
|
@ -173,50 +173,37 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
|
|||
return new_image_id;
|
||||
}
|
||||
|
||||
ImageId TextureCache::FindImage(const ImageInfo& info) {
|
||||
ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
|
||||
if (info.guest_address == 0) [[unlikely]] {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
|
||||
std::unique_lock lock{mutex};
|
||||
std::scoped_lock lock{mutex};
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
ForEachImageInRegion(
|
||||
info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
|
||||
// Ignore images scheduled for deletion
|
||||
if (True(image.flags & ImageFlagBits::Deleted)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if image is fully outside of the region
|
||||
const auto in_image_cpu_addr = info.guest_address;
|
||||
const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes;
|
||||
if (in_image_cpu_addr_end <= image.cpu_addr) {
|
||||
return;
|
||||
}
|
||||
if (in_image_cpu_addr >= image.cpu_addr_end) {
|
||||
return;
|
||||
}
|
||||
|
||||
image_ids.push_back(image_id);
|
||||
});
|
||||
ForEachImageInRegion(info.guest_address, info.guest_size_bytes,
|
||||
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
|
||||
|
||||
ImageId image_id{};
|
||||
|
||||
// Check for a perfect match first
|
||||
for (const auto& cache_id : image_ids) {
|
||||
auto& cache_image = slot_images[cache_id];
|
||||
|
||||
if (cache_image.info.guest_address == info.guest_address &&
|
||||
cache_image.info.guest_size_bytes == info.guest_size_bytes &&
|
||||
cache_image.info.size == info.size) {
|
||||
|
||||
ASSERT(cache_image.info.type == info.type);
|
||||
if (IsVulkanFormatCompatible((VkFormat)info.pixel_format,
|
||||
(VkFormat)cache_image.info.pixel_format)) {
|
||||
image_id = cache_id;
|
||||
}
|
||||
break;
|
||||
if (cache_image.info.guest_address != info.guest_address) {
|
||||
continue;
|
||||
}
|
||||
if (False(flags & FindFlags::RelaxSize) &&
|
||||
cache_image.info.guest_size_bytes != info.guest_size_bytes) {
|
||||
continue;
|
||||
}
|
||||
if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) {
|
||||
continue;
|
||||
}
|
||||
if (False(flags & FindFlags::RelaxFmt) &&
|
||||
!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) {
|
||||
continue;
|
||||
}
|
||||
ASSERT(cache_image.info.type == info.type);
|
||||
image_id = cache_id;
|
||||
}
|
||||
|
||||
// Try to resolve overlaps (if any)
|
||||
|
@ -227,6 +214,10 @@ ImageId TextureCache::FindImage(const ImageInfo& info) {
|
|||
}
|
||||
}
|
||||
|
||||
if (True(flags & FindFlags::NoCreate) && !image_id) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Create and register a new image
|
||||
if (!image_id) {
|
||||
image_id = slot_images.insert(instance, scheduler, info);
|
||||
|
|
|
@ -23,6 +23,14 @@ namespace VideoCore {
|
|||
class BufferCache;
|
||||
class PageManager;
|
||||
|
||||
enum class FindFlags {
|
||||
NoCreate = 1 << 0, ///< Do not create an image if searching for one fails.
|
||||
RelaxDim = 1 << 1, ///< Do not check the dimensions of the image, only the address.
|
||||
RelaxSize = 1 << 2, ///< Do not check that the size matches exactly.
|
||||
RelaxFmt = 1 << 3, ///< Do not check that format is compatible.
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(FindFlags)
|
||||
|
||||
class TextureCache {
|
||||
struct Traits {
|
||||
using Entry = boost::container::small_vector<ImageId, 16>;
|
||||
|
@ -44,7 +52,7 @@ public:
|
|||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Retrieves the image handle of the image with the provided attributes.
|
||||
[[nodiscard]] ImageId FindImage(const ImageInfo& info);
|
||||
[[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {});
|
||||
|
||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||
[[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
|
||||
|
|
Loading…
Add table
Reference in a new issue