mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 20:14:45 +00:00
video_core: Improve invalidate heuristic
This commit is contained in:
parent
324180cd2e
commit
842a5dabd4
9 changed files with 110 additions and 78 deletions
|
@ -10,6 +10,7 @@
|
|||
#include <arpa/inet.h>
|
||||
#endif
|
||||
|
||||
#include <thread>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
|
@ -559,6 +560,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNetEpollWait() {
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
LOG_TRACE(Lib_Net, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
|
|
@ -541,46 +541,61 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
|||
}
|
||||
|
||||
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||
constexpr FindFlags flags = FindFlags::NoCreate | FindFlags::RelaxSize |
|
||||
FindFlags::RelaxFmt | FindFlags::RelaxDim;
|
||||
ImageInfo info{};
|
||||
info.guest_address = device_addr;
|
||||
info.guest_size_bytes = size;
|
||||
info.type = vk::ImageType::e2D;
|
||||
const ImageId image_id = texture_cache.FindImage(info, flags);
|
||||
if (!image_id) {
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
size = std::min(size, MaxInvalidateDist);
|
||||
texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
|
||||
if (True(image.flags & ImageFlagBits::CpuModified) ||
|
||||
False(image.flags & ImageFlagBits::GpuModified)) {
|
||||
return;
|
||||
}
|
||||
if (image.cpu_addr < device_addr || image.cpu_addr > device_addr + size) {
|
||||
return;
|
||||
}
|
||||
image_ids.push_back(image_id);
|
||||
});
|
||||
if (image_ids.empty()) {
|
||||
return false;
|
||||
}
|
||||
Image& image = texture_cache.GetImage(image_id);
|
||||
// Sort images by modification tick. If there are overlaps we want to
|
||||
// copy from least to most recently modified.
|
||||
std::ranges::sort(image_ids, [&](ImageId lhs_id, ImageId rhs_id) {
|
||||
const Image& lhs = texture_cache.GetImage(lhs_id);
|
||||
const Image& rhs = texture_cache.GetImage(rhs_id);
|
||||
return lhs.tick_accessed_last < rhs.tick_accessed_last;
|
||||
});
|
||||
boost::container::small_vector<vk::BufferImageCopy, 8> copies;
|
||||
u32 offset = buffer.Offset(device_addr);
|
||||
const u32 num_layers = image.info.resources.layers;
|
||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth =
|
||||
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
copies.push_back({
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = static_cast<u32>(mip_pitch),
|
||||
.bufferImageHeight = static_cast<u32>(mip_height),
|
||||
.imageSubresource{
|
||||
.aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = num_layers,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, depth},
|
||||
});
|
||||
offset += mip_ofs * num_layers;
|
||||
for (const ImageId image_id : image_ids) {
|
||||
copies.clear();
|
||||
Image& image = texture_cache.GetImage(image_id);
|
||||
u32 offset = buffer.Offset(image.cpu_addr);
|
||||
const u32 num_layers = image.info.resources.layers;
|
||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth =
|
||||
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
copies.push_back({
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = static_cast<u32>(mip_pitch),
|
||||
.bufferImageHeight = static_cast<u32>(mip_height),
|
||||
.imageSubresource{
|
||||
.aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = num_layers,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, depth},
|
||||
});
|
||||
offset += mip_ofs * num_layers;
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
||||
copies);
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
||||
copies);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -167,9 +167,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemory(address, size);
|
||||
}
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
|
||||
|
@ -184,13 +181,15 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
}
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
|
||||
if (auto barrier =
|
||||
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead,
|
||||
vk::PipelineStageFlagBits2::eComputeShader)) {
|
||||
buffer_barriers.emplace_back(*barrier);
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemory(address, size);
|
||||
}
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
|
@ -252,10 +251,11 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
if (!buffer_barriers.empty()) {
|
||||
auto dependencies = vk::DependencyInfo{
|
||||
const auto dependencies = vk::DependencyInfo{
|
||||
.bufferMemoryBarrierCount = u32(buffer_barriers.size()),
|
||||
.pBufferMemoryBarriers = buffer_barriers.data(),
|
||||
};
|
||||
scheduler.EndRendering();
|
||||
cmdbuf.pipelineBarrier2(dependencies);
|
||||
}
|
||||
|
||||
|
|
|
@ -405,15 +405,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
});
|
||||
}
|
||||
|
||||
for (const auto& tex_buffer : stage->texture_buffers) {
|
||||
const auto vsharp = tex_buffer.GetSharp(*stage);
|
||||
for (const auto& desc : stage->texture_buffers) {
|
||||
const auto vsharp = desc.GetSharp(*stage);
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written, true);
|
||||
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
|
||||
"Texel buffer stride must match format stride");
|
||||
|
@ -423,22 +423,25 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddOffset(binding, adjust / fmt_stride);
|
||||
}
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written,
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
const auto dst_access = tex_buffer.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead;
|
||||
const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead;
|
||||
if (auto barrier = vk_buffer->GetBarrier(
|
||||
dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
|
||||
buffer_barriers.emplace_back(*barrier);
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemory(address, size);
|
||||
}
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
});
|
||||
}
|
||||
|
@ -497,10 +500,11 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
if (!buffer_barriers.empty()) {
|
||||
auto dependencies = vk::DependencyInfo{
|
||||
const auto dependencies = vk::DependencyInfo{
|
||||
.bufferMemoryBarrierCount = u32(buffer_barriers.size()),
|
||||
.pBufferMemoryBarriers = buffer_barriers.data(),
|
||||
};
|
||||
scheduler.EndRendering();
|
||||
cmdbuf.pipelineBarrier2(dependencies);
|
||||
}
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
|
|||
case 0x609a13b: // Vertex attribute at location not consumed by shader
|
||||
case 0xc81ad50e:
|
||||
case 0xb7c39078:
|
||||
case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE
|
||||
case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error)
|
||||
return VK_FALSE;
|
||||
default:
|
||||
|
|
|
@ -166,8 +166,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
|
||||
image.Create(image_ci);
|
||||
|
||||
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}",
|
||||
info.guest_address, info.guest_size_bytes);
|
||||
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {:#x}:{:#x}",
|
||||
info.size.width, info.size.height, info.size.depth, info.guest_address,
|
||||
info.guest_size_bytes);
|
||||
}
|
||||
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||
|
|
|
@ -117,6 +117,7 @@ struct Image {
|
|||
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
|
||||
boost::container::small_vector<u64, 14> mip_hashes;
|
||||
u64 tick_accessed_last{0};
|
||||
u64 modification_tick{0};
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -38,12 +38,11 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
|||
TextureCache::~TextureCache() = default;
|
||||
|
||||
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
|
||||
static constexpr size_t MaxInvalidateDist = 128_MB;
|
||||
std::unique_lock lock{mutex};
|
||||
std::scoped_lock lock{mutex};
|
||||
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
|
||||
const size_t image_dist =
|
||||
image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr;
|
||||
if (image_dist < MaxInvalidateDist && image.info.size.width > 16) {
|
||||
if (image_dist < MaxInvalidateDist) {
|
||||
// Ensure image is reuploaded when accessed again.
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
}
|
||||
|
@ -152,7 +151,6 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_
|
|||
}
|
||||
|
||||
ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
|
||||
|
||||
const auto new_image_id = slot_images.insert(instance, scheduler, info);
|
||||
RegisterImage(new_image_id);
|
||||
|
||||
|
@ -220,7 +218,9 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
|
|||
RegisterImage(image_id);
|
||||
}
|
||||
|
||||
slot_images[image_id].tick_accessed_last = scheduler.CurrentTick();
|
||||
Image& image = slot_images[image_id];
|
||||
image.tick_accessed_last = scheduler.CurrentTick();
|
||||
image.modification_tick = ++modification_tick;
|
||||
|
||||
return image_id;
|
||||
}
|
||||
|
@ -248,8 +248,11 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(info);
|
||||
UpdateImage(image_id);
|
||||
Image& image = slot_images[image_id];
|
||||
if (view_info.is_storage) {
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
}
|
||||
UpdateImage(image_id);
|
||||
auto& usage = image.info.usage;
|
||||
|
||||
if (view_info.is_storage) {
|
||||
|
@ -405,7 +408,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
|||
// hazard
|
||||
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||
auto dependencies = vk::DependencyInfo{
|
||||
const auto dependencies = vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &barrier.value(),
|
||||
};
|
||||
|
|
|
@ -31,6 +31,8 @@ enum class FindFlags {
|
|||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(FindFlags)
|
||||
|
||||
static constexpr u32 MaxInvalidateDist = 12_MB;
|
||||
|
||||
class TextureCache {
|
||||
struct Traits {
|
||||
using Entry = boost::container::small_vector<ImageId, 16>;
|
||||
|
@ -114,25 +116,6 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
|
||||
|
||||
/// Iterate over all page indices in a range
|
||||
template <typename Func>
|
||||
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
|
||||
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||
const u64 page_end = (addr + size - 1) >> Traits::PageBits;
|
||||
for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
|
||||
if constexpr (RETURNS_BOOL) {
|
||||
if (func(page)) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
func(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
|
||||
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||
|
@ -174,6 +157,26 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/// Iterate over all page indices in a range
|
||||
template <typename Func>
|
||||
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
|
||||
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||
const u64 page_end = (addr + size - 1) >> Traits::PageBits;
|
||||
for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
|
||||
if constexpr (RETURNS_BOOL) {
|
||||
if (func(page)) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
func(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers an image view for provided image
|
||||
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
|
||||
|
||||
/// Create an image from the given parameters
|
||||
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);
|
||||
|
||||
|
@ -209,6 +212,7 @@ private:
|
|||
tsl::robin_map<u64, Sampler> samplers;
|
||||
PageTable page_table;
|
||||
std::mutex mutex;
|
||||
u64 modification_tick{0};
|
||||
|
||||
struct MetaDataInfo {
|
||||
enum class Type {
|
||||
|
|
Loading…
Add table
Reference in a new issue