diff --git a/src/core/libraries/network/net.cpp b/src/core/libraries/network/net.cpp
index 2c03dde3e..9c467a104 100644
--- a/src/core/libraries/network/net.cpp
+++ b/src/core/libraries/network/net.cpp
@@ -10,6 +10,7 @@
 #include
 #endif
 
+#include <thread>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/libraries/error_codes.h"
@@ -559,6 +560,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
 }
 
 int PS4_SYSV_ABI sceNetEpollWait() {
+    std::this_thread::sleep_for(std::chrono::microseconds(1));
     LOG_TRACE(Lib_Net, "(STUBBED) called");
     return ORBIS_OK;
 }
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index ceedc1746..123bbe5ef 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -541,46 +541,61 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
 }
 
 bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
-    constexpr FindFlags flags = FindFlags::NoCreate | FindFlags::RelaxSize |
-                                FindFlags::RelaxFmt | FindFlags::RelaxDim;
-    ImageInfo info{};
-    info.guest_address = device_addr;
-    info.guest_size_bytes = size;
-    info.type = vk::ImageType::e2D;
-    const ImageId image_id = texture_cache.FindImage(info, flags);
-    if (!image_id) {
+    boost::container::small_vector image_ids;
+    size = std::min(size, MaxInvalidateDist);
+    texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
+        if (True(image.flags & ImageFlagBits::CpuModified) ||
+            False(image.flags & ImageFlagBits::GpuModified)) {
+            return;
+        }
+        if (image.cpu_addr < device_addr || image.cpu_addr > device_addr + size) {
+            return;
+        }
+        image_ids.push_back(image_id);
+    });
+    if (image_ids.empty()) {
         return false;
     }
-    Image& image = texture_cache.GetImage(image_id);
+    // Sort images by modification tick. If there are overlaps we want to
+    // copy from least to most recently modified.
+    std::ranges::sort(image_ids, [&](ImageId lhs_id, ImageId rhs_id) {
+        const Image& lhs = texture_cache.GetImage(lhs_id);
+        const Image& rhs = texture_cache.GetImage(rhs_id);
+        return lhs.tick_accessed_last < rhs.tick_accessed_last;
+    });
     boost::container::small_vector copies;
-    u32 offset = buffer.Offset(device_addr);
-    const u32 num_layers = image.info.resources.layers;
-    for (u32 m = 0; m < image.info.resources.levels; m++) {
-        const u32 width = std::max(image.info.size.width >> m, 1u);
-        const u32 height = std::max(image.info.size.height >> m, 1u);
-        const u32 depth =
-            image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
-        const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
-        copies.push_back({
-            .bufferOffset = offset,
-            .bufferRowLength = static_cast<u32>(mip_pitch),
-            .bufferImageHeight = static_cast<u32>(mip_height),
-            .imageSubresource{
-                .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
-                .mipLevel = m,
-                .baseArrayLayer = 0,
-                .layerCount = num_layers,
-            },
-            .imageOffset = {0, 0, 0},
-            .imageExtent = {width, height, depth},
-        });
-        offset += mip_ofs * num_layers;
+    for (const ImageId image_id : image_ids) {
+        copies.clear();
+        Image& image = texture_cache.GetImage(image_id);
+        u32 offset = buffer.Offset(image.cpu_addr);
+        const u32 num_layers = image.info.resources.layers;
+        for (u32 m = 0; m < image.info.resources.levels; m++) {
+            const u32 width = std::max(image.info.size.width >> m, 1u);
+            const u32 height = std::max(image.info.size.height >> m, 1u);
+            const u32 depth =
+                image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
+            const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
+            copies.push_back({
+                .bufferOffset = offset,
+                .bufferRowLength = static_cast<u32>(mip_pitch),
+                .bufferImageHeight = static_cast<u32>(mip_height),
+                .imageSubresource{
+                    .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+                    .mipLevel = m,
+                    .baseArrayLayer = 0,
+                    .layerCount = num_layers,
+                },
+                .imageOffset = {0, 0, 0},
+                .imageExtent = {width, height, depth},
+            });
+            offset += mip_ofs * num_layers;
+        }
+        scheduler.EndRendering();
+        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
+        const auto cmdbuf = scheduler.CommandBuffer();
+        cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
+                                 copies);
     }
-    scheduler.EndRendering();
-    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
-    const auto cmdbuf = scheduler.CommandBuffer();
-    cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
-                             copies);
     return true;
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 33e20674d..64092a79d 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -167,9 +167,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                     LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
                 }
             }
-            if (desc.is_written) {
-                texture_cache.InvalidateMemory(address, size);
-            }
             const u32 alignment = instance.TexelBufferMinAlignment();
             const auto [vk_buffer, offset] =
                 buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
@@ -184,13 +181,15 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
             }
             buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                           vsharp.GetDataFmt(), vsharp.GetNumberFmt());
-
             if (auto barrier =
                     vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
                                                           : vk::AccessFlagBits2::eShaderRead,
                                           vk::PipelineStageFlagBits2::eComputeShader)) {
                 buffer_barriers.emplace_back(*barrier);
             }
+            if (desc.is_written) {
+                texture_cache.InvalidateMemory(address, size);
+            }
         }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
@@ -252,10 +251,11 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
 
     const auto cmdbuf = scheduler.CommandBuffer();
     if (!buffer_barriers.empty()) {
-        auto dependencies = vk::DependencyInfo{
+        const auto dependencies = vk::DependencyInfo{
             .bufferMemoryBarrierCount = u32(buffer_barriers.size()),
             .pBufferMemoryBarriers = buffer_barriers.data(),
         };
+        scheduler.EndRendering();
         cmdbuf.pipelineBarrier2(dependencies);
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 5aec456fb..af4bb5695 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -405,15 +405,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
             });
         }
 
-        for (const auto& tex_buffer : stage->texture_buffers) {
-            const auto vsharp = tex_buffer.GetSharp(*stage);
+        for (const auto& desc : stage->texture_buffers) {
+            const auto vsharp = desc.GetSharp(*stage);
             vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
             const u32 size = vsharp.GetSize();
             if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
                 const VAddr address = vsharp.base_address;
                 const u32 alignment = instance.TexelBufferMinAlignment();
                 const auto [vk_buffer, offset] =
-                    buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written, true);
+                    buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
                 const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
                 ASSERT_MSG(fmt_stride == vsharp.GetStride(),
                            "Texel buffer stride must match format stride");
@@ -423,22 +423,25 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                     ASSERT(adjust % fmt_stride == 0);
                     push_data.AddOffset(binding, adjust / fmt_stride);
                 }
-                buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written,
+                buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                               vsharp.GetDataFmt(), vsharp.GetNumberFmt());
-                const auto dst_access = tex_buffer.is_written ? vk::AccessFlagBits2::eShaderWrite
-                                                              : vk::AccessFlagBits2::eShaderRead;
+                const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
+                                                        : vk::AccessFlagBits2::eShaderRead;
                 if (auto barrier = vk_buffer->GetBarrier(
                         dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
                     buffer_barriers.emplace_back(*barrier);
                 }
+                if (desc.is_written) {
+                    texture_cache.InvalidateMemory(address, size);
+                }
             }
             set_writes.push_back({
                 .dstSet = VK_NULL_HANDLE,
                 .dstBinding = binding++,
                 .dstArrayElement = 0,
                 .descriptorCount = 1,
-                .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
-                                                        : vk::DescriptorType::eUniformTexelBuffer,
+                .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
+                                                  : vk::DescriptorType::eUniformTexelBuffer,
                 .pTexelBufferView = &buffer_view,
             });
         }
@@ -497,10 +500,11 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
 
     const auto cmdbuf = scheduler.CommandBuffer();
     if (!buffer_barriers.empty()) {
-        auto dependencies = vk::DependencyInfo{
+        const auto dependencies = vk::DependencyInfo{
            .bufferMemoryBarrierCount = u32(buffer_barriers.size()),
            .pBufferMemoryBarriers = buffer_barriers.data(),
        };
+        scheduler.EndRendering();
         cmdbuf.pipelineBarrier2(dependencies);
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp
index f9650c159..feadda96c 100644
--- a/src/video_core/renderer_vulkan/vk_platform.cpp
+++ b/src/video_core/renderer_vulkan/vk_platform.cpp
@@ -43,6 +43,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
     case 0x609a13b: // Vertex attribute at location not consumed by shader
     case 0xc81ad50e:
     case 0xb7c39078:
+    case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE
     case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error)
         return VK_FALSE;
     default:
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 13ea7ce93..2a5c4c434 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -166,8 +166,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
 
     image.Create(image_ci);
 
-    Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}",
-                          info.guest_address, info.guest_size_bytes);
+    Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {:#x}:{:#x}",
+                          info.size.width, info.size.height, info.size.depth, info.guest_address,
+                          info.guest_size_bytes);
 }
 
 void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index f932b25a0..11ccdc1d1 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -117,6 +117,7 @@ struct Image {
     vk::ImageLayout layout = vk::ImageLayout::eUndefined;
     boost::container::small_vector mip_hashes;
     u64 tick_accessed_last{0};
+    u64 modification_tick{0};
 };
 
 } // namespace VideoCore
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index f4aefa7eb..4ad50c65f 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -38,12 +38,11 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
 TextureCache::~TextureCache() = default;
 
 void TextureCache::InvalidateMemory(VAddr address, size_t size) {
-    static constexpr size_t MaxInvalidateDist = 128_MB;
-    std::unique_lock lock{mutex};
+    std::scoped_lock lock{mutex};
     ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
         const size_t image_dist =
             image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr;
-        if (image_dist < MaxInvalidateDist && image.info.size.width > 16) {
+        if (image_dist < MaxInvalidateDist) {
             // Ensure image is reuploaded when accessed again.
             image.flags |= ImageFlagBits::CpuModified;
         }
@@ -152,7 +151,6 @@
 }
 
 ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
-
     const auto new_image_id = slot_images.insert(instance, scheduler, info);
     RegisterImage(new_image_id);
 
@@ -220,7 +218,9 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
         RegisterImage(image_id);
     }
 
-    slot_images[image_id].tick_accessed_last = scheduler.CurrentTick();
+    Image& image = slot_images[image_id];
+    image.tick_accessed_last = scheduler.CurrentTick();
+    image.modification_tick = ++modification_tick;
 
     return image_id;
 }
@@ -248,8 +248,11 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
 
 ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
     const ImageId image_id = FindImage(info);
-    UpdateImage(image_id);
     Image& image = slot_images[image_id];
+    if (view_info.is_storage) {
+        image.flags |= ImageFlagBits::GpuModified;
+    }
+    UpdateImage(image_id);
 
     auto& usage = image.info.usage;
     if (view_info.is_storage) {
@@ -405,7 +408,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
         // hazard
         if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
                                                  vk::PipelineStageFlagBits2::eTransfer)) {
-            auto dependencies = vk::DependencyInfo{
+            const auto dependencies = vk::DependencyInfo{
+                .dependencyFlags = vk::DependencyFlagBits::eByRegion,
                 .bufferMemoryBarrierCount = 1,
                 .pBufferMemoryBarriers = &barrier.value(),
             };
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8224de0cf..3a305452d 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -31,6 +31,8 @@ enum class FindFlags {
 };
 DECLARE_ENUM_FLAG_OPERATORS(FindFlags)
 
+static constexpr u32 MaxInvalidateDist = 12_MB;
+
 class TextureCache {
     struct Traits {
         using Entry = boost::container::small_vector;
@@ -114,25 +116,6 @@ public:
         return false;
     }
 
-private:
-    ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
-
-    /// Iterate over all page indices in a range
-    template <typename Func>
-    static void ForEachPage(PAddr addr, size_t size, Func&& func) {
-        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> Traits::PageBits;
-        for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
-            if constexpr (RETURNS_BOOL) {
-                if (func(page)) {
-                    break;
-                }
-            } else {
-                func(page);
-            }
-        }
-    }
-
     template <typename Func>
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
         using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
@@ -174,6 +157,26 @@
         }
     }
 
+private:
+    /// Iterate over all page indices in a range
+    template <typename Func>
+    static void ForEachPage(PAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> Traits::PageBits;
+        for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    /// Registers an image view for provided image
+    ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
+
     /// Create an image from the given parameters
     [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);
 
@@ -209,6 +212,7 @@ private:
     tsl::robin_map samplers;
    PageTable page_table;
     std::mutex mutex;
+    u64 modification_tick{0};
 
     struct MetaDataInfo {
         enum class Type {