video_core: Fix some cases of "Attempted to track non-GPU memory" (#2447)

* memory: Consider flexible mappings as GPU-accessible

Multiple guest apps do this with perfectly valid sharps in simple shaders. This needs some hardware testing to see exactly how it is handled, but for now it doesn't hurt to support it.
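As a rough illustration of the shape of the change (`TrackerStub` and `OnMapped` below are invented stand-ins, not emulator API): the GPU page tracker is now told about every successful mapping rather than only direct ones, so a buffer that lives in flexible memory no longer fails the tracking lookup.

```cpp
#include <cstdint>
#include <cstdio>

// Invented stand-ins for illustration; the real change is in
// MemoryManager::MapMemory in the diff below.
enum class VMAType { Free, Direct, Flexible };

struct TrackerStub {
    void MapMemory(std::uint64_t addr, std::uint64_t size) {
        std::printf("tracking [%#llx, %#llx)\n", static_cast<unsigned long long>(addr),
                    static_cast<unsigned long long>(addr + size));
    }
};

void OnMapped(TrackerStub& tracker, VMAType type, std::uint64_t addr, std::uint64_t size) {
    (void)type;
    // Before this commit only VMAType::Direct reached the tracker, so a later
    // buffer bind inside a flexible mapping hit "Attempted to track non-GPU memory".
    tracker.MapMemory(addr, size);
}

int main() {
    TrackerStub tracker;
    OnMapped(tracker, VMAType::Flexible, 0x200000000ULL, 0x10000); // now tracked too
}
```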

* memory: Clamp large buffers to mapped area

Sometimes huge buffers are bound that start in a valid mapping but aren't fully contained by it. It is not reasonable to expect the game to need all of that memory, so clamp the size to avoid the GPU tracking assert.
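A self-contained sketch of the clamp, under simplified assumptions: the real logic is `MemoryManager::ClampRangeSize` in the diff below, and `vma_map` here is a plain base-to-size map rather than the emulator's VMA structure. The 1 GiB threshold just keeps the map lookup off the hot path for ordinary buffer sizes.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>

using VAddr = std::uint64_t;
using u64 = std::uint64_t;

static constexpr u64 MinSizeToClamp = 1ULL << 30; // 1 GiB, as in the diff

u64 ClampRangeSize(const std::map<VAddr, u64>& vma_map, VAddr virtual_addr, u64 size) {
    // Skip the map lookup for small buffers; they are assumed to be contained.
    if (size < MinSizeToClamp) {
        return size;
    }
    // Find the mapping containing virtual_addr: the last entry with base <= addr.
    auto it = vma_map.upper_bound(virtual_addr);
    --it; // assumes virtual_addr lies inside a valid mapping (ASSERT in the real code)
    const VAddr base = it->first;
    const u64 vma_size = it->second;
    // Clamp the range so it ends no later than the mapping does.
    return std::min<u64>(size, base + vma_size - virtual_addr);
}

int main() {
    std::map<VAddr, u64> vma_map{{0x100000000ULL, 2ULL << 30}}; // one 2 GiB mapping
    // A 3 GiB buffer bound 1 GiB into the mapping is clamped to the 1 GiB that remains.
    const u64 clamped = ClampRangeSize(vma_map, 0x100000000ULL + (1ULL << 30), 3ULL << 30);
    std::printf("%#llx\n", static_cast<unsigned long long>(clamped)); // 0x40000000
}
```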

* clang-format fix

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
TheTurtle authored 2025-02-19 13:31:35 +02:00, committed by GitHub
parent 50aa0f5564
commit da0ab005c7
3 changed files with 30 additions and 14 deletions

src/core/memory.cpp

@@ -56,6 +56,23 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1
               total_flexible_size, total_direct_size);
 }
 
+u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
+    static constexpr u64 MinSizeToClamp = 1_GB;
+    // Don't bother with clamping if the size is small so we don't pay a map lookup on every buffer.
+    if (size < MinSizeToClamp) {
+        return size;
+    }
+    const auto vma = FindVMA(virtual_addr);
+    ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr);
+    const u64 clamped_size =
+        std::min<u64>(size, vma->second.base + vma->second.size - virtual_addr);
+    if (size != clamped_size) {
+        LOG_WARNING(Kernel_Vmm, "Clamped requested buffer range addr={:#x}, size={:#x} to {:#x}",
+                    virtual_addr, size, clamped_size);
+    }
+    return clamped_size;
+}
+
 bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
     const VAddr virtual_addr = std::bit_cast<VAddr>(address);
     const auto& vma = FindVMA(virtual_addr)->second;
@@ -314,11 +331,11 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
     if (type == VMAType::Direct) {
         new_vma.phys_base = phys_addr;
-        rasterizer->MapMemory(mapped_addr, size);
     }
     if (type == VMAType::Flexible) {
         flexible_usage += size;
     }
+    rasterizer->MapMemory(mapped_addr, size);
 
     return ORBIS_OK;
 }
@@ -406,12 +423,10 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
     if (type == VMAType::Free) {
         return adjusted_size;
     }
-    if (type == VMAType::Direct || type == VMAType::Pooled) {
-        rasterizer->UnmapMemory(virtual_addr, adjusted_size);
-    }
     if (type == VMAType::Flexible) {
         flexible_usage -= adjusted_size;
     }
+    rasterizer->UnmapMemory(virtual_addr, adjusted_size);
 
     // Mark region as free and attempt to coalesce it with neighbours.
     const auto new_it = CarveVMA(virtual_addr, adjusted_size);

src/core/memory.h

@@ -164,6 +164,8 @@ public:
         return virtual_addr >= vma_map.begin()->first && virtual_addr < end_addr;
     }
 
+    u64 ClampRangeSize(VAddr virtual_addr, u64 size);
+
     bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
 
     void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);

src/video_core/renderer_vulkan/vk_rasterizer.cpp

@@ -502,16 +502,17 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
     for (const auto& desc : stage.buffers) {
         const auto vsharp = desc.GetSharp(stage);
         if (!desc.IsSpecial() && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
-            const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
-            buffer_bindings.emplace_back(buffer_id, vsharp);
+            const u64 size = memory->ClampRangeSize(vsharp.base_address, vsharp.GetSize());
+            const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, size);
+            buffer_bindings.emplace_back(buffer_id, vsharp, size);
         } else {
-            buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
+            buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp, 0);
         }
     }
 
     // Second pass to re-bind buffers that were updated after binding
     for (u32 i = 0; i < buffer_bindings.size(); i++) {
-        const auto& [buffer_id, vsharp] = buffer_bindings[i];
+        const auto& [buffer_id, vsharp, size] = buffer_bindings[i];
         const auto& desc = stage.buffers[i];
         const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
         // Buffer is not from the cache, either a special buffer or unbound.
@@ -540,17 +541,15 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
                 buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
             }
         } else {
-            const auto [vk_buffer, offset] =
-                buffer_cache.ObtainBuffer(vsharp.base_address, vsharp.GetSize(), desc.is_written,
-                                          desc.is_formatted, buffer_id);
+            const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
+                vsharp.base_address, size, desc.is_written, desc.is_formatted, buffer_id);
             const u32 alignment =
                 is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
             const u32 offset_aligned = Common::AlignDown(offset, alignment);
             const u32 adjust = offset - offset_aligned;
             ASSERT(adjust % 4 == 0);
             push_data.AddOffset(binding.buffer, adjust);
-            buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
-                                      vsharp.GetSize() + adjust);
+            buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
             if (auto barrier =
                     vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
                                                           : vk::AccessFlagBits2::eShaderRead,
@@ -558,7 +557,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
                 buffer_barriers.emplace_back(*barrier);
             }
             if (desc.is_written && desc.is_formatted) {
-                texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
+                texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, size);
             }
         }
     }