From c43242111daaba875ef2f2e2195d4e546717777d Mon Sep 17 00:00:00 2001 From: riperiperi Date: Fri, 10 May 2024 20:09:10 +0100 Subject: [PATCH] GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings Essentially retreading #4540, but it's on the GPU project now instead of the backend. This allows us to have a lot more control + knowledge of where the buffer backing has been changed and allows us to pre-emptively flush pages to host memory for quicker readback. It will allow us to do other stuff in the future, but we'll get there when we get there. Performance greatly improved in Hyrule Warriors: Age of Calamity. Performance notably improved in TOTK (average). Performance for BOTW restored to how it was before #4911, perhaps a bit better. - Rewrites a bunch of buffer migration stuff. Might want to tighten up how dispose stuff works. - Fixed an issue where the copy for texture pre-flush would happen _after_ the syncpoint. TODO: remove a page from pre-flush if it isn't flushed after a certain number of copies. --- src/Ryujinx.Graphics.GAL/BufferAccess.cs | 11 +- src/Ryujinx.Graphics.GAL/Capabilities.cs | 3 + src/Ryujinx.Graphics.GAL/SystemMemoryType.cs | 29 ++ .../Engine/MME/MacroHLE.cs | 4 +- .../Threed/ComputeDraw/VtgAsComputeState.cs | 6 +- .../Engine/Threed/DrawManager.cs | 6 +- src/Ryujinx.Graphics.Gpu/GpuContext.cs | 5 +- .../Image/TextureBindingsArrayCache.cs | 8 +- .../Image/TextureGroup.cs | 2 +- src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs | 165 +++++++++++- .../Memory/BufferBackingState.cs | 228 ++++++++++++++++ .../Memory/BufferCache.cs | 118 +++++---- .../Memory/BufferManager.cs | 47 ++-- .../Memory/BufferMigration.cs | 223 +++++++++++----- .../Memory/BufferModifiedRangeList.cs | 138 ++++++---- .../Memory/BufferPreFlush.cs | 247 ++++++++++++++++++ .../Memory/BufferStage.cs | 92 +++++++ src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 3 +- src/Ryujinx.Graphics.Vulkan/EnumConversion.cs | 12 +- src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 14 + 20 files changed, 1158 insertions(+), 203 deletions(-) create mode 100644 src/Ryujinx.Graphics.GAL/SystemMemoryType.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs diff --git a/src/Ryujinx.Graphics.GAL/BufferAccess.cs b/src/Ryujinx.Graphics.GAL/BufferAccess.cs index faefa51882..1e7736f8fa 100644 --- a/src/Ryujinx.Graphics.GAL/BufferAccess.cs +++ b/src/Ryujinx.Graphics.GAL/BufferAccess.cs @@ -6,8 +6,13 @@ namespace Ryujinx.Graphics.GAL public enum BufferAccess { Default = 0, - FlushPersistent = 1 << 0, - Stream = 1 << 1, - SparseCompatible = 1 << 2, + HostMemory = 1, + DeviceMemory = 2, + DeviceMemoryMapped = 3, + + MemoryTypeMask = 0xf, + + Stream = 1 << 4, + SparseCompatible = 1 << 5, } } diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index 70736fbd61..7e958b593a 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.GAL { public readonly TargetApi Api; public readonly string VendorName; + public readonly SystemMemoryType MemoryType; public readonly bool HasFrontFacingBug; public readonly bool HasVectorIndexingBug; @@ -65,6 +66,7 @@ namespace Ryujinx.Graphics.GAL public Capabilities( TargetApi api, string vendorName, + SystemMemoryType memoryType, bool hasFrontFacingBug, bool hasVectorIndexingBug, bool needsFragmentOutputSpecialization, @@ -118,6 +120,7 @@ namespace Ryujinx.Graphics.GAL { Api = api; VendorName = vendorName; + MemoryType = memoryType; HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; NeedsFragmentOutputSpecialization = needsFragmentOutputSpecialization; diff --git a/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs b/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs new file mode 100644 index 0000000000..532921298a --- /dev/null +++ b/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs @@ -0,0 +1,29 @@ +namespace Ryujinx.Graphics.GAL +{ + public enum SystemMemoryType + { + /// + /// The backend manages the ownership of memory. This mode never supports host imported memory. + /// + BackendManaged, + + /// + /// Device memory has similar performance to host memory, usually because it's shared between CPU/GPU. + /// Use host memory whenever possible. + /// + UnifiedMemory, + + /// + /// GPU storage to host memory goes though a slow interconnect, but it would still be preferable to use it if the data is flushed back often. + /// Assumes constant buffer access to host memory is rather fast. + /// + DedicatedMemory, + + /// + /// GPU storage to host memory goes though a slow interconnect, that is very slow when doing access from storage. + /// When frequently accessed, copy buffers to host memory using DMA. + /// Assumes constant buffer access to host memory is rather fast. + /// + DedicatedMemorySlowStorage + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs index 7f3772f44b..6bfb22cf70 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs @@ -495,8 +495,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride; - MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize); - MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4); + MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize, Memory.BufferStage.Indirect); + MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4, Memory.BufferStage.Indirect); _processor.ThreedClass.DrawIndirect( topology, diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs index 6324e6a15a..13186ef409 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs @@ -370,7 +370,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw { var memoryManager = _channel.MemoryManager; - BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size)); + BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size), Gpu.Memory.BufferStage.VertexBuffer); ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format); bufferTexture.SetStorage(range); @@ -412,7 +412,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw var memoryManager = _channel.MemoryManager; ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1); - BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign)); + BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange( + memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign), + Gpu.Memory.BufferStage.IndexBuffer); misalignedOffset = (int)misalign >> shift; SetIndexBufferTexture(reservations, range, format); diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs index d8de14de09..56ef64c6ec 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -684,8 +684,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed if (hasCount) { - var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange); - var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange); + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect); + var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange, BufferStage.Indirect); if (indexed) { @@ -698,7 +698,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } else { - var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange); + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect); if (indexed) { diff --git a/src/Ryujinx.Graphics.Gpu/GpuContext.cs b/src/Ryujinx.Graphics.Gpu/GpuContext.cs index 53ea8cb277..048d32fb79 100644 --- a/src/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/src/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -393,17 +393,18 @@ namespace Ryujinx.Graphics.Gpu if (force || _pendingSync || (syncpoint && SyncpointActions.Count > 0)) { - Renderer.CreateSync(SyncNumber, strict); - foreach (var action in SyncActions) { action.SyncPreAction(syncpoint); } + foreach (var action in SyncpointActions) { action.SyncPreAction(syncpoint); } + Renderer.CreateSync(SyncNumber, strict); + SyncNumber++; SyncActions.RemoveAll(action => action.SyncAction(syncpoint)); diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs index 7e486e0a84..a54d070004 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs @@ -708,11 +708,11 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); } else { - _channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); } } else if (isImage) @@ -921,11 +921,11 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); } else { - _channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); } } else if (isImage) diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs index de9c47c976..b8028d455a 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs @@ -645,7 +645,7 @@ namespace Ryujinx.Graphics.Gpu.Image } else { - _flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.FlushPersistent); + _flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.HostMemory); _flushBufferImported = false; } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index d293060b5a..bd946b52dc 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Synchronization; using Ryujinx.Memory.Range; @@ -10,6 +11,8 @@ using System.Threading; namespace Ryujinx.Graphics.Gpu.Memory { + delegate void BufferFlushAction(ulong address, ulong size, ulong syncNumber); + /// /// Buffer, used to store vertex and index data, uniform and storage buffers, and others. /// @@ -23,7 +26,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Host buffer handle. /// - public BufferHandle Handle { get; } + public BufferHandle Handle { get; private set; } /// /// Start address of the buffer in guest memory. @@ -60,6 +63,17 @@ namespace Ryujinx.Graphics.Gpu.Memory /// private BufferModifiedRangeList _modifiedRanges = null; + /// + /// A structure that is used to flush buffer data back to a host mapped buffer for cached readback. + /// Only used if the buffer data is explicitly owned by device local memory. + /// + private BufferPreFlush _preFlush = null; + + /// + /// Usage tracking state that determines what type of backing the buffer should use. + /// + public BufferBackingState BackingState; + private readonly MultiRegionHandle _memoryTrackingGranular; private readonly RegionHandle _memoryTracking; @@ -87,6 +101,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Physical memory where the buffer is mapped /// Start address of the buffer /// Size of the buffer in bytes + /// The type of usage that created the buffer /// Indicates if the buffer can be used in a sparse buffer mapping /// Buffers which this buffer contains, and will inherit tracking handles from public Buffer( @@ -94,6 +109,7 @@ namespace Ryujinx.Graphics.Gpu.Memory PhysicalMemory physicalMemory, ulong address, ulong size, + BufferStage stage, bool sparseCompatible, IEnumerable baseBuffers = null) { @@ -103,7 +119,9 @@ namespace Ryujinx.Graphics.Gpu.Memory Size = size; SparseCompatible = sparseCompatible; - BufferAccess access = sparseCompatible ? BufferAccess.SparseCompatible : BufferAccess.Default; + BackingState = new BufferBackingState(_context, this, stage, baseBuffers); + + BufferAccess access = BackingState.SwitchAccess(this); Handle = context.Renderer.CreateBuffer((int)size, access, baseBuffers?.MaxBy(x => x.Size).Handle ?? BufferHandle.Null); @@ -161,6 +179,26 @@ namespace Ryujinx.Graphics.Gpu.Memory _virtualDependenciesLock = new ReaderWriterLockSlim(); } + private void ChangeBacking() + { + var before = BackingState.Active; + BufferAccess access = BackingState.SwitchAccess(this); + + Logger.Warning?.PrintMsg(LogClass.Gpu, $"Migrating {Size} from {before} to {BackingState.Active}"); + + BufferHandle newHandle = _context.Renderer.CreateBuffer((int)Size, access, Handle); + + _context.Renderer.Pipeline.CopyBuffer(Handle, newHandle, 0, 0, (int)Size); + + _modifiedRanges?.SelfMigration(); + + // If swtiching from device local to host mapped, pre-flushing data no longer makes sense. + // This is set to null and disposed when the migration fully completes. + _preFlush = null; + + Handle = newHandle; + } + /// /// Gets a sub-range from the buffer, from a start address til a page boundary after the given size. /// @@ -246,6 +284,7 @@ namespace Ryujinx.Graphics.Gpu.Memory } else { + BackingState.RecordSet(); _context.Renderer.SetBufferData(Handle, 0, _physicalMemory.GetSpan(Address, (int)Size)); CopyToDependantVirtualBuffers(); } @@ -283,15 +322,35 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges ??= new BufferModifiedRangeList(_context, this, Flush); } + /// + /// Checks if a backing change is deemed necessary from the given usage. + /// If it is, queues a backing change to happen on the next sync action. + /// + /// Buffer stage that can change backing type + private void TryQueueBackingChange(BufferStage stage) + { + if (BackingState.ShouldChangeBacking(stage)) + { + if (!_syncActionRegistered) + { + _context.RegisterSyncAction(this); + _syncActionRegistered = true; + } + } + } + /// /// Signal that the given region of the buffer has been modified. /// /// The start address of the modified region /// The size of the modified region - public void SignalModified(ulong address, ulong size) + /// Buffer stage that triggered the modification + public void SignalModified(ulong address, ulong size, BufferStage stage) { EnsureRangeList(); + TryQueueBackingChange(stage); + _modifiedRanges.SignalModified(address, size); if (!_syncActionRegistered) @@ -311,6 +370,39 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges?.Clear(address, size); } + /// + /// Action to be performed immediately before sync is created. + /// This will copy any buffer ranges designated for pre-flushing. + /// + /// True if the action is a guest syncpoint + public void SyncPreAction(bool syncpoint) + { + if (_referenceCount == 0) + { + return; + } + + if (BackingState.ShouldChangeBacking()) + { + ChangeBacking(); + + _physicalMemory.BufferCache.BufferBackingChanged(this); + } + + if (BackingState.IsDeviceLocal) + { + _preFlush ??= new BufferPreFlush(_context, _physicalMemory, this, FlushImpl); + + if (_preFlush.ShouldCopy) + { + _modifiedRanges?.GetRangesAtSync(Address, Size, _context.SyncNumber, (address, size) => + { + _preFlush.CopyModified(address, size); + }); + } + } + } + /// /// Action to be performed when a syncpoint is reached after modification. /// This will register read/write tracking to flush the buffer from GPU when its memory is used. @@ -466,6 +558,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size of the modified region private void LoadRegion(ulong mAddress, ulong mSize) { + BackingState.RecordSet(); + int offset = (int)(mAddress - Address); _context.Renderer.SetBufferData(Handle, offset, _physicalMemory.GetSpan(mAddress, (int)mSize)); @@ -535,20 +629,73 @@ namespace Ryujinx.Graphics.Gpu.Memory _context.Renderer.Pipeline.CopyBuffer(Handle, destination.Handle, 0, dstOffset, (int)Size); } + private void FlushImpl(BufferHandle handle, ulong address, ulong size) + { + int offset = (int)(address - Address); + + using PinnedSpan data = _context.Renderer.GetBufferData(handle, offset, (int)size); + + // TODO: When write tracking shaders, they will need to be aware of changes in overlapping buffers. + _physicalMemory.WriteUntracked(address, CopyFromDependantVirtualBuffers(data.Get(), address, size)); + } + + private void FlushImpl(ulong address, ulong size) + { + FlushImpl(Handle, address, size); + } + /// /// Flushes a range of the buffer. /// This writes the range data back into guest memory. /// /// Start address of the range /// Size in bytes of the range - public void Flush(ulong address, ulong size) + /// Sync number waited for before flushing the data + public void Flush(ulong address, ulong size, ulong syncNumber) { - int offset = (int)(address - Address); + BackingState.RecordFlush(); - using PinnedSpan data = _context.Renderer.GetBufferData(Handle, offset, (int)size); + BufferPreFlush preFlush = _preFlush; - // TODO: When write tracking shaders, they will need to be aware of changes in overlapping buffers. - _physicalMemory.WriteUntracked(address, CopyFromDependantVirtualBuffers(data.Get(), address, size)); + if (preFlush != null) + { + preFlush.FlushWithAction(address, size, syncNumber); + } + else + { + FlushImpl(address, size); + } + } + /// + /// Gets an action that disposes the backing buffer using its current handle. + /// Useful for deleting an old copy of the buffer after the handle changes. + /// + /// An action that flushes data from the specified range, using the buffer handle at the time this the method is generated + public Action GetSnapshotDisposeAction() + { + BufferHandle handle = Handle; + BufferPreFlush preFlush = _preFlush; + + return () => + { + _context.Renderer.DeleteBuffer(handle); + preFlush?.Dispose(); + }; + } + + /// + /// Gets an action that flushes a range of the buffer using its current handle. + /// Useful for flushing data from old copies of the buffer after the handle changes. + /// + /// An action that flushes data from the specified range, using the buffer handle at the time this the method is generated + public BufferFlushAction GetSnapshotFlushAction() + { + BufferHandle handle = Handle; + + return (ulong address, ulong size, ulong _) => + { + FlushImpl(handle, address, size); + }; } /// @@ -857,6 +1004,8 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges?.Clear(); _context.Renderer.DeleteBuffer(Handle); + _preFlush?.Dispose(); + _preFlush = null; UnmappedSequence++; } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs new file mode 100644 index 0000000000..37b5fb9e14 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs @@ -0,0 +1,228 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + internal enum BufferBackingType + { + HostMemory, + DeviceMemory, + DeviceMemoryWithFlush + } + + internal struct BufferBackingState + { + private const int DeviceLocalSizeThreshold = 256 * 1024; // 256kb + + private const int SetCountThreshold = 100; + private const int WriteCountThreshold = 50; + private const int FlushCountThreshold = 5; + + public readonly bool IsDeviceLocal => _activeType != BufferBackingType.HostMemory; + + private readonly SystemMemoryType _systemMemoryType; + private BufferBackingType _activeType; + private BufferBackingType _desiredType; + + private bool _canSwap; + + private int _setCount; + private int _writeCount; + private int _flushCount; + private int _flushTemp; + private int _lastFlushWrite; + + private readonly int _size; + + public BufferBackingState(GpuContext context, Buffer parent, BufferStage stage, IEnumerable baseBuffers = null) + { + _size = (int)parent.Size; + _systemMemoryType = context.Capabilities.MemoryType; + + // Backend managed is always auto, unified memory is always host. + _desiredType = BufferBackingType.HostMemory; + _canSwap = _systemMemoryType != SystemMemoryType.BackendManaged && _systemMemoryType != SystemMemoryType.UnifiedMemory; + + if (_canSwap) + { + // Might want to start certain buffers as being device local, + // and the usage might also _lock_ those buffers into being device local. + + BufferStage storageFlags = stage & BufferStage.StorageMask; + + if (parent.Size > DeviceLocalSizeThreshold) + { + _desiredType = BufferBackingType.DeviceMemory; + } + + if (storageFlags != 0) + { + // Storage buffer bindings may require special treatment. + + var rawStage = stage & BufferStage.StageMask; + + if (rawStage == BufferStage.Fragment) + { + // Fragment read should start device local. Fragment write should always be device local. + + _desiredType = BufferBackingType.DeviceMemory; + + if (storageFlags != BufferStage.StorageRead) + { + _canSwap = false; + } + } + + // TODO: atomic access should likely always be device local + } + + + if (baseBuffers != null) + { + foreach (Buffer buffer in baseBuffers) + { + CombineState(buffer.BackingState); + } + } + } + } + + private void CombineState(BufferBackingState oldState) + { + _setCount += oldState._setCount; + _writeCount += oldState._writeCount; + _flushCount += oldState._flushCount; + _flushTemp += oldState._flushTemp; + _lastFlushWrite = -1; + + _canSwap &= oldState._canSwap; + + _desiredType = (BufferBackingType)Math.Max((int)_desiredType, (int)oldState._desiredType); + } + + public BufferBackingType Active => _activeType; + + public BufferAccess SwitchAccess(Buffer parent) + { + BufferAccess access = parent.SparseCompatible ? BufferAccess.SparseCompatible : BufferAccess.Default; + + bool isBackendManaged = _systemMemoryType == SystemMemoryType.BackendManaged; + + if (!isBackendManaged) + { + switch (_desiredType) + { + case BufferBackingType.HostMemory: + access |= BufferAccess.HostMemory; + break; + case BufferBackingType.DeviceMemory: + access |= BufferAccess.DeviceMemory; + break; + case BufferBackingType.DeviceMemoryWithFlush: + access |= BufferAccess.DeviceMemoryMapped; + break; + } + } + + _activeType = _desiredType; + + return access; + } + + public void RecordSet() + { + _setCount++; + + ConsiderUseCounts(); + } + + public void RecordFlush() + { + if (_lastFlushWrite != _writeCount) + { + // If it's on the same page as the last flush, ignore it. + _lastFlushWrite = _writeCount; + _flushCount++; + } + } + + public readonly bool ShouldChangeBacking() + { + return _desiredType != _activeType; + } + + public bool ShouldChangeBacking(BufferStage stage) + { + if (!_canSwap) + { + return false; + } + + BufferStage storageFlags = stage & BufferStage.StorageMask; + + if (storageFlags != 0) + { + if (storageFlags != BufferStage.StorageRead) + { + // Storage write. + _writeCount++; + + var rawStage = stage & BufferStage.StageMask; + + if (rawStage == BufferStage.Fragment) + { + // Switch to device memory, never swap back. + + _desiredType = BufferBackingType.DeviceMemory; + _canSwap = false; + + // TODO: atomic access should likely always be device local + } + } + + ConsiderUseCounts(); + } + + return _desiredType != _activeType; + } + + private void ConsiderUseCounts() + { + if (_canSwap) + { + if (_writeCount >= WriteCountThreshold || _setCount >= SetCountThreshold || _flushCount >= FlushCountThreshold) + { + if (_flushCount > 0 || _flushTemp-- > 0) + { + // Buffers that flush should ideally be mapped in host address space for easy copies. + // If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages). + // If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached. + _desiredType = _size > DeviceLocalSizeThreshold ? BufferBackingType.DeviceMemoryWithFlush : BufferBackingType.HostMemory; + + // It's harder for a buffer that is flushed to revert to another type of mapping. + if (_flushCount > 0) + { + _flushTemp = 1000; + } + } + else if (_writeCount >= WriteCountThreshold) + { + // Buffers that are written often should ideally be in the device local heap. (Storage buffers) + _desiredType = BufferBackingType.DeviceMemory; + } + else if (_setCount > SetCountThreshold) + { + // Buffers that have their data set often should ideally be host mapped. (Constant buffers) + _desiredType = BufferBackingType.HostMemory; + } + + _lastFlushWrite = -1; + _flushCount = 0; + _writeCount = 0; + _setCount = 0; + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs index c6284780d3..722dc43d23 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs @@ -107,8 +107,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Contiguous physical range of the buffer, after address translation - public MultiRange TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -119,7 +120,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (address != MemoryManager.PteUnmapped) { - CreateBuffer(address, size); + CreateBuffer(address, size, stage); } return new MultiRange(address, size); @@ -132,8 +133,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Physical ranges of the buffer, after address translation - public MultiRange TranslateAndCreateMultiBuffers(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateMultiBuffers(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -149,7 +151,7 @@ namespace Ryujinx.Graphics.Gpu.Memory return range; } - CreateBuffer(range); + CreateBuffer(range, stage); return range; } @@ -161,8 +163,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Physical ranges of the buffer, after address translation - public MultiRange TranslateAndCreateMultiBuffersPhysicalOnly(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateMultiBuffersPhysicalOnly(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -186,11 +189,11 @@ namespace Ryujinx.Graphics.Gpu.Memory { if (range.Count > 1) { - CreateBuffer(subRange.Address, subRange.Size, SparseBufferAlignmentSize); + CreateBuffer(subRange.Address, subRange.Size, stage, SparseBufferAlignmentSize); } else { - CreateBuffer(subRange.Address, subRange.Size); + CreateBuffer(subRange.Address, subRange.Size, stage); } } } @@ -203,11 +206,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// This can be used to ensure the existance of a buffer. /// /// Physical ranges of memory where the buffer data is located - public void CreateBuffer(MultiRange range) + /// The type of usage that created the buffer + public void CreateBuffer(MultiRange range, BufferStage stage) { if (range.Count > 1) { - CreateMultiRangeBuffer(range); + CreateMultiRangeBuffer(range, stage); } else { @@ -215,7 +219,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (subRange.Address != MemoryManager.PteUnmapped) { - CreateBuffer(subRange.Address, subRange.Size); + CreateBuffer(subRange.Address, subRange.Size, stage); } } } @@ -226,7 +230,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in memory /// Size of the buffer in bytes - public void CreateBuffer(ulong address, ulong size) + /// The type of usage that created the buffer + public void CreateBuffer(ulong address, ulong size, BufferStage stage) { ulong endAddress = address + size; @@ -239,7 +244,7 @@ namespace Ryujinx.Graphics.Gpu.Memory alignedEndAddress += BufferAlignmentSize; } - CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress); + CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, stage); } /// @@ -248,8 +253,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in memory /// Size of the buffer in bytes + /// The type of usage that created the buffer /// Alignment of the start address of the buffer in bytes - public void CreateBuffer(ulong address, ulong size, ulong alignment) + public void CreateBuffer(ulong address, ulong size, BufferStage stage, ulong alignment) { ulong alignmentMask = alignment - 1; ulong pageAlignmentMask = BufferAlignmentMask; @@ -264,7 +270,7 @@ namespace Ryujinx.Graphics.Gpu.Memory alignedEndAddress += pageAlignmentMask; } - CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, alignment); + CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, stage, alignment); } /// @@ -272,7 +278,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// if it does not exist yet. /// /// Physical ranges of memory - private void CreateMultiRangeBuffer(MultiRange range) + /// The type of usage that created the buffer + private void CreateMultiRangeBuffer(MultiRange range, BufferStage stage) { // Ensure all non-contiguous buffer we might use are sparse aligned. for (int i = 0; i < range.Count; i++) @@ -281,7 +288,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (subRange.Address != MemoryManager.PteUnmapped) { - CreateBuffer(subRange.Address, subRange.Size, SparseBufferAlignmentSize); + CreateBuffer(subRange.Address, subRange.Size, stage, SparseBufferAlignmentSize); } } @@ -431,9 +438,9 @@ namespace Ryujinx.Graphics.Gpu.Memory result.EndGpuAddress < gpuVa + size || result.UnmappedSequence != result.Buffer.UnmappedSequence) { - MultiRange range = TranslateAndCreateBuffer(memoryManager, gpuVa, size); + MultiRange range = TranslateAndCreateBuffer(memoryManager, gpuVa, size, BufferStage.Internal); ulong address = range.GetSubRange(0).Address; - result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size)); + result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size, BufferStage.Internal)); _dirtyCache[gpuVa] = result; } @@ -466,9 +473,9 @@ namespace Ryujinx.Graphics.Gpu.Memory result.EndGpuAddress < alignedEndGpuVa || result.UnmappedSequence != result.Buffer.UnmappedSequence) { - MultiRange range = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size); + MultiRange range = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size, BufferStage.None); ulong address = range.GetSubRange(0).Address; - result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size)); + result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size, BufferStage.None)); _modifiedCache[alignedGpuVa] = result; } @@ -485,7 +492,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer - private void CreateBufferAligned(ulong address, ulong size) + /// The type of usage that created the buffer + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage) { Buffer[] overlaps = _bufferOverlaps; int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref overlaps); @@ -546,13 +554,13 @@ namespace Ryujinx.Graphics.Gpu.Memory ulong newSize = endAddress - address; - CreateBufferAligned(address, newSize, anySparseCompatible, overlaps, overlapsCount); + CreateBufferAligned(address, newSize, stage, anySparseCompatible, overlaps, overlapsCount); } } else { // No overlap, just create a new buffer. - Buffer buffer = new(_context, _physicalMemory, address, size, sparseCompatible: false); + Buffer buffer = new(_context, _physicalMemory, address, size, stage, sparseCompatible: false); lock (_buffers) { @@ -570,8 +578,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Alignment of the start address of the buffer - private void CreateBufferAligned(ulong address, ulong size, ulong alignment) + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage, ulong alignment) { Buffer[] overlaps = _bufferOverlaps; int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref overlaps); @@ -624,13 +633,13 @@ namespace Ryujinx.Graphics.Gpu.Memory ulong newSize = endAddress - address; - CreateBufferAligned(address, newSize, sparseAligned, overlaps, overlapsCount); + CreateBufferAligned(address, newSize, stage, sparseAligned, overlaps, overlapsCount); } } else { // No overlap, just create a new buffer. - Buffer buffer = new(_context, _physicalMemory, address, size, sparseAligned); + Buffer buffer = new(_context, _physicalMemory, address, size, stage, sparseAligned); lock (_buffers) { @@ -648,12 +657,13 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Indicates if the buffer can be used in a sparse buffer mapping /// Buffers overlapping the range /// Total of overlaps - private void CreateBufferAligned(ulong address, ulong size, bool sparseCompatible, Buffer[] overlaps, int overlapsCount) + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage, bool sparseCompatible, Buffer[] overlaps, int overlapsCount) { - Buffer newBuffer = new Buffer(_context, _physicalMemory, address, size, sparseCompatible, overlaps.Take(overlapsCount)); + Buffer newBuffer = new Buffer(_context, _physicalMemory, address, size, stage, sparseCompatible, overlaps.Take(overlapsCount)); lock (_buffers) { @@ -704,7 +714,7 @@ namespace Ryujinx.Graphics.Gpu.Memory for (int index = 0; index < overlapCount; index++) { - CreateMultiRangeBuffer(overlaps[index].Range); + CreateMultiRangeBuffer(overlaps[index].Range, BufferStage.None); } } @@ -731,8 +741,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the copy public void CopyBuffer(MemoryManager memoryManager, ulong srcVa, ulong dstVa, ulong size) { - MultiRange srcRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, srcVa, size); - MultiRange dstRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, dstVa, size); + MultiRange srcRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, srcVa, size, BufferStage.Copy); + MultiRange dstRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, dstVa, size, BufferStage.Copy); if (srcRange.Count == 1 && dstRange.Count == 1) { @@ -788,8 +798,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the copy private void CopyBufferSingleRange(MemoryManager memoryManager, ulong srcAddress, ulong dstAddress, ulong size) { - Buffer srcBuffer = GetBuffer(srcAddress, size); - Buffer dstBuffer = GetBuffer(dstAddress, size); + Buffer srcBuffer = GetBuffer(srcAddress, size, BufferStage.Copy); + Buffer dstBuffer = GetBuffer(dstAddress, size, BufferStage.Copy); int srcOffset = (int)(srcAddress - srcBuffer.Address); int dstOffset = (int)(dstAddress - dstBuffer.Address); @@ -803,7 +813,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (srcBuffer.IsModified(srcAddress, size)) { - dstBuffer.SignalModified(dstAddress, size); + dstBuffer.SignalModified(dstAddress, size, BufferStage.Copy); } else { @@ -828,12 +838,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Value to be written into the buffer public void ClearBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, uint value) { - MultiRange range = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, gpuVa, size); + MultiRange range = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, gpuVa, size, BufferStage.Copy); for (int index = 0; index < range.Count; index++) { MemoryRange subRange = range.GetSubRange(index); - Buffer buffer = GetBuffer(subRange.Address, subRange.Size); + Buffer buffer = GetBuffer(subRange.Address, subRange.Size, BufferStage.Copy); int offset = (int)(subRange.Address - buffer.Address); @@ -849,18 +859,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Gets a buffer sub-range starting at a given memory address, aligned to the next page boundary. /// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer sub-range starting at the given memory address - public BufferRange GetBufferRangeAligned(MultiRange range, bool write = false) + public BufferRange GetBufferRangeAligned(MultiRange range, BufferStage stage, bool write = false) { if (range.Count > 1) { - return GetBuffer(range, write).GetRange(range); + return GetBuffer(range, stage, write).GetRange(range); } else { MemoryRange subRange = range.GetSubRange(0); - return GetBuffer(subRange.Address, subRange.Size, write).GetRangeAligned(subRange.Address, subRange.Size, write); + return GetBuffer(subRange.Address, subRange.Size, stage, write).GetRangeAligned(subRange.Address, subRange.Size, write); } } @@ -868,18 +879,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Gets a buffer sub-range for a given memory range. /// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer sub-range for the given range - public BufferRange GetBufferRange(MultiRange range, bool write = false) + public BufferRange GetBufferRange(MultiRange range, BufferStage stage, bool write = false) { if (range.Count > 1) { - return GetBuffer(range, write).GetRange(range); + return GetBuffer(range, stage, write).GetRange(range); } else { MemoryRange subRange = range.GetSubRange(0); - return GetBuffer(subRange.Address, subRange.Size, write).GetRange(subRange.Address, subRange.Size, write); + return GetBuffer(subRange.Address, subRange.Size, stage, write).GetRange(subRange.Address, subRange.Size, write); } } @@ -888,9 +900,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// A buffer overlapping with the specified range is assumed to already exist on the cache. /// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private MultiRangeBuffer GetBuffer(MultiRange range, bool write = false) + private MultiRangeBuffer GetBuffer(MultiRange range, BufferStage stage, bool write = false) { for (int i = 0; i < range.Count; i++) { @@ -902,7 +915,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (write) { - subBuffer.SignalModified(subRange.Address, subRange.Size); + subBuffer.SignalModified(subRange.Address, subRange.Size, stage); } } @@ -935,9 +948,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Start address of the memory range /// Size in bytes of the memory range + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private Buffer GetBuffer(ulong address, ulong size, bool write = false) + private Buffer GetBuffer(ulong address, ulong size, BufferStage stage, bool write = false) { Buffer buffer; @@ -950,7 +964,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (write) { - buffer.SignalModified(address, size); + buffer.SignalModified(address, size, stage); } } else @@ -1004,6 +1018,18 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Signal that the given buffer's handle has changed, + /// Foricng rebind and forcing any overlapping multi-range buffers to be recreated. + /// + /// The buffer that has changed handle + public void BufferBackingChanged(Buffer buffer) + { + NotifyBuffersModified?.Invoke(); + + RecreateMultiRangeBuffers(buffer.Address, buffer.Size); + } + /// /// Prune any invalid entries from a quick access dictionary. /// diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 8f2201e0aa..26d9501c68 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -156,7 +156,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Type of each index buffer element public void SetIndexBuffer(ulong gpuVa, ulong size, IndexType type) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.IndexBuffer); _indexBuffer.Range = range; _indexBuffer.Type = type; @@ -186,7 +186,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Vertex divisor of the buffer, for instanced draws public void SetVertexBuffer(int index, ulong gpuVa, ulong size, int stride, int divisor) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.VertexBuffer); _vertexBuffers[index].Range = range; _vertexBuffers[index].Stride = stride; @@ -213,7 +213,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the transform feedback buffer public void SetTransformFeedbackBuffer(int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStage.TransformFeedback); _transformFeedbackBuffers[index] = new BufferBounds(range); _transformFeedbackBuffersDirty = true; @@ -260,7 +260,7 @@ namespace Ryujinx.Graphics.Gpu.Memory gpuVa = BitUtils.AlignDown(gpuVa, (ulong)_context.Capabilities.StorageBufferOffsetAlignment); - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStageUtils.ComputeStorage(flags)); _cpStorageBuffers.SetBounds(index, range, flags); } @@ -284,7 +284,7 @@ namespace Ryujinx.Graphics.Gpu.Memory gpuVa = BitUtils.AlignDown(gpuVa, (ulong)_context.Capabilities.StorageBufferOffsetAlignment); - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStageUtils.GraphicsStorage(stage, flags)); if (!buffers.Buffers[index].Range.Equals(range)) { @@ -303,7 +303,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the storage buffer public void SetComputeUniformBuffer(int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.Compute); _cpUniformBuffers.SetBounds(index, range); } @@ -318,7 +318,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the storage buffer public void SetGraphicsUniformBuffer(int stage, int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStageUtils.FromShaderStage(stage)); _gpUniformBuffers[stage].SetBounds(index, range); _gpUniformBuffersDirty = true; @@ -502,7 +502,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferTextures) { var isStore = binding.BindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); - var range = bufferCache.GetBufferRange(binding.Range, isStore); + var range = bufferCache.GetBufferRange(binding.Range, BufferStageUtils.TextureBuffer(binding.Stage, binding.BindingInfo.Flags), isStore); binding.Texture.SetStorage(range); // The texture must be rebound to use the new storage if it was updated. @@ -526,7 +526,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferTextureArrays) { - var range = bufferCache.GetBufferRange(binding.Range); + var range = bufferCache.GetBufferRange(binding.Range, BufferStage.None); binding.Texture.SetStorage(range); textureArray[0] = binding.Texture; @@ -536,7 +536,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferImageArrays) { var isStore = binding.BindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); - var range = bufferCache.GetBufferRange(binding.Range, isStore); + var range = bufferCache.GetBufferRange(binding.Range, BufferStage.None, isStore); binding.Texture.SetStorage(range); textureArray[0] = binding.Texture; @@ -565,7 +565,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (!_indexBuffer.Range.IsUnmapped) { - BufferRange buffer = bufferCache.GetBufferRange(_indexBuffer.Range); + BufferRange buffer = bufferCache.GetBufferRange(_indexBuffer.Range, BufferStage.IndexBuffer); _context.Renderer.Pipeline.SetIndexBuffer(buffer, _indexBuffer.Type); } @@ -597,7 +597,7 @@ namespace Ryujinx.Graphics.Gpu.Memory continue; } - BufferRange buffer = bufferCache.GetBufferRange(vb.Range); + BufferRange buffer = bufferCache.GetBufferRange(vb.Range, BufferStage.VertexBuffer); vertexBuffers[index] = new VertexBufferDescriptor(buffer, vb.Stride, vb.Divisor); } @@ -637,7 +637,7 @@ namespace Ryujinx.Graphics.Gpu.Memory continue; } - tfbs[index] = bufferCache.GetBufferRange(tfb.Range, write: true); + tfbs[index] = bufferCache.GetBufferRange(tfb.Range, BufferStage.TransformFeedback, write: true); } _context.Renderer.Pipeline.SetTransformFeedbackBuffers(tfbs); @@ -684,7 +684,7 @@ namespace Ryujinx.Graphics.Gpu.Memory _context.SupportBufferUpdater.SetTfeOffset(index, tfeOffset); - buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(range, write: true)); + buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(range, BufferStage.TransformFeedback, write: true)); } } @@ -751,6 +751,7 @@ namespace Ryujinx.Graphics.Gpu.Memory for (ShaderStage stage = ShaderStage.Vertex; stage <= ShaderStage.Fragment; stage++) { ref var buffers = ref bindings[(int)stage - 1]; + BufferStage bufferStage = BufferStageUtils.FromShaderStage(stage); for (int index = 0; index < buffers.Count; index++) { @@ -762,8 +763,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { var isWrite = bounds.Flags.HasFlag(BufferUsageFlags.Write); var range = isStorage - ? bufferCache.GetBufferRangeAligned(bounds.Range, isWrite) - : bufferCache.GetBufferRange(bounds.Range); + ? bufferCache.GetBufferRangeAligned(bounds.Range, bufferStage | BufferStageUtils.FromUsage(bounds.Flags), isWrite) + : bufferCache.GetBufferRange(bounds.Range, bufferStage); ranges[rangesCount++] = new BufferAssignment(bindingInfo.Binding, range); } @@ -799,8 +800,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { var isWrite = bounds.Flags.HasFlag(BufferUsageFlags.Write); var range = isStorage - ? bufferCache.GetBufferRangeAligned(bounds.Range, isWrite) - : bufferCache.GetBufferRange(bounds.Range); + ? bufferCache.GetBufferRangeAligned(bounds.Range, BufferStageUtils.ComputeStorage(bounds.Flags), isWrite) + : bufferCache.GetBufferRange(bounds.Range, BufferStage.Compute); ranges[rangesCount++] = new BufferAssignment(bindingInfo.Binding, range); } @@ -875,7 +876,7 @@ namespace Ryujinx.Graphics.Gpu.Memory Format format, bool isImage) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferTextures.Add(new BufferTextureBinding(stage, texture, range, bindingInfo, format, isImage)); } @@ -883,6 +884,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer texture array element. This will be bound when the buffer manager commits bindings. /// + /// Shader stage accessing the texture /// Texture array where the element will be inserted /// Buffer texture /// Physical ranges of memory where the buffer texture data is located @@ -890,6 +892,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the binding on the array /// Format of the buffer texture public void SetBufferTextureStorage( + ShaderStage stage, ITextureArray array, ITexture texture, MultiRange range, @@ -897,7 +900,7 @@ namespace Ryujinx.Graphics.Gpu.Memory int index, Format format) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferTextureArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); } @@ -905,6 +908,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer image array element. This will be bound when the buffer manager commits bindings. /// + /// Shader stage accessing the texture /// Image array where the element will be inserted /// Buffer texture /// Physical ranges of memory where the buffer texture data is located @@ -912,6 +916,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the binding on the array /// Format of the buffer texture public void SetBufferTextureStorage( + ShaderStage stage, IImageArray array, ITexture texture, MultiRange range, @@ -919,7 +924,7 @@ namespace Ryujinx.Graphics.Gpu.Memory int index, Format format) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferImageArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs index 0a52680316..b4b4852355 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs @@ -3,35 +3,18 @@ using System; namespace Ryujinx.Graphics.Gpu.Memory { /// - /// A record of when buffer data was copied from one buffer to another, along with the SyncNumber when the migration will be complete. - /// Keeps the source buffer alive for data flushes until the migration is complete. + /// A record of when buffer data was copied from multiple buffers to one migration target, + /// along with the SyncNumber when the migration will be complete. + /// Keeps the source buffers alive for data flushes until the migration is complete. + /// All spans cover the full range of the "destination" buffer. /// internal class BufferMigration : IDisposable { /// - /// The offset for the migrated region. + /// Ranges from source buffers that were copied as part of this migration. + /// Ordered by increasing base address. /// - private readonly ulong _offset; - - /// - /// The size for the migrated region. - /// - private readonly ulong _size; - - /// - /// The buffer that was migrated from. - /// - private readonly Buffer _buffer; - - /// - /// The source range action, to be called on overlap with an unreached sync number. - /// - private readonly Action _sourceRangeAction; - - /// - /// The source range list. - /// - private readonly BufferModifiedRangeList _source; + public BufferMigrationSpan[] Spans { get; private set; } /// /// The destination range list. This range list must be updated when flushing the source. @@ -43,55 +26,167 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public readonly ulong SyncNumber; + public BufferMigration(BufferMigrationSpan[] spans, BufferModifiedRangeList destination, ulong syncNumber) + { + Spans = spans; + Destination = destination; + SyncNumber = syncNumber; + } + + /// + /// Add a span to the migration. Allocates a new array with the target size, and replaces it. + /// + /// + /// The base address for the span is assumed to be higher than all other spans in the migration, + /// to keep the span array ordered. + /// + public void AddSpanToEnd(BufferMigrationSpan span) + { + BufferMigrationSpan[] oldSpans = Spans; + + BufferMigrationSpan[] newSpans = new BufferMigrationSpan[oldSpans.Length + 1]; + + oldSpans.CopyTo(newSpans, 0); + + newSpans[oldSpans.Length] = span; + + Spans = newSpans; + } + + /// + /// Performs the given range action, or one from a migration that overlaps and has not synced yet. + /// + /// The offset to pass to the action + /// The size to pass to the action + /// The sync number that has been reached + /// The action to perform + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferFlushAction rangeAction) + { + long syncDiff = (long)(syncNumber - SyncNumber); + + if (syncDiff >= 0) + { + // The migration has completed. Run the parent action. + rangeAction(offset, size, syncNumber); + } + else + { + ulong prevAddress = offset; + ulong endAddress = offset + size; + + foreach (BufferMigrationSpan span in Spans) + { + if (!span.Overlaps(offset, size)) + { + continue; + } + + if (span.Address > prevAddress) + { + // There's a gap between this span and the last (or the start address). Flush the range using the parent action. + + rangeAction(prevAddress, span.Address - prevAddress, syncNumber); + } + + span.RangeActionWithMigration(offset, size, syncNumber); + + prevAddress = span.Address + span.Size; + } + + if (endAddress > prevAddress) + { + // There's a gap at the end of the range with no migration. Flush the range using the parent action. + rangeAction(prevAddress, endAddress - prevAddress, syncNumber); + } + } + } + + public void Dispose() + { + Destination.RemoveMigration(this); + + foreach (BufferMigrationSpan span in Spans) + { + span.Dispose(); + } + } + } + + /// + /// A record of when buffer data was copied from one buffer to another, for a specific range in a source buffer. + /// Keeps the source buffer alive for data flushes until the migration is complete. + /// + internal readonly struct BufferMigrationSpan : IDisposable + { + /// + /// The offset for the migrated region. + /// + public readonly ulong Address; + + /// + /// The size for the migrated region. + /// + public readonly ulong Size; + + /// + /// The action to perform when the migration isn't needed anymore. + /// + private readonly Action _disposeAction; + + /// + /// The source range action, to be called on overlap with an unreached sync number. + /// + private readonly BufferFlushAction _sourceRangeAction; + + /// + /// Optional migration for the source data. Can chain together if many migrations happen in a short time. + /// If this is null, then _sourceRangeAction will always provide up to date data. + /// + private readonly BufferMigration _source; + /// /// Creates a record for a buffer migration. /// /// The source buffer for this migration + /// The action to perform when the migration isn't needed anymore /// The flush action for the source buffer - /// The modified range list for the source buffer - /// The modified range list for the destination buffer - /// The sync number for when the migration is complete - public BufferMigration( + /// Pending migration for the source buffer + public BufferMigrationSpan( Buffer buffer, - Action sourceRangeAction, - BufferModifiedRangeList source, - BufferModifiedRangeList dest, - ulong syncNumber) + Action disposeAction, + BufferFlushAction sourceRangeAction, + BufferMigration source) { - _offset = buffer.Address; - _size = buffer.Size; - _buffer = buffer; + Address = buffer.Address; + Size = buffer.Size; + _disposeAction = disposeAction; _sourceRangeAction = sourceRangeAction; _source = source; - Destination = dest; - SyncNumber = syncNumber; } + /// + /// Creates a record for a buffer migration, using the default buffer dispose action. + /// + /// The source buffer for this migration + /// The flush action for the source buffer + /// Pending migration for the source buffer + public BufferMigrationSpan( + Buffer buffer, + BufferFlushAction sourceRangeAction, + BufferMigration source) : this(buffer, buffer.DecrementReferenceCount, sourceRangeAction, source) { } + /// /// Determine if the given range overlaps this migration, and has not been completed yet. /// /// Start offset /// Range size - /// The sync number that was waited on /// True if overlapping and in progress, false otherwise - public bool Overlaps(ulong offset, ulong size, ulong syncNumber) + public bool Overlaps(ulong offset, ulong size) { ulong end = offset + size; - ulong destEnd = _offset + _size; - long syncDiff = (long)(syncNumber - SyncNumber); // syncNumber is less if the copy has not completed. + ulong destEnd = Address + Size; - return !(end <= _offset || offset >= destEnd) && syncDiff < 0; - } - - /// - /// Determine if the given range matches this migration. - /// - /// Start offset - /// Range size - /// True if the range exactly matches, false otherwise - public bool FullyMatches(ulong offset, ulong size) - { - return _offset == offset && _size == size; + return !(end <= Address || offset >= destEnd); } /// @@ -100,26 +195,30 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Start offset /// Range size /// Current sync number - /// The modified range list that originally owned this range - public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent) + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber) { ulong end = offset + size; - end = Math.Min(_offset + _size, end); - offset = Math.Max(_offset, offset); + end = Math.Min(Address + Size, end); + offset = Math.Max(Address, offset); size = end - offset; - _source.RangeActionWithMigration(offset, size, syncNumber, parent, _sourceRangeAction); + if (_source != null) + { + _source.RangeActionWithMigration(offset, size, syncNumber, _sourceRangeAction); + } + else + { + _sourceRangeAction(offset, size, syncNumber); + } } /// - /// Removes this reference to the range list, potentially allowing for the source buffer to be disposed. + /// Removes this migration span, potentially allowing for the source buffer to be disposed. /// public void Dispose() { - Destination.RemoveMigration(this); - - _buffer.DecrementReferenceCount(); + _disposeAction(); } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs index 6ada8a4b25..d330de638a 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs @@ -1,7 +1,6 @@ using Ryujinx.Common.Pools; using Ryujinx.Memory.Range; using System; -using System.Collections.Generic; using System.Linq; namespace Ryujinx.Graphics.Gpu.Memory @@ -72,10 +71,10 @@ namespace Ryujinx.Graphics.Gpu.Memory private readonly GpuContext _context; private readonly Buffer _parent; - private readonly Action _flushAction; + private readonly BufferFlushAction _flushAction; - private List _sources; - private BufferMigration _migrationTarget; + private BufferMigration _source; + private BufferModifiedRangeList _migrationTarget; private readonly object _lock = new(); @@ -99,7 +98,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU context that the buffer range list belongs to /// The parent buffer that owns this range list /// The flush action for the parent buffer - public BufferModifiedRangeList(GpuContext context, Buffer parent, Action flushAction) : base(BackingInitialSize) + public BufferModifiedRangeList(GpuContext context, Buffer parent, BufferFlushAction flushAction) : base(BackingInitialSize) { _context = context; _parent = parent; @@ -199,6 +198,36 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Gets modified ranges within the specified region, and then fires the given action for each range individually. + /// + /// Start address to query + /// Size to query + /// Sync number required for a range to be signalled + /// The action to call for each modified range + public void GetRangesAtSync(ulong address, ulong size, ulong syncNumber, Action rangeAction) + { + int count = 0; + + ref var overlaps = ref ThreadStaticArray.Get(); + + // Range list must be consistent for this operation. + lock (_lock) + { + count = FindOverlapsNonOverlapping(address, size, ref overlaps); + } + + for (int i = 0; i < count; i++) + { + BufferModifiedRange overlap = overlaps[i]; + + if (overlap.SyncNumber == syncNumber) + { + rangeAction(overlap.Address, overlap.Size); + } + } + } + /// /// Gets modified ranges within the specified region, and then fires the given action for each range individually. /// @@ -245,41 +274,16 @@ namespace Ryujinx.Graphics.Gpu.Memory /// The offset to pass to the action /// The size to pass to the action /// The sync number that has been reached - /// The modified range list that originally owned this range /// The action to perform - public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent, Action rangeAction) + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferFlushAction rangeAction) { - bool firstSource = true; - - if (parent != this) + if (_source != null) { - lock (_lock) - { - if (_sources != null) - { - foreach (BufferMigration source in _sources) - { - if (source.Overlaps(offset, size, syncNumber)) - { - if (firstSource && !source.FullyMatches(offset, size)) - { - // Perform this buffer's action first. The migrations will run after. - rangeAction(offset, size); - } - - source.RangeActionWithMigration(offset, size, syncNumber, parent); - - firstSource = false; - } - } - } - } + _source.RangeActionWithMigration(offset, size, syncNumber, rangeAction); } - - if (firstSource) + else { - // No overlapping migrations, or they are not meant for this range, flush the data using the given action. - rangeAction(offset, size); + rangeAction(offset, size, syncNumber); } } @@ -319,7 +323,7 @@ namespace Ryujinx.Graphics.Gpu.Memory ClearPart(overlap, clampAddress, clampEnd); - RangeActionWithMigration(clampAddress, clampEnd - clampAddress, waitSync, overlap.Parent, _flushAction); + RangeActionWithMigration(clampAddress, clampEnd - clampAddress, waitSync, _flushAction); } } @@ -329,7 +333,7 @@ namespace Ryujinx.Graphics.Gpu.Memory // There is a migration target to call instead. This can't be changed after set so accessing it outside the lock is fine. - _migrationTarget.Destination.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress); + _migrationTarget.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress); } /// @@ -367,7 +371,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (rangeCount == -1) { - _migrationTarget.Destination.WaitForAndFlushRanges(address, size); + _migrationTarget.WaitForAndFlushRanges(address, size); return; } @@ -407,6 +411,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Inherit ranges from another modified range list. /// + /// + /// Assumes that ranges will be inherited in address ascending order. + /// /// The range list to inherit from /// The action to call for each modified range public void InheritRanges(BufferModifiedRangeList ranges, Action registerRangeAction) @@ -415,18 +422,31 @@ namespace Ryujinx.Graphics.Gpu.Memory lock (ranges._lock) { - BufferMigration migration = new(ranges._parent, ranges._flushAction, ranges, this, _context.SyncNumber); - - ranges._parent.IncrementReferenceCount(); - ranges._migrationTarget = migration; - - _context.RegisterBufferMigration(migration); - inheritRanges = ranges.ToArray(); lock (_lock) { - (_sources ??= new List()).Add(migration); + // Copy over the migration from the previous range list + + BufferMigration oldMigration = ranges._source; + + BufferMigrationSpan span = new BufferMigrationSpan(ranges._parent, ranges._flushAction, oldMigration); + ranges._parent.IncrementReferenceCount(); + + if (_source == null) + { + // Create a new migration. + _source = new BufferMigration(new BufferMigrationSpan[] { span }, this, _context.SyncNumber); + + _context.RegisterBufferMigration(_source); + } + else + { + // Extend the migration + _source.AddSpanToEnd(span); + } + + ranges._migrationTarget = this; foreach (BufferModifiedRange range in inheritRanges) { @@ -445,6 +465,27 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Register a migration from previous buffer storage. This migration is from a snapshot of the buffer's + /// current handle to its handle in the future, and is assumed to be complete when the sync action completes. + /// When the migration completes, the handle is disposed. + /// + public void SelfMigration() + { + lock (_lock) + { + BufferMigrationSpan span = new(_parent, _parent.GetSnapshotDisposeAction(), _parent.GetSnapshotFlushAction(), _source); + BufferMigration migration = new(new BufferMigrationSpan[] { span }, this, _context.SyncNumber); + + // Migration target is used to redirect flush actions to the latest range list, + // so we don't need to set it here. (this range list is still the latest) + + _context.RegisterBufferMigration(migration); + + _source = migration; + } + } + /// /// Removes a source buffer migration, indicating its copy has completed. /// @@ -453,7 +494,10 @@ namespace Ryujinx.Graphics.Gpu.Memory { lock (_lock) { - _sources.Remove(migration); + if (_source == migration) + { + _source = null; + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs new file mode 100644 index 0000000000..49aa141b05 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs @@ -0,0 +1,247 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Manages flushing ranges from buffers in advance for easy access, if they are flushed often. + /// Typically, from device local memory to a host mapped target for cached access. + /// + internal class BufferPreFlush : IDisposable + { + private const ulong PageSize = 4096; + + private enum PreFlushState + { + None, + HasFlushed, + HasCopied + } + + // TODO: avoid copies when write to flush ratio is something like 100:1 + private struct PreFlushPage + { + public PreFlushState State; + public ulong FirstActivatedSync; + public ulong LastCopiedSync; + } + + /// + /// True if there are ranges that should copy to the flush buffer, false otherwise. + /// + public bool ShouldCopy { get; private set; } + + private readonly GpuContext _context; + private readonly PhysicalMemory _physicalMemory; + private readonly Buffer _buffer; + private readonly PreFlushPage[] _pages; + private readonly ulong _address; + private readonly ulong _size; + private readonly ulong _misalignment; + private readonly Action _flushAction; + + private BufferHandle _flushBuffer; + + public BufferPreFlush(GpuContext context, PhysicalMemory physicalMemory, Buffer parent, Action flushAction) + { + _context = context; + _physicalMemory = physicalMemory; + _buffer = parent; + _address = parent.Address; + _size = parent.Size; + _pages = new PreFlushPage[BitUtils.DivRoundUp(_size, PageSize)]; + _misalignment = _address & (PageSize - 1); + + _flushAction = flushAction; + } + + public void EnsureFlushBuffer() + { + if (_flushBuffer == BufferHandle.Null) + { + _flushBuffer = _context.Renderer.CreateBuffer((int)_size, BufferAccess.HostMemory); + } + } + + private (int index, int count) GetPageRange(ulong address, ulong size) + { + ulong offset = address - _address; + ulong endOffset = offset + size; + + int basePage = (int)(offset / PageSize); + int endPage = (int)((endOffset - 1) / PageSize); + + return (basePage, 1 + endPage - basePage); + } + + private (int offset, int size) GetOffset(int startPage, int count) + { + int offset = (int)((ulong)startPage * PageSize - _misalignment); + int endOffset = (int)((ulong)(startPage + count) * PageSize - _misalignment); + + offset = Math.Max(0, offset); + endOffset = Math.Min((int)_size, endOffset); + + return (offset, endOffset - offset); + } + + private void CopyPageRange(int startPage, int count) + { + (int offset, int size) = GetOffset(startPage, count); + + EnsureFlushBuffer(); + + _context.Renderer.Pipeline.CopyBuffer(_buffer.Handle, _flushBuffer, offset, offset, size); + } + + public void CopyModified(ulong address, ulong size) + { + (int baseIndex, int count) = GetPageRange(address, size); + ulong syncNumber = _context.SyncNumber; + + int startPage = -1; + + for (int i = 0; i < count; i++) + { + int pageIndex = baseIndex + i; + ref PreFlushPage page = ref _pages[pageIndex]; + + if (page.State > PreFlushState.None) + { + // Perform the copy, and update the state of each page. + if (startPage == -1) + { + startPage = pageIndex; + } + + if (page.State != PreFlushState.HasCopied) + { + page.FirstActivatedSync = syncNumber; + page.State = PreFlushState.HasCopied; + } + + if (page.LastCopiedSync != syncNumber) + { + page.LastCopiedSync = syncNumber; + } + } + else if (startPage != -1) + { + CopyPageRange(startPage, pageIndex - startPage); + + startPage = -1; + } + } + + if (startPage != -1) + { + CopyPageRange(startPage, (baseIndex + count) - startPage); + } + } + + private void FlushPageRange(ulong address, ulong size, int startPage, int count, bool preFlush) + { + (int pageOffset, int pageSize) = GetOffset(startPage, count); + + int offset = (int)(address - _address); + int end = offset + (int)size; + + offset = Math.Max(offset, pageOffset); + end = Math.Min(end, pageOffset + pageSize); + + if (end >= offset) + { + if (preFlush) + { + // Flush from the host mapped buffer + using PinnedSpan data = _context.Renderer.GetBufferData(_flushBuffer, offset, end - offset); + + // TODO: dependant virtual buffers thing? maybe just disable pre-flush when that is active + _physicalMemory.WriteUntracked(_address + (ulong)offset, data.Get()); + } + else + { + // Flush from the real buffer + _flushAction(_address + (ulong)offset, (ulong)(end - offset)); + } + } + } + + public void FlushWithAction(ulong address, ulong size, ulong syncNumber) + { + // Copy the parts of the range that have pre-flush copies that have been completed. + // Run the flush action for ranges that don't have pre-flush copies. + + // If a range doesn't have a pre-flush copy, consider adding one. + + (int baseIndex, int count) = GetPageRange(address, size); + + bool rangePreFlushed = false; + int startPage = -1; + + for (int i = 0; i < count; i++) + { + int pageIndex = baseIndex + i; + ref PreFlushPage page = ref _pages[pageIndex]; + + bool flushPage = false; + + if (page.State == PreFlushState.HasCopied) + { + if (syncNumber >= page.FirstActivatedSync) + { + // After the range is first activated, its data will always be copied to the preflush buffer on each sync. + flushPage = true; + } + } + else if (page.State == PreFlushState.None) + { + page.State = PreFlushState.HasFlushed; + ShouldCopy = true; + } + + if (flushPage) + { + if (!rangePreFlushed || startPage == -1) + { + if (startPage != -1) + { + FlushPageRange(address, size, startPage, pageIndex - startPage, false); + } + + rangePreFlushed = true; + startPage = pageIndex; + } + } + else if (rangePreFlushed || startPage == -1) + { + if (startPage != -1) + { + FlushPageRange(address, size, startPage, pageIndex - startPage, true); + } + + rangePreFlushed = false; + startPage = pageIndex; + } + } + + if (startPage != -1) + { + FlushPageRange(address, size, startPage, (baseIndex + count) - startPage, rangePreFlushed); + } + } + + public void Dispose() + { + if (_flushBuffer != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(_flushBuffer); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs new file mode 100644 index 0000000000..e6ddf991d6 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs @@ -0,0 +1,92 @@ +using Ryujinx.Graphics.Shader; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + internal enum BufferStage : byte + { + Compute, + Vertex, + TessellationControl, + TessellationEvaluation, + Geometry, + Fragment, + + Indirect, + VertexBuffer, + IndexBuffer, + Copy, + TransformFeedback, + Internal, + None, + + StageMask = 0x3f, + StorageMask = 0xc0, + + StorageRead = 0x40, + StorageWrite = 0x80, + +#pragma warning disable CA1069 // Enums values should not be duplicated + StorageAtomic = 0xc0 +#pragma warning restore CA1069 // Enums values should not be duplicated + } + + internal static class BufferStageUtils + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromShaderStage(ShaderStage stage) + { + return (BufferStage)stage; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromShaderStage(int stageIndex) + { + return (BufferStage)(stageIndex + 1); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromUsage(BufferUsageFlags flags) + { + if (flags.HasFlag(BufferUsageFlags.Write)) + { + return BufferStage.StorageWrite; + } + else + { + return BufferStage.StorageRead; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromUsage(TextureUsageFlags flags) + { + if (flags.HasFlag(TextureUsageFlags.ImageStore)) + { + return BufferStage.StorageWrite; + } + else + { + return BufferStage.StorageRead; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage TextureBuffer(ShaderStage shaderStage, TextureUsageFlags flags) + { + return (BufferStage)shaderStage | FromUsage(flags); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage GraphicsStorage(int stageIndex, BufferUsageFlags flags) + { + return (BufferStage)(stageIndex + 1) | FromUsage(flags); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage ComputeStorage(BufferUsageFlags flags) + { + return BufferStage.Compute | FromUsage(flags); + } + } +} diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index d56c40af46..be79a14a4b 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -61,7 +61,7 @@ namespace Ryujinx.Graphics.OpenGL { BufferCount++; - if (access.HasFlag(GAL.BufferAccess.FlushPersistent)) + if (access.HasFlag(GAL.BufferAccess.HostMemory)) { BufferHandle handle = Buffer.CreatePersistent(size); @@ -148,6 +148,7 @@ namespace Ryujinx.Graphics.OpenGL return new Capabilities( api: TargetApi.OpenGL, vendorName: GpuVendor, + memoryType: SystemMemoryType.BackendManaged, hasFrontFacingBug: intelWindows, hasVectorIndexingBug: amdWindows, needsFragmentOutputSpecialization: false, diff --git a/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs b/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs index f9243bf831..9d1fd9ffd3 100644 --- a/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs +++ b/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs @@ -424,10 +424,20 @@ namespace Ryujinx.Graphics.Vulkan public static BufferAllocationType Convert(this BufferAccess access) { - if (access.HasFlag(BufferAccess.FlushPersistent) || access.HasFlag(BufferAccess.Stream)) + BufferAccess memType = access & BufferAccess.MemoryTypeMask; + + if (memType == BufferAccess.HostMemory || access.HasFlag(BufferAccess.Stream)) { return BufferAllocationType.HostMapped; } + else if (memType == BufferAccess.DeviceMemory) + { + return BufferAllocationType.DeviceLocal; + } + else if (memType == BufferAccess.DeviceMemoryMapped) + { + return BufferAllocationType.DeviceLocalMapped; + } return BufferAllocationType.Auto; } diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index b46ba9c461..f05a42a26e 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -675,9 +675,23 @@ namespace Ryujinx.Graphics.Vulkan var limits = _physicalDevice.PhysicalDeviceProperties.Limits; var mainQueueProperties = _physicalDevice.QueueFamilyProperties[QueueFamilyIndex]; + SystemMemoryType memoryType; + + if (IsSharedMemory) + { + memoryType = SystemMemoryType.UnifiedMemory; + } + else + { + memoryType = Vendor == Vendor.Nvidia ? + SystemMemoryType.DedicatedMemorySlowStorage : + SystemMemoryType.DedicatedMemory; + } + return new Capabilities( api: TargetApi.Vulkan, GpuVendor, + memoryType: memoryType, hasFrontFacingBug: IsIntelWindows, hasVectorIndexingBug: Vendor == Vendor.Qualcomm, needsFragmentOutputSpecialization: IsMoltenVk,