Optimized memory modified check
In some cases this check was initially more expensive than simply sending the data without checking it. It should now perform much better.
parent 36e8e074c9
commit ee5699838b
3 changed files with 71 additions and 145 deletions
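The diff below drops the old scan-and-return approach, where MemoryManager.IsRegionModified returned a (bool[], int) of per-page flags that NvGpuVmmCache fed into a ValueRangeSet, and moves to a page-observation scheme: the GPU-side cache asks the CPU memory manager to observe a region, observing a page clears its software page-table entry, and the first access to an observed page restores the entry and raises ObservedAccess, which invalidates that page's per-buffer-type mask so the next check reports it as modified. What follows is a minimal, self-contained C# sketch of that idea, not the actual Ryujinx code; the PageObserver and PageCache types, their members, and the demo values are illustrative only. The hunks after it belong, in order, to the ChocolArm64.Memory MemoryManager, the Ryujinx.Graphics.Memory NvGpuVmm, and the rewritten NvGpuVmmCache.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;

// Illustrative stand-in for the CPU memory manager: "observing" a page here just
// records it in a set, and the first write to an observed page fires an event
// (in the real code the page-table entry is cleared and restored on access).
class PageObserver
{
    public const int  PageBits = 12;
    public const long PageSize = 1L << PageBits;

    private readonly HashSet<long> _observed = new HashSet<long>();

    public event EventHandler<long> ObservedAccess;

    public void StartObservingRegion(long position, long size)
    {
        for (long addr = position & ~(PageSize - 1); addr < position + size; addr += PageSize)
        {
            _observed.Add(addr >> PageBits);
        }
    }

    public void Write(long position)
    {
        if (_observed.Remove(position >> PageBits))
        {
            ObservedAccess?.Invoke(this, position);
        }
    }
}

// Illustrative stand-in for the GPU-side cache: one mask of buffer types per page;
// a write to an observed page drops that page, so the next check reports it.
class PageCache
{
    private readonly ConcurrentDictionary<long, int> _cachedPages = new ConcurrentDictionary<long, int>();
    private readonly PageObserver _observer;

    public PageCache(PageObserver observer)
    {
        _observer = observer;
        _observer.ObservedAccess += (s, position) =>
            _cachedPages.TryRemove(position >> PageObserver.PageBits, out _);
    }

    // True if any page of the region is not yet cached for this buffer type.
    public bool IsRegionModified(long position, long size, int bufferType)
    {
        int  newMask  = 1 << bufferType;
        bool modified = false;

        _observer.StartObservingRegion(position, size);

        for (long addr = position & ~(PageObserver.PageSize - 1); addr < position + size; addr += PageObserver.PageSize)
        {
            long page    = addr >> PageObserver.PageBits;
            int  oldMask = _cachedPages.GetOrAdd(page, 0);

            if ((oldMask & newMask) == 0)
            {
                modified = true;

                _cachedPages[page] = oldMask | newMask;
            }
        }

        return modified;
    }
}

class Demo
{
    static void Main()
    {
        var observer = new PageObserver();
        var cache    = new PageCache(observer);

        Console.WriteLine(cache.IsRegionModified(0x1000, 0x2000, 0)); // True: first check, pages get cached
        Console.WriteLine(cache.IsRegionModified(0x1000, 0x2000, 0)); // False: nothing was written in between
        observer.Write(0x1800);                                       // dirties one observed page
        Console.WriteLine(cache.IsRegionModified(0x1000, 0x2000, 0)); // True: that page was invalidated
    }
}

In the actual change the per-page dictionaries additionally let one physical page be cached for several buffer types at once, which is what the NvGpuVmmCache hunk implements with CachedPages.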
@@ -17,7 +17,7 @@ namespace ChocolArm64.Memory
     {
         private const int PtLvl0Bits = 13;
         private const int PtLvl1Bits = 14;
-        private const int PtPageBits = 12;
+        public  const int PtPageBits = 12;

         private const int PtLvl0Size = 1 << PtLvl0Bits;
         private const int PtLvl1Size = 1 << PtLvl1Bits;
@@ -55,6 +55,8 @@ namespace ChocolArm64.Memory

         public event EventHandler<InvalidAccessEventArgs> InvalidAccess;

+        public event EventHandler<InvalidAccessEventArgs> ObservedAccess;
+
         public MemoryManager(IntPtr ram)
         {
             _monitors = new Dictionary<int, ArmMonitor>();
@@ -728,14 +730,18 @@ Unmapped:
         {
             long key = position >> PtPageBits;

+            InvalidAccessEventArgs e = new InvalidAccessEventArgs(position);
+
             if (_observedPages.TryGetValue(key, out IntPtr ptr))
             {
                 SetPtEntry(position, (byte*)ptr);

+                ObservedAccess?.Invoke(this, e);
+
                 return (byte*)ptr + (position & PageMask);
             }

-            InvalidAccess?.Invoke(this, new InvalidAccessEventArgs(position));
+            InvalidAccess?.Invoke(this, e);

             throw new VmmPageFaultException(position);
         }
@@ -784,45 +790,15 @@ Unmapped:
             _pageTable[l0][l1] = ptr;
         }

-        public (bool[], int) IsRegionModified(long position, long size)
+        public void StartObservingRegion(long position, long size)
         {
             long endPosition = (position + size + PageMask) & ~PageMask;

             position &= ~PageMask;

-            size = endPosition - position;
-
-            bool[] modified = new bool[size >> PtPageBits];
-
-            int count = 0;
-
-            lock (_observedPages)
+            while ((ulong)position < (ulong)endPosition)
             {
-                for (int page = 0; page < modified.Length; page++)
-                {
-                    byte* ptr = Translate(position);
-
-                    if (_observedPages.TryAdd(position >> PtPageBits, (IntPtr)ptr))
-                    {
-                        modified[page] = true;
-
-                        count++;
-                    }
-                    else
-                    {
-                        long l0 = (position >> PtLvl0Bit) & PtLvl0Mask;
-                        long l1 = (position >> PtLvl1Bit) & PtLvl1Mask;
-
-                        byte** lvl1 = _pageTable[l0];
-
-                        if (lvl1 != null)
-                        {
-                            if (modified[page] = lvl1[l1] != null)
-                            {
-                                count++;
-                            }
-                        }
-                    }
+                _observedPages[position >> PtPageBits] = (IntPtr)Translate(position);

-                    SetPtEntry(position, null);
+                SetPtEntry(position, null);

@@ -830,9 +806,6 @@ Unmapped:
-                }
-            }
-
-            return (modified, count);
-        }
+            }
+        }

         public void StopObservingRegion(long position, long size)
         {
             long endPosition = (position + size + PageMask) & ~PageMask;
@@ -36,7 +36,7 @@ namespace Ryujinx.Graphics.Memory
         {
             this.Memory = Memory;

-            Cache = new NvGpuVmmCache();
+            Cache = new NvGpuVmmCache(Memory);

             PageTable = new long[PTLvl0Size][];
         }
@@ -262,7 +262,7 @@ namespace Ryujinx.Graphics.Memory

         public bool IsRegionModified(long PA, long Size, NvGpuBufferType BufferType)
         {
-            return Cache.IsRegionModified(Memory, BufferType, PA, Size);
+            return Cache.IsRegionModified(PA, Size, BufferType);
         }

         public bool TryGetHostAddress(long Position, long Size, out IntPtr Ptr)
@@ -1,130 +1,83 @@
+using ChocolArm64.Events;
 using ChocolArm64.Memory;
-using System;
+using System.Collections.Concurrent;

 namespace Ryujinx.Graphics.Memory
 {
     class NvGpuVmmCache
     {
-        private struct CachedResource
-        {
-            public long Key;
-            public int  Mask;
-
-            public CachedResource(long Key, int Mask)
-            {
-                this.Key  = Key;
-                this.Mask = Mask;
-            }
-
-            public override int GetHashCode()
-            {
-                return (int)(Key * 23 + Mask);
-            }
-
-            public override bool Equals(object obj)
-            {
-                return obj is CachedResource Cached && Equals(Cached);
-            }
-
-            public bool Equals(CachedResource other)
-            {
-                return Key == other.Key && Mask == other.Mask;
-            }
-        }
-
-        private ValueRangeSet<CachedResource> CachedRanges;
-
-        public NvGpuVmmCache()
-        {
-            CachedRanges = new ValueRangeSet<CachedResource>();
-        }
-
-        public bool IsRegionModified(MemoryManager Memory, NvGpuBufferType BufferType, long Start, long Size)
-        {
-            (bool[] Modified, long ModifiedCount) = Memory.IsRegionModified(Start, Size);
-
-            //Remove all modified ranges.
-            int Index = 0;
-
-            long Position = Start & ~NvGpuVmm.PageMask;
-
-            while (ModifiedCount > 0)
-            {
-                if (Modified[Index++])
-                {
-                    CachedRanges.Remove(new ValueRange<CachedResource>(Position, Position + NvGpuVmm.PageSize));
-
-                    ModifiedCount--;
-                }
-
-                Position += NvGpuVmm.PageSize;
-            }
-
-            //Mask has the bit set for the current resource type.
-            //If the region is not yet present on the list, then a new ValueRange
-            //is directly added with the current resource type as the only bit set.
-            //Otherwise, it just sets the bit for this new resource type on the current mask.
-            //The physical address of the resource is used as key, those keys are used to keep
-            //track of resources that are already on the cache. A resource may be inside another
-            //resource, and in this case we should return true if the "sub-resource" was not
-            //yet cached.
-            int Mask = 1 << (int)BufferType;
-
-            CachedResource NewCachedValue = new CachedResource(Start, Mask);
-
-            ValueRange<CachedResource> NewCached = new ValueRange<CachedResource>(Start, Start + Size);
-
-            ValueRange<CachedResource>[] Ranges = CachedRanges.GetAllIntersections(NewCached);
-
-            bool IsKeyCached = Ranges.Length > 0 && Ranges[0].Value.Key == Start;
-
-            long LastEnd = NewCached.Start;
-
-            long Coverage = 0;
-
-            for (Index = 0; Index < Ranges.Length; Index++)
-            {
-                ValueRange<CachedResource> Current = Ranges[Index];
-
-                CachedResource Cached = Current.Value;
-
-                long RgStart = Math.Max(Current.Start, NewCached.Start);
-                long RgEnd   = Math.Min(Current.End,   NewCached.End);
-
-                if ((Cached.Mask & Mask) != 0)
-                {
-                    Coverage += RgEnd - RgStart;
-                }
-
-                //Highest key value has priority, this prevents larger resources
-                //for completely invalidating smaller ones on the cache. For example,
-                //consider that a resource in the range [100, 200) was added, and then
-                //another one in the range [50, 200). We prevent the new resource from
-                //completely replacing the old one by spliting it like this:
-                //New resource key is added at [50, 100), old key is still present at [100, 200).
-                if (Cached.Key < Start)
-                {
-                    Cached.Key = Start;
-                }
-
-                Cached.Mask |= Mask;
-
-                CachedRanges.Add(new ValueRange<CachedResource>(RgStart, RgEnd, Cached));
-
-                if (RgStart > LastEnd)
-                {
-                    CachedRanges.Add(new ValueRange<CachedResource>(LastEnd, RgStart, NewCachedValue));
-                }
-
-                LastEnd = RgEnd;
-            }
-
-            if (LastEnd < NewCached.End)
-            {
-                CachedRanges.Add(new ValueRange<CachedResource>(LastEnd, NewCached.End, NewCachedValue));
-            }
-
-            return !IsKeyCached || Coverage != Size;
+        private const int PageBits = MemoryManager.PtPageBits;
+
+        private const long PageSize = MemoryManager.PageSize;
+        private const long PageMask = MemoryManager.PageMask;
+
+        private ConcurrentDictionary<long, int>[] CachedPages;
+
+        private MemoryManager _memory;
+
+        public NvGpuVmmCache(MemoryManager memory)
+        {
+            _memory = memory;
+
+            _memory.ObservedAccess += MemoryAccessHandler;
+
+            CachedPages = new ConcurrentDictionary<long, int>[1 << 20];
+        }
+
+        private void MemoryAccessHandler(object sender, InvalidAccessEventArgs e)
+        {
+            long pa = _memory.GetPhysicalAddress(e.Position);
+
+            CachedPages[pa >> PageBits]?.Clear();
+        }
+
+        public bool IsRegionModified(long position, long size, NvGpuBufferType bufferType)
+        {
+            long pa = _memory.GetPhysicalAddress(position);
+
+            long addr = pa;
+
+            long endAddr = (addr + size + PageMask) & ~PageMask;
+
+            int newBuffMask = 1 << (int)bufferType;
+
+            _memory.StartObservingRegion(position, size);
+
+            long cachedPagesCount = 0;
+
+            while (addr < endAddr)
+            {
+                long page = addr >> PageBits;
+
+                ConcurrentDictionary<long, int> dictionary = CachedPages[page];
+
+                if (dictionary == null)
+                {
+                    dictionary = new ConcurrentDictionary<long, int>();
+
+                    CachedPages[page] = dictionary;
+                }
+
+                if (dictionary.TryGetValue(pa, out int currBuffMask))
+                {
+                    if ((currBuffMask & newBuffMask) != 0)
+                    {
+                        cachedPagesCount++;
+                    }
+                    else
+                    {
+                        dictionary[pa] |= newBuffMask;
+                    }
+                }
+                else
+                {
+                    dictionary[pa] = newBuffMask;
+                }
+
+                addr += PageSize;
+            }
+
+            return cachedPagesCount != (endAddr - pa + PageMask) >> PageBits;
         }
     }
 }