diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
index 86efd375e6..86e9ad7e43 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -32,7 +32,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
         private int _operationsCount;
 
-        private class AllocationContext
+        private class AllocationContext : IDisposable
         {
             public RegisterMasks Masks { get; }
 
@@ -49,8 +49,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 StackAlloc = stackAlloc;
                 Masks = masks;
 
-                Active = new BitMap(intervalsCount);
-                Inactive = new BitMap(intervalsCount);
+                Active = BitMapPool.Allocate(intervalsCount);
+                Inactive = BitMapPool.Allocate(intervalsCount);
             }
 
             public void MoveActiveToInactive(int bit)
@@ -69,6 +69,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                 dest.Set(bit);
             }
+
+            public void Dispose()
+            {
+                BitMapPool.Release();
+            }
         }
 
         public AllocationResult RunPass(
@@ -121,10 +126,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             InsertSplitCopies();
             InsertSplitCopiesAtEdges(cfg);
 
-            return new AllocationResult(
+            AllocationResult result = new AllocationResult(
                 context.IntUsedRegisters,
                 context.VecUsedRegisters,
                 context.StackAlloc.TotalSize);
+
+            context.Dispose();
+
+            return result;
         }
 
         private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
@@ -618,15 +627,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
 
                 bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null;
 
-                foreach (BasicBlock successor in Successors(block))
+                for (int i = 0; i < 2; i++)
                 {
+                    // This used to use an enumerable, but it ended up generating a lot of garbage, so now it is a loop.
+                    BasicBlock successor = (i == 0) ? block.Next : block.Branch;
+                    if (successor == null)
+                    {
+                        continue;
+                    }
+
                     int succIndex = successor.Index;
 
                     // If the current node is a split node, then the actual successor node
                     // (the successor before the split) should be right after it.
                     if (IsSplitEdgeBlock(successor))
                     {
-                        succIndex = Successors(successor).First().Index;
+                        succIndex = FirstSuccessor(successor).Index;
                     }
 
                     CopyResolver copyResolver = new CopyResolver();
@@ -699,8 +715,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         {
             Operand register = GetRegister(current);
 
-            foreach (int usePosition in current.UsePositions())
+            IList<int> usePositions = current.UsePositions();
+            for (int i = usePositions.Count - 1; i >= 0; i--)
             {
+                int usePosition = -usePositions[i];
                 (_, Node operation) = GetOperationNode(usePosition);
 
                 for (int index = 0; index < operation.SourcesCount; index++)
@@ -778,8 +796,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 {
                     _operationNodes.Add((block.Operations, node));
 
-                    foreach (Operand dest in Destinations(node))
+                    for (int i = 0; i < node.DestinationsCount; i++)
                     {
+                        Operand dest = node.GetDestination(i);
                         if (dest.Kind == OperandKind.LocalVariable && visited.Add(dest))
                         {
                             dest.NumberLocal(_intervals.Count);
@@ -815,12 +834,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             // Compute local live sets.
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { - BitMap liveGen = new BitMap(mapSize); - BitMap liveKill = new BitMap(mapSize); + BitMap liveGen = BitMapPool.Allocate(mapSize); + BitMap liveKill = BitMapPool.Allocate(mapSize); for (Node node = block.Operations.First; node != null; node = node.ListNext) { - foreach (Operand source in Sources(node)) + Sources(node, (source) => { int id = GetOperandId(source); @@ -828,10 +847,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { liveGen.Set(id); } - } + }); - foreach (Operand dest in Destinations(node)) + for (int i = 0; i < node.DestinationsCount; i++) { + Operand dest = node.GetDestination(i); liveKill.Set(GetOperandId(dest)); } } @@ -846,8 +866,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators for (int index = 0; index < cfg.Blocks.Count; index++) { - blkLiveIn [index] = new BitMap(mapSize); - blkLiveOut[index] = new BitMap(mapSize); + blkLiveIn [index] = BitMapPool.Allocate(mapSize); + blkLiveOut[index] = BitMapPool.Allocate(mapSize); } bool modified; @@ -862,12 +882,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators BitMap liveOut = blkLiveOut[block.Index]; - foreach (BasicBlock successor in Successors(block)) + if ((block.Next != null && liveOut.Set(blkLiveIn[block.Next.Index])) || (block.Branch != null && liveOut.Set(blkLiveIn[block.Branch.Index]))) { - if (liveOut.Set(blkLiveIn[successor.Index])) - { - modified = true; - } + modified = true; } BitMap liveIn = blkLiveIn[block.Index]; @@ -920,21 +937,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { operationPos -= InstructionGap; - foreach (Operand dest in Destinations(node)) + for (int i = 0; i < node.DestinationsCount; i++) { + Operand dest = node.GetDestination(i); LiveInterval interval = _intervals[GetOperandId(dest)]; interval.SetStart(operationPos + 1); interval.AddUsePosition(operationPos + 1); } - foreach (Operand source in Sources(node)) + Sources(node, (source) => { LiveInterval interval = 
_intervals[GetOperandId(source)]; interval.AddRange(blockStart, operationPos + 1); interval.AddUsePosition(operationPos); - } + }); if (node is Operation operation && operation.Instruction == Instruction.Call) { @@ -982,17 +1000,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0); } - private static IEnumerable Successors(BasicBlock block) + private static BasicBlock FirstSuccessor(BasicBlock block) { - if (block.Next != null) - { - yield return block.Next; - } - - if (block.Branch != null) - { - yield return block.Branch; - } + return block.Next ?? block.Branch; } private static IEnumerable BottomOperations(BasicBlock block) @@ -1007,15 +1017,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private static IEnumerable Destinations(Node node) - { - for (int index = 0; index < node.DestinationsCount; index++) - { - yield return node.GetDestination(index); - } - } - - private static IEnumerable Sources(Node node) + private static void Sources(Node node, Action action) { for (int index = 0; index < node.SourcesCount; index++) { @@ -1023,7 +1025,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (IsLocalOrRegister(source.Kind)) { - yield return source; + action(source); } else if (source.Kind == OperandKind.Memory) { diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index 18858a7689..8b795271ad 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.IntermediateRepresentation; using System; using System.Collections.Generic; @@ -12,7 +13,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private LiveInterval _parent; - private SortedSet _usePositions; + private SortedIntegerList _usePositions; public int UsesCount => _usePositions.Count; @@ -38,7 +39,7 @@ namespace 
ARMeilleure.CodeGen.RegisterAllocators Local = local; _parent = parent ?? this; - _usePositions = new SortedSet(); + _usePositions = new SortedIntegerList(); _ranges = new List(); @@ -196,7 +197,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public void AddUsePosition(int position) { - _usePositions.Add(position); + // Inserts are in descending order, but ascending is faster for SortedList<>. + // We flip the ordering, then iterate backwards when using the final list. + _usePositions.Add(-position); } public bool Overlaps(int position) @@ -247,9 +250,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return _childs.Values; } - public IEnumerable UsePositions() + public IList UsePositions() { - return _usePositions; + return _usePositions.GetList(); } public int FirstUse() @@ -259,20 +262,19 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return NotFound; } - return _usePositions.First(); + return -_usePositions.Last(); } public int NextUseAfter(int position) { - foreach (int usePosition in _usePositions) - { - if (usePosition >= position) - { - return usePosition; - } - } + int index = _usePositions.FindLessEqualIndex(-position); + return (index >= 0) ? 
-_usePositions[index] : NotFound; + } - return NotFound; + public void RemoveAfter(int position) + { + int index = _usePositions.FindLessEqualIndex(-position); + _usePositions.RemoveRange(0, index + 1); } public LiveInterval Split(int position) @@ -311,12 +313,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators _ranges.RemoveRange(splitIndex, count); } - foreach (int usePosition in _usePositions.Where(x => x >= position)) + int addAfter = _usePositions.FindLessEqualIndex(-position); + for (int index = addAfter; index >= 0; index--) { + int usePosition = _usePositions[index]; right._usePositions.Add(usePosition); } - _usePositions.RemoveWhere(x => x >= position); + RemoveAfter(position); Debug.Assert(_ranges.Count != 0, "Left interval is empty after split."); diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs index 9dff271b4c..359f69d7be 100644 --- a/ARMeilleure/Common/BitMap.cs +++ b/ARMeilleure/Common/BitMap.cs @@ -3,18 +3,47 @@ using System.Collections.Generic; namespace ARMeilleure.Common { - class BitMap : IEnumerable + class BitMap : IEnumerator { - private const int IntSize = 32; + private const int IntSize = 64; private const int IntMask = IntSize - 1; - private List _masks; + private List _masks; + + private int _enumIndex; + private long _enumMask; + private int _enumBit; + + public int Current => _enumIndex * IntSize + _enumBit; + object IEnumerator.Current => Current; + + public BitMap() + { + _masks = new List(0); + } public BitMap(int initialCapacity) { int count = (initialCapacity + IntMask) / IntSize; - _masks = new List(count); + _masks = new List(count); + + while (count-- > 0) + { + _masks.Add(0); + } + } + + public void Reset(int initialCapacity) + { + int count = (initialCapacity + IntMask) / IntSize; + + if (count > _masks.Capacity) + { + _masks.Capacity = count; + } + + _masks.Clear(); while (count-- > 0) { @@ -29,7 +58,7 @@ namespace ARMeilleure.Common int wordIndex = bit / IntSize; int wordBit = bit & IntMask; - int 
wordMask = 1 << wordBit; + long wordMask = 1L << wordBit; if ((_masks[wordIndex] & wordMask) != 0) { @@ -48,7 +77,7 @@ namespace ARMeilleure.Common int wordIndex = bit / IntSize; int wordBit = bit & IntMask; - int wordMask = 1 << wordBit; + long wordMask = 1L << wordBit; _masks[wordIndex] &= ~wordMask; } @@ -60,7 +89,7 @@ namespace ARMeilleure.Common int wordIndex = bit / IntSize; int wordBit = bit & IntMask; - return (_masks[wordIndex] & (1 << wordBit)) != 0; + return (_masks[wordIndex] & (1L << wordBit)) != 0; } public bool Set(BitMap map) @@ -71,7 +100,7 @@ namespace ARMeilleure.Common for (int index = 0; index < _masks.Count; index++) { - int newValue = _masks[index] | map._masks[index]; + long newValue = _masks[index] | map._masks[index]; if (_masks[index] != newValue) { @@ -92,7 +121,7 @@ namespace ARMeilleure.Common for (int index = 0; index < _masks.Count; index++) { - int newValue = _masks[index] & ~map._masks[index]; + long newValue = _masks[index] & ~map._masks[index]; if (_masks[index] != newValue) { @@ -105,6 +134,10 @@ namespace ARMeilleure.Common return modified; } + #region IEnumerable Methods + + // Note: The bit enumerator is embedded in this class to avoid creating garbage when enumerating. 
+
         private void EnsureCapacity(int size)
         {
             while (_masks.Count * IntSize < size)
@@ -115,24 +148,40 @@ namespace ARMeilleure.Common
 
         public IEnumerator<int> GetEnumerator()
         {
-            for (int index = 0; index < _masks.Count; index++)
-            {
-                int mask = _masks[index];
-
-                while (mask != 0)
-                {
-                    int bit = BitUtils.LowestBitSet(mask);
-
-                    mask &= ~(1 << bit);
-
-                    yield return index * IntSize + bit;
-                }
-            }
+            // The enumeration state lives in this instance, so nested or concurrent
+            // enumeration of the same BitMap is not supported.
+            Reset();
+            return this;
         }
 
-        IEnumerator IEnumerable.GetEnumerator()
+        public bool MoveNext()
         {
-            return GetEnumerator();
+            if (_enumMask != 0)
+            {
+                _enumMask &= ~(1L << _enumBit);
+            }
+            while (_enumMask == 0)
+            {
+                if (++_enumIndex >= _masks.Count)
+                {
+                    return false;
+                }
+                _enumMask = _masks[_enumIndex];
+            }
+            _enumBit = BitUtils.LowestBitSet(_enumMask);
+            return true;
         }
+
+        public void Reset()
+        {
+            _enumIndex = -1;
+            _enumMask = 0;
+            _enumBit = 0;
+        }
+
+        public void Dispose() { }
+
+#endregion
+
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/Common/BitMapPool.cs b/ARMeilleure/Common/BitMapPool.cs
new file mode 100644
index 0000000000..caba231716
--- /dev/null
+++ b/ARMeilleure/Common/BitMapPool.cs
@@ -0,0 +1,23 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+    // Hands out BitMap instances taken from the thread-static ThreadStaticPool<BitMap>.
+    static class BitMapPool
+    {
+        // Takes a BitMap from the pool and resets it to hold at least initialCapacity cleared bits.
+        public static BitMap Allocate(int initialCapacity)
+        {
+            BitMap result = ThreadStaticPool<BitMap>.Instance.Allocate();
+            result.Reset(initialCapacity);
+            return result;
+        }
+
+        // Clears the pool. NOTE(review): presumably every BitMap handed out on this thread may be
+        // reused afterwards, so callers must stop using them first - confirm in ThreadStaticPool.
+        public static void Release()
+        {
+            ThreadStaticPool<BitMap>.Instance.Clear();
+        }
+    }
+}
diff --git a/ARMeilleure/Common/BitUtils.cs b/ARMeilleure/Common/BitUtils.cs
index 7a29dcff7f..6991b45c10 100644
--- a/ARMeilleure/Common/BitUtils.cs
+++ b/ARMeilleure/Common/BitUtils.cs
@@ -1,3 +1,5 @@
+using System.Runtime.CompilerServices;
+
 namespace ARMeilleure.Common
 {
     static class BitUtils
@@ -6,11 +8,16 @@ namespace ARMeilleure.Common
 
         private static readonly int[] DeBrujinLbsLut;
 
+        private const long DeBrujinSequence64 = 0x37e84a99dae458f;
+
+        private static readonly int[] DeBrujinLbsLut64;
+
         private static readonly sbyte[] HbsNibbleLut;
 
         static BitUtils()
         {
             DeBrujinLbsLut = new int[32];
+            DeBrujinLbsLut64 = new int[64];
 
             for (int index = 0; index < DeBrujinLbsLut.Length; index++)
             {
@@ -19,6 +26,13 @@ namespace ARMeilleure.Common
                 DeBrujinLbsLut[lutIndex] = index;
             }
 
+            for (int index = 0; index < DeBrujinLbsLut64.Length; index++)
+            {
+                ulong lutIndex = (ulong)(DeBrujinSequence64 * (1L << index)) >> 58;
+
+                DeBrujinLbsLut64[lutIndex] = index;
+            }
+
             HbsNibbleLut = new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
         }
 
@@ -64,6 +78,7 @@ namespace ARMeilleure.Common
             return HbsNibbleLut[value];
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static int LowestBitSet(int value)
         {
             if (value == 0)
@@ -76,6 +91,20 @@ namespace ARMeilleure.Common
             return DeBrujinLbsLut[(uint)(DeBrujinSequence * lsb) >> 27];
         }
 
+        // Returns the index of the least significant set bit of value, or -1 when value is zero.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int LowestBitSet(long value)
+        {
+            if (value == 0)
+            {
+                return -1;
+            }
+
+            long lsb = value & -value;
+
+            return DeBrujinLbsLut64[(ulong)(DeBrujinSequence64 * lsb) >> 58];
+        }
+
         public static long Replicate(long bits, int size)
         {
             long output = 0;
diff --git a/ARMeilleure/Common/SortedIntegerList.cs b/ARMeilleure/Common/SortedIntegerList.cs
new file mode 100644
index 0000000000..73dc5036cb
--- /dev/null
+++ b/ARMeilleure/Common/SortedIntegerList.cs
@@ -0,0 +1,125 @@
+using System.Collections.Generic;
+
+namespace ARMeilleure.Common
+{
+    /// <summary>
+    /// List of distinct integers kept in ascending order, with a fast path for appending
+    /// values larger than the current maximum.
+    /// </summary>
+    public class SortedIntegerList
+    {
+        private List<int> _items;
+
+        public int Count => _items.Count;
+
+        // NOTE(review): the setter does not re-sort; callers must keep the ordering intact.
+        public int this[int index]
+        {
+            get
+            {
+                return _items[index];
+            }
+            set
+            {
+                _items[index] = value;
+            }
+        }
+
+        public SortedIntegerList()
+        {
+            _items = new List<int>();
+        }
+
+        /// <summary>
+        /// Inserts <paramref name="value"/> at its sorted position.
+        /// </summary>
+        /// <returns>True if the value was inserted, false if it was already present.</returns>
+        public bool Add(int value)
+        {
+            if (_items.Count > 0 && value > Last())
+            {
+                _items.Add(value);
+                return true;
+            }
+            else
+            {
+                // Binary search for the location to insert.
+                int min = 0;
+                int max = Count - 1;
+
+                while (min <= max)
+                {
+                    int mid = min + (max - min) / 2;
+                    int existing = _items[mid];
+                    if (value > existing)
+                    {
+                        min = mid + 1;
+                    }
+                    else if (value < existing)
+                    {
+                        max = mid - 1;
+                    }
+                    else
+                    {
+                        // This value already exists in the list. Return false.
+                        return false;
+                    }
+                }
+
+                _items.Insert(min, value);
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Finds the index of the largest item that is less than or equal to <paramref name="value"/>.
+        /// </summary>
+        /// <returns>The index of that item, or -1 if every item is greater than the value.</returns>
+        public int FindLessEqualIndex(int value)
+        {
+            int min = 0;
+            int max = Count - 1;
+
+            while (min <= max)
+            {
+                int mid = min + (max - min) / 2;
+                int existing = _items[mid];
+                if (value > existing)
+                {
+                    min = mid + 1;
+                }
+                else if (value < existing)
+                {
+                    max = mid - 1;
+                }
+                else
+                {
+                    return mid;
+                }
+            }
+
+            return max;
+        }
+
+        // Removes count items starting at index; a count of zero or less is ignored.
+        public void RemoveRange(int index, int count)
+        {
+            if (count > 0)
+            {
+                _items.RemoveRange(index, count);
+            }
+        }
+
+        // Returns the largest item. The list must not be empty.
+        public int Last()
+        {
+            return _items[Count - 1];
+        }
+
+        // Exposes the backing list itself (no copy), so changes made through it affect this instance.
+        public List<int> GetList()
+        {
+            return _items;
+        }
+    }
+}
diff --git a/ARMeilleure/ThreadStaticPool.cs b/ARMeilleure/ThreadStaticPool.cs
index 542ba29fbc..4dd8eb396c 100644
--- a/ARMeilleure/ThreadStaticPool.cs
+++ b/ARMeilleure/ThreadStaticPool.cs
@@ -3,7 +3,7 @@ using System.Threading;
 
 namespace ARMeilleure
 {
-    public class ThreadStaticPool<T> where T : class, new()
+    internal class ThreadStaticPool<T> where T : class, new()
     {
         [ThreadStatic]
         private static ThreadStaticPool<T> _instance;