From 6f654c681e480050403a04030acc29b4cff8bc28 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 18 Jun 2019 15:54:19 -0300 Subject: [PATCH] Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes --- .../CodeGen/RegisterAllocators/LinearScan.cs | 248 ++++++------------ .../RegisterAllocators/LiveInterval.cs | 166 ++++-------- .../CodeGen/RegisterAllocators/LiveRange.cs | 14 +- .../RegisterAllocators/StackAllocator.cs | 137 +--------- ARMeilleure/CodeGen/X86/Assembler.cs | 33 ++- ARMeilleure/CodeGen/X86/CodeGenContext.cs | 5 +- ARMeilleure/CodeGen/X86/CodeGenerator.cs | 80 ++++-- ARMeilleure/CodeGen/X86/PreAllocator.cs | 143 +++++++--- ARMeilleure/CodeGen/X86/X86Instruction.cs | 1 + ARMeilleure/Decoders/OpCodeTable.cs | 20 +- ARMeilleure/Diagnostics/IRDumper.cs | 3 +- ARMeilleure/Instructions/InstEmitSimdCmp.cs | 11 +- ...nstEmitCrypto.cs => InstEmitSimdCrypto.cs} | 0 ARMeilleure/Instructions/InstEmitSimdCvt.cs | 44 +++- ARMeilleure/Instructions/InstEmitSimdHash.cs | 169 ++++++++++++ ARMeilleure/Instructions/InstEmitSimdShift.cs | 4 +- ARMeilleure/Instructions/SoftFallback.cs | 241 +++++++++++++++++ .../IntermediateRepresentation/Instruction.cs | 1 + .../IntermediateRepresentation/OperandType.cs | 14 + ARMeilleure/State/V128.cs | 38 +++ ARMeilleure/Translation/EmitterContext.cs | 5 + 21 files changed, 861 insertions(+), 516 deletions(-) rename ARMeilleure/Instructions/{InstEmitCrypto.cs => InstEmitSimdCrypto.cs} (100%) create mode 100644 ARMeilleure/Instructions/InstEmitSimdHash.cs diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs index 5e61c980f7..bf4e2fc8f7 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs @@ -76,22 +76,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators NumberLocals(cfg); - BuildIntervals(cfg, regMasks, out int maxCallArgs); - - //CoalesceCopies(cfg.Blocks); - AllocationContext context = new AllocationContext(regMasks, _intervals.Count); + BuildIntervals(cfg, context, out int maxCallArgs); + for (int index = 0; index < _intervals.Count; index++) { - LiveInterval current = GetInterval(index); + LiveInterval current = _intervals[index]; + + if (current.IsEmpty) + { + continue; + } if (current.IsFixed) { - if (!current.IsEmpty) - { - context.Active.Set(index); - } + context.Active.Set(index); continue; } @@ -125,13 +125,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators //Check active intervals that already ended. foreach (int iIndex in context.Active) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; - if (interval.End < current.Start) + if (interval.GetEnd() < current.GetStart()) { context.Active.Clear(iIndex); } - else if (!interval.Overlaps(current.Start)) + else if (!interval.Overlaps(current.GetStart())) { context.MoveActiveToInactive(iIndex); } @@ -140,13 +140,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators //Check inactive intervals that already ended or were reactivated. foreach (int iIndex in context.Inactive) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; - if (interval.End < current.Start) + if (interval.GetEnd() < current.GetStart()) { context.Inactive.Clear(iIndex); } - else if (interval.Overlaps(current.Start)) + else if (interval.Overlaps(current.GetStart())) { context.MoveInactiveToActive(iIndex); } @@ -176,7 +176,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Active) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (interval.Register.Type == regType) { @@ -186,7 +186,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Inactive) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (interval.Register.Type == regType && interval.Overlaps(current)) { @@ -203,21 +203,21 @@ namespace ARMeilleure.CodeGen.RegisterAllocators int selectedNextUse = freePositions[selectedReg]; - if (GetSplitPosition(selectedNextUse) <= current.Start) + if (GetSplitPosition(selectedNextUse) <= current.GetStart()) { return false; } - else if (selectedNextUse < current.End) + else if (selectedNextUse < current.GetEnd()) { int splitPosition = GetSplitPosition(selectedNextUse); - Debug.Assert(splitPosition > current.Start, "Trying to split interval at the start."); + Debug.Assert(splitPosition > current.GetStart(), "Trying to split interval at the start."); LiveInterval splitChild = current.Split(splitPosition); if (splitChild.UsesCount != 0) { - Debug.Assert(splitChild.Start > current.Start, "Split interval has an invalid start position."); + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); InsertInterval(splitChild); } @@ -277,11 +277,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Active) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (!interval.IsFixed && interval.Register.Type == regType) { - int nextUse = interval.NextUseAfter(current.Start); + int nextUse = interval.NextUseAfter(current.GetStart()); if (nextUse != -1) { @@ -292,11 +292,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Inactive) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current)) { - int nextUse = interval.NextUseAfter(current.Start); + int nextUse = interval.NextUseAfter(current.GetStart()); if (nextUse != -1) { @@ -307,7 +307,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Active) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (interval.IsFixed && interval.Register.Type == regType) { @@ -317,7 +317,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Inactive) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current)) { @@ -335,17 +335,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { //All intervals on inactive and active are being used before current, //so spill the current interval. - Debug.Assert(currentFirstUse > current.Start, "Trying to spill a interval currently being used."); + Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used."); LiveInterval splitChild = current.Split(GetSplitPosition(currentFirstUse)); - Debug.Assert(splitChild.Start > current.Start, "Split interval has an invalid start position."); + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); InsertInterval(splitChild); Spill(context, current); } - else if (blockedPositions[selectedReg] > current.End) + else if (blockedPositions[selectedReg] > current.GetEnd()) { //Spill made the register available for the entire current lifetime, //so we only need to split the intervals using the selected register. @@ -366,7 +366,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (splitChild.UsesCount != 0) { - Debug.Assert(splitChild.Start > current.Start, "Split interval has an invalid start position."); + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); InsertInterval(splitChild); } @@ -416,7 +416,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { foreach (int iIndex in context.Active) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (!interval.IsFixed && interval.Register == current.Register) { @@ -428,7 +428,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Inactive) { - LiveInterval interval = GetInterval(iIndex); + LiveInterval interval = _intervals[iIndex]; if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current)) { @@ -448,13 +448,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators //we need to split the spilled interval twice, and re-insert it //on the "pending" list to ensure that it will get a new register //on that use position. - int nextUse = interval.NextUseAfter(current.Start); + int nextUse = interval.NextUseAfter(current.GetStart()); LiveInterval splitChild; - if (interval.Start < current.Start) + if (interval.GetStart() < current.GetStart()) { - splitChild = interval.Split(GetSplitPosition(current.Start)); + splitChild = interval.Split(GetSplitPosition(current.GetStart())); } else { @@ -463,26 +463,26 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (nextUse != -1) { - Debug.Assert(nextUse > current.Start, "Trying to spill a interval currently being used."); + Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used."); - if (GetSplitPosition(nextUse) > splitChild.Start) + if (GetSplitPosition(nextUse) > splitChild.GetStart()) { LiveInterval right = splitChild.Split(GetSplitPosition(nextUse)); Spill(context, splitChild); - Debug.Assert(right.Start > current.Start, "Split interval has an invalid start position."); + Debug.Assert(right.GetStart() > current.GetStart(), "Split interval has an invalid start position."); InsertInterval(right); } else { - if (nextUse == splitChild.Start) + if (nextUse == splitChild.GetStart()) { splitChild.SetStart(GetSplitPosition(nextUse)); } - Debug.Assert(splitChild.Start > current.Start, "Split interval has an invalid start position."); + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); InsertInterval(splitChild); } @@ -493,6 +493,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } + private void InsertInterval(LiveInterval interval) + { + Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses."); + Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval."); + Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval."); + + int insertIndex = _intervals.BinarySearch(interval); + + if (insertIndex < 0) + { + insertIndex = ~insertIndex; + } + + _intervals.Insert(insertIndex, interval); + } + private void Spill(AllocationContext context, LiveInterval interval) { Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval."); @@ -503,7 +519,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators //This prevents stack-to-stack copies being necessary for a split interval. if (!interval.TrySpillWithSiblingOffset()) { - interval.SpillOffset = context.StackAlloc.Allocate(interval.Local.Type); + interval.Spill(context.StackAlloc.Allocate(interval.Local.Type)); } } @@ -529,11 +545,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (LiveInterval splitChild in interval.SplitChilds()) { - int splitPosition = splitChild.Start; + int splitPosition = splitChild.GetStart(); int alignedSplitPosition = GetAlignedSplitPosition(splitPosition); - if (!_blockEdges.Contains(alignedSplitPosition) && previous.End == splitPosition) + if (!_blockEdges.Contains(alignedSplitPosition) && previous.GetEnd() == splitPosition) { GetCopyResolver(splitPosition).AddSplit(previous, splitChild); } @@ -659,53 +675,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private void InsertInterval(LiveInterval interval) - { - Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses."); - Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval."); - Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval."); - - int insertIndex = 0; - - int left = RegistersCount; - int right = _intervals.Count - 1; - - while (left <= right) - { - int size = right - left; - - int middle = left + (size >> 1); - - LiveInterval current = _intervals[middle]; - - insertIndex = middle; - - if (interval.Start == current.Start) - { - break; - } - - if (interval.Start < current.Start) - { - right = middle - 1; - } - else - { - left = middle + 1; - } - } - - //If we have multiple intervals with the same start position, then the new one should - //always be inserted after all the existing interval with the same position, in order - //to ensure they will be processed (it works like a queue in this case). - while (insertIndex < _intervals.Count && _intervals[insertIndex].Start <= interval.Start) - { - insertIndex++; - } - - _intervals.Insert(insertIndex, interval); - } - private void ReplaceLocalWithRegister(LiveInterval current) { Operand register = GetRegister(current); @@ -813,7 +782,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators _parentIntervals = _intervals.ToArray(); } - private void BuildIntervals(ControlFlowGraph cfg, RegisterMasks regMasks, out int maxCallArgs) + private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context, out int maxCallArgs) { maxCallArgs = 0; @@ -945,14 +914,27 @@ namespace ARMeilleure.CodeGen.RegisterAllocators interval.AddUsePosition(operationPos); } - if (node is Operation operation && operation.Inst == Instruction.Call) + if (node is Operation operation) { - AddIntervalCallerSavedReg(regMasks.IntCallerSavedRegisters, operationPos, RegisterType.Integer); - AddIntervalCallerSavedReg(regMasks.VecCallerSavedRegisters, operationPos, RegisterType.Vector); - - if (maxCallArgs < operation.SourcesCount - 1) + if (operation.Inst == Instruction.Call) { - maxCallArgs = operation.SourcesCount - 1; + AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer); + AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector); + + if (maxCallArgs < operation.SourcesCount - 1) + { + maxCallArgs = operation.SourcesCount - 1; + } + } + else if (operation.Inst == Instruction.StackAlloc) + { + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant, "StackAlloc has non-constant size."); + + int spillOffset = context.StackAlloc.Allocate(offset.AsInt32()); + + operation.SetSource(0, new Operand(spillOffset)); } } } @@ -998,76 +980,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0); } - private void CoalesceCopies(BasicBlock[] blocks) - { - foreach (BasicBlock block in blocks) - { - for (LinkedListNode node = block.Operations.First; node != null;) - { - LinkedListNode nextNode = node.Next; - - Node operation = node.Value; - - if (!IsCopyOp(operation)) - { - node = nextNode; - - continue; - } - - Operand dest = operation.Dest; - Operand source = operation.GetSource(0); - - if (TryCoalesce(dest, source)) - { - operation.SetSource(0, null); - operation.Dest = null; - - block.Operations.Remove(node); - } - - node = nextNode; - } - } - } - - private static bool IsCopyOp(Node node) - { - return node is Operation operation && operation.Inst == Instruction.Copy; - } - - private bool TryCoalesce(Operand x, Operand y) - { - if (x.Kind != OperandKind.LocalVariable || y.Kind != OperandKind.LocalVariable) - { - return false; - } - - LiveInterval intervalX = GetInterval(x.AsInt32()); - LiveInterval intervalY = GetInterval(y.AsInt32()); - - if (intervalX == intervalY || intervalX.Overlaps(intervalY)) - { - return false; - } - - intervalY.Join(intervalX); - - return true; - } - - private LiveInterval GetInterval(int index) - { - LiveInterval interval = _intervals[index]; - - while (interval != interval.Representative) - { - interval = interval.Representative; - } - - return interval; - } - private static IEnumerable Successors(BasicBlock block) { if (block.Next != null) diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index e5db43c508..d5809fb467 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -6,7 +6,7 @@ using System.Linq; namespace ARMeilleure.CodeGen.RegisterAllocators { - class LiveInterval + class LiveInterval : IComparable { private const int NotFound = -1; @@ -16,15 +16,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public int UsesCount => _usePositions.Count; - private List _ranges; - private List _childs; + private List _ranges; + + private SortedList _childs; public bool IsSplit => _childs.Count != 0; public Operand Local { get; } - public LiveInterval Representative { get; private set; } - private Register _register; public bool HasRegister { get; private set; } @@ -43,17 +42,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - public int SpillOffset { get; set; } + public int SpillOffset { get; private set; } public bool IsSpilled => SpillOffset != -1; - public bool IsFixed { get; } public bool IsEmpty => _ranges.Count == 0; - public int Start => _ranges[0].Start; - public int End => _ranges[_ranges.Count - 1].End; - public LiveInterval(Operand local = null, LiveInterval parent = null) { Local = local; @@ -62,9 +57,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators _usePositions = new SortedSet(); _ranges = new List(); - _childs = new List(); - Representative = this; + _childs = new SortedList(); SpillOffset = -1; } @@ -87,9 +81,31 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } + public int GetStart() + { + if (_ranges.Count == 0) + { + throw new InvalidOperationException("Empty interval."); + } + + return _ranges[0].Start; + } + + public int GetEnd() + { + if (_ranges.Count == 0) + { + throw new InvalidOperationException("Empty interval."); + } + + return _ranges[_ranges.Count - 1].End; + } + public void AddRange(int start, int end) { - if (BinarySearch(new LiveRange(start, end), out int index)) + int index = _ranges.BinarySearch(new LiveRange(start, end)); + + if (index >= 0) { //New range insersects with an existing range, we need to remove //all the intersecting ranges before adding the new one. @@ -122,13 +138,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators InsertRange(lIndex, start, end); } - else if (index < _ranges.Count && _ranges[index].Start < start) - { - InsertRange(index + 1, start, end); - } else { - InsertRange(index, start, end); + InsertRange(~index, start, end); } } @@ -182,19 +194,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public bool Overlaps(int position) { - if (BinarySearch(new LiveRange(position, position + 1), out _)) - { - return true; - } - - return false; + return _ranges.BinarySearch(new LiveRange(position, position + 1)) >= 0; } public bool Overlaps(LiveInterval other) { foreach (LiveRange range in other._ranges) { - if (BinarySearch(range, out _)) + if (_ranges.BinarySearch(range) >= 0) { return true; } @@ -205,7 +212,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public IEnumerable SplitChilds() { - return _childs; + return _childs.Values; } public IEnumerable UsePositions() @@ -240,7 +247,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { foreach (LiveRange range in other._ranges) { - if (BinarySearch(range, out int overlapIndex)) + int overlapIndex = _ranges.BinarySearch(range); + + if (overlapIndex >= 0) { LiveRange overlappingRange = _ranges[overlapIndex]; @@ -258,18 +267,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return NotFound; } - public void Join(LiveInterval other) - { - foreach (LiveRange range in _ranges) - { - other.AddRange(range.Start, range.End); - } - - Representative = other; - - _ranges.Clear(); - } - public LiveInterval Split(int position) { LiveInterval right = new LiveInterval(Local, _parent); @@ -322,46 +319,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return right; } - private bool BinarySearch(LiveRange comparand, out int index) - { - index = 0; - - int left = 0; - int right = _ranges.Count - 1; - - while (left <= right) - { - int size = right - left; - - int middle = left + (size >> 1); - - LiveRange range = _ranges[middle]; - - index = middle; - - if (range.Start < comparand.End && comparand.Start < range.End) - { - return true; - } - - if (comparand.Start < range.Start) - { - right = middle - 1; - } - else - { - left = middle + 1; - } - } - - return false; - } - private void AddSplitChild(LiveInterval child) { Debug.Assert(!child.IsEmpty, "Trying to insert a empty interval."); - child.InsertSorted(_parent._childs); + _parent._childs.Add(child.GetStart(), child); } public LiveInterval GetSplitChild(int position) @@ -374,15 +336,15 @@ namespace ARMeilleure.CodeGen.RegisterAllocators //as they are sorted by start/end position, and there are no overlaps. for (int index = _childs.Count - 1; index >= 0; index--) { - LiveInterval splitChild = _childs[index]; + LiveInterval splitChild = _childs.Values[index]; - if (position >= splitChild.Start && position <= splitChild.End) + if (position >= splitChild.GetStart() && position <= splitChild.GetEnd()) { return splitChild; } } - if (position >= Start && position <= End) + if (position >= GetStart() && position <= GetEnd()) { return this; } @@ -392,11 +354,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public bool TrySpillWithSiblingOffset() { - foreach (LiveInterval splitChild in _parent._childs) + foreach (LiveInterval splitChild in _parent._childs.Values) { if (splitChild.IsSpilled) { - SpillOffset = splitChild.SpillOffset; + Spill(splitChild.SpillOffset); return true; } @@ -405,47 +367,19 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return false; } - public void InsertSorted(List list) + public void Spill(int offset) { - int insertIndex = 0; + SpillOffset = offset; + } - int left = 0; - int right = list.Count - 1; - - while (left <= right) + public int CompareTo(LiveInterval other) + { + if (_ranges.Count == 0 || other._ranges.Count == 0) { - int size = right - left; - - int middle = left + (size >> 1); - - LiveInterval current = list[middle]; - - insertIndex = middle; - - if (Start == current.Start) - { - break; - } - - if (Start < current.Start) - { - right = middle - 1; - } - else - { - left = middle + 1; - } + return _ranges.Count.CompareTo(other._ranges.Count); } - //If we have multiple intervals with the same start position, then the new one should - //always be inserted after all the existing interval with the same position, in order - //to ensure they will be processed (it works like a queue in this case). - while (insertIndex < list.Count && list[insertIndex].Start <= Start) - { - insertIndex++; - } - - list.Insert(insertIndex, this); + return _ranges[0].Start.CompareTo(other._ranges[0].Start); } public override string ToString() diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs index 7883d24eb4..b5faeffd59 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs @@ -1,6 +1,8 @@ +using System; + namespace ARMeilleure.CodeGen.RegisterAllocators { - struct LiveRange + struct LiveRange : IComparable { public int Start { get; } public int End { get; } @@ -11,6 +13,16 @@ namespace ARMeilleure.CodeGen.RegisterAllocators End = end; } + public int CompareTo(LiveRange other) + { + if (Start < other.End && other.Start < End) + { + return 0; + } + + return Start.CompareTo(other.Start); + } + public override string ToString() { return $"[{Start}, {End}["; diff --git a/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs index fef86395b8..a6233d6eef 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs @@ -1,150 +1,27 @@ using ARMeilleure.Common; using ARMeilleure.IntermediateRepresentation; using System; -using System.Collections.Generic; namespace ARMeilleure.CodeGen.RegisterAllocators { class StackAllocator { - public int TotalSize { get; private set; } + private int _offset; - private List _masks; - - public StackAllocator() - { - _masks = new List(); - } + public int TotalSize => _offset; public int Allocate(OperandType type) { - return Allocate(GetSizeInWords(type)); + return Allocate(type.GetSizeInBytes()); } - public int Free(int offset, OperandType type) + public int Allocate(int sizeInBytes) { - return Allocate(GetSizeInWords(type)); - } + int offset = _offset; - private int Allocate(int sizeInWords) - { - ulong requiredMask = (1UL << sizeInWords) - 1; + _offset += sizeInBytes; - for (int index = 0; ; index++) - { - ulong free = GetFreeMask(index); - - while ((int)free != 0) - { - int freeBit = BitUtils.LowestBitSet((int)free); - - ulong useMask = requiredMask << freeBit; - - if ((free & useMask) == useMask) - { - free &= ~useMask; - - SetFreeMask(index, free); - - int offset = (index * 32 + freeBit) * 4; - - int size = offset + sizeInWords * 4; - - if (TotalSize < size) - { - TotalSize = size; - } - - return offset; - } - - free &= ~useMask; - } - } - } - - private void Free(int offset, int sizeInWords) - { - int index = offset / 32; - - ulong requiredMask = (1UL << sizeInWords) - 1; - - ulong freeMask = (requiredMask << (offset & 31)) - 1; - - SetFreeMask(index, GetFreeMask(index) & ~freeMask); - } - - private ulong GetFreeMask(int index) - { - int hi = index >> 1; - - EnsureSize(hi); - - ulong mask; - - if ((index & 1) != 0) - { - EnsureSize(hi + 1); - - mask = _masks[hi + 0] >> 32; - mask |= _masks[hi + 1] << 32; - } - else - { - EnsureSize(hi); - - mask = _masks[hi]; - } - - return mask; - } - - private void SetFreeMask(int index, ulong mask) - { - int hi = index >> 1; - - if ((index & 1) != 0) - { - EnsureSize(hi + 1); - - _masks[hi + 0] &= 0x00000000ffffffffUL; - _masks[hi + 1] &= 0xffffffff00000000UL; - - _masks[hi + 0] |= mask << 32; - _masks[hi + 1] |= mask >> 32; - } - else - { - EnsureSize(hi); - - _masks[hi] = mask; - } - } - - private void EnsureSize(int size) - { - while (size >= _masks.Count) - { - _masks.Add(ulong.MaxValue); - } - } - - private static int GetSizeInWords(OperandType type) - { - switch (type) - { - case OperandType.I32: - case OperandType.FP32: - return 1; - - case OperandType.I64: - case OperandType.FP64: - return 2; - - case OperandType.V128: return 4; - } - - throw new ArgumentException($"Invalid operand type \"{type}\"."); + return offset; } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 31fd3a335b..158e155a3b 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -17,7 +17,8 @@ namespace ARMeilleure.CodeGen.X86 None = 0, RegOnly = 1 << 0, Reg8 = 1 << 1, - Vex = 1 << 2, + NoRexW = 1 << 2, + Vex = 1 << 3, PrefixBit = 16, PrefixMask = 3 << PrefixBit, @@ -72,7 +73,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Andnps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex)); Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstFlags.None)); Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc8, InstFlags.RegOnly)); - Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.NoRexW)); Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstFlags.None)); Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None)); Add(X86Instruction.Cmppd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstFlags.Vex | InstFlags.Prefix66)); @@ -103,6 +104,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Imul, new InstInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstFlags.None)); Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstFlags.None)); Add(X86Instruction.Insertps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Lea, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstFlags.None)); Add(X86Instruction.Maxpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Maxps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex)); Add(X86Instruction.Maxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF2)); @@ -551,6 +553,11 @@ namespace ARMeilleure.CodeGen.X86 } } + public void Lea(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Lea); + } + public void Maxpd(Operand dest, Operand source, Operand source1) { WriteInstruction(dest, source, X86Instruction.Maxpd, source1); @@ -1327,7 +1334,7 @@ namespace ARMeilleure.CodeGen.X86 } else if (dest != null && IsR64(dest) && info.OpRImm64 != BadOp) { - int rexPrefix = GetRexPrefix(dest, source, rrm: false); + int rexPrefix = GetRexPrefix(dest, source, rexW: true, rrm: false); if (rexPrefix != 0) { @@ -1388,7 +1395,9 @@ namespace ARMeilleure.CodeGen.X86 Operand source1 = null, bool rrm = false) { - int rexPrefix = GetRexPrefix(dest, source, rrm); + bool rexW = (flags & InstFlags.NoRexW) == 0; + + int rexPrefix = GetRexPrefix(dest, source, rexW, rrm); int modRM = (opCode >> OpModRMBits) << 3; @@ -1462,13 +1471,13 @@ namespace ARMeilleure.CodeGen.X86 } } + if (baseReg.Index >= 8) + { + rexPrefix |= 0x40 | (baseReg.Index >> 3); + } + if (needsSibByte) { - if (baseReg.Index >= 8) - { - rexPrefix |= 0x40 | (baseReg.Index >> 3); - } - sib = (int)baseRegLow; if (memOp.Index != null) @@ -1619,7 +1628,7 @@ namespace ARMeilleure.CodeGen.X86 WriteByte((byte)(opCode + (regIndex & 0b111))); } - private static int GetRexPrefix(Operand dest, Operand source, bool rrm) + private static int GetRexPrefix(Operand dest, Operand source, bool rexW, bool rrm) { int rexPrefix = 0; @@ -1633,7 +1642,7 @@ namespace ARMeilleure.CodeGen.X86 if (dest != null) { - if (dest.Type == OperandType.I64) + if (dest.Type == OperandType.I64 && rexW) { rexPrefix = 0x48; } @@ -1646,7 +1655,7 @@ namespace ARMeilleure.CodeGen.X86 if (source != null) { - if (source.Type == OperandType.I64) + if (source.Type == OperandType.I64 && rexW) { rexPrefix |= 0x48; } diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs index 7e3c90e6b2..5fcae53e6a 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenContext.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -65,8 +65,9 @@ namespace ARMeilleure.CodeGen.X86 private List _jumps; private X86Condition _jNearCondition; - private long _jNearPosition; - private int _jNearLength; + + private long _jNearPosition; + private int _jNearLength; public CodeGenContext(Stream stream, AllocationResult allocResult, int blocksCount) { diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 4428e94a20..b626e7528c 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -66,6 +66,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.SignExtend8, GenerateSignExtend8); Add(Instruction.Spill, GenerateSpill); Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); Add(Instruction.Store, GenerateStore); Add(Instruction.Store16, GenerateStore16); Add(Instruction.Store8, GenerateStore8); @@ -225,7 +226,7 @@ namespace ARMeilleure.CodeGen.X86 PreAllocator.RunPass(cfg, memory); - Logger.EndPass(PassName.PreAllocation); + Logger.EndPass(PassName.PreAllocation, cfg); Logger.StartPass(PassName.RegisterAllocation); @@ -596,7 +597,17 @@ namespace ARMeilleure.CodeGen.X86 private static void GenerateLoad(CodeGenContext context, Operation operation) { - context.Assembler.Mov(operation.Dest, operation.GetSource(0)); + Operand value = operation.Dest; + Operand address = operation.GetSource(0); + + if (value.GetRegister().Type == RegisterType.Integer) + { + context.Assembler.Mov(value, address); + } + else + { + context.Assembler.Movdqu(value, address); + } } private static void GenerateLoadFromContext(CodeGenContext context, Operation operation) @@ -746,41 +757,41 @@ namespace ARMeilleure.CodeGen.X86 private static void GenerateSpill(CodeGenContext context, Operation operation) { - Operand offset = operation.GetSource(0); - Operand source = operation.GetSource(1); - - if (offset.Kind != OperandKind.Constant) - { - throw new InvalidOperationException("Spill has non-constant stack offset."); - } - - int offs = offset.AsInt32() + context.CallArgsRegionSize; - - X86MemoryOperand memOp = new X86MemoryOperand(source.Type, Register(X86Register.Rsp), null, Scale.x1, offs); - - context.Assembler.Mov(memOp, source); + GenerateSpill(context, operation, context.CallArgsRegionSize); } private static void GenerateSpillArg(CodeGenContext context, Operation operation) { + GenerateSpill(context, operation, 0); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; Operand offset = operation.GetSource(0); - Operand source = operation.GetSource(1); - if (offset.Kind != OperandKind.Constant) - { - throw new InvalidOperationException("Spill has non-constant stack offset."); - } + Debug.Assert(offset.Kind == OperandKind.Constant, "StackAlloc has non-constant stack offset."); - int offs = offset.AsInt32(); + int offs = offset.AsInt32() + context.CallArgsRegionSize; - X86MemoryOperand memOp = new X86MemoryOperand(source.Type, Register(X86Register.Rsp), null, Scale.x1, offs); + X86MemoryOperand memOp = new X86MemoryOperand(OperandType.I64, Register(X86Register.Rsp), null, Scale.x1, offs); - context.Assembler.Mov(memOp, source); + context.Assembler.Lea(dest, memOp); } private static void GenerateStore(CodeGenContext context, Operation operation) { - context.Assembler.Mov(operation.GetSource(0), operation.GetSource(1)); + Operand address = operation.GetSource(0); + Operand value = operation.GetSource(1); + + if (value.GetRegister().Type == RegisterType.Integer) + { + context.Assembler.Mov(address, value); + } + else + { + context.Assembler.Movdqu(address, value); + } } private static void GenerateStore16(CodeGenContext context, Operation operation) @@ -1686,6 +1697,27 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Setcc(operation.Dest, condition); } + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant, "Spill has non-constant stack offset."); + + int offs = offset.AsInt32() + baseOffset; + + X86MemoryOperand memOp = new X86MemoryOperand(source.Type, Register(X86Register.Rsp), null, Scale.x1, offs); + + if (source.GetRegister().Type == RegisterType.Integer) + { + context.Assembler.Mov(memOp, source); + } + else + { + context.Assembler.Movdqu(memOp, source); + } + } + private static void ValidateDestSrc1(Operation operation) { Operand dest = operation.Dest; diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 0192f1ddc2..576e36c1f3 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -383,55 +383,114 @@ namespace ARMeilleure.CodeGen.X86 //as mandated by the ABI. if (inst == Instruction.Call) { - int argsCount = operation.SourcesCount; + HandleCallWindowsAbi(node, operation); + } + } - int maxArgs = CallingConvention.GetArgumentsOnRegsCount(); + private static void HandleCallWindowsAbi(LinkedListNode node, Operation operation) + { + Operand dest = operation.Dest; - if (argsCount > maxArgs + 1) + //Handle struct arguments. + int retArgs = 0; + + Operand retValueAddr = null; + + if (dest.Type == OperandType.V128) + { + retValueAddr = Local(OperandType.I64); + + Operation allocOp = new Operation(Instruction.StackAlloc, retValueAddr, Const(dest.Type.GetSizeInBytes())); + + node.List.AddBefore(node, allocOp); + + Operand arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64); + + Operation copyOp = new Operation(Instruction.Copy, arg0Reg, retValueAddr); + + node.List.AddBefore(node, copyOp); + + retArgs = 1; + } + + for (int index = 1; index < operation.SourcesCount; index++) + { + Operand source = operation.GetSource(index); + + if (source.Type == OperandType.V128) { - argsCount = maxArgs + 1; + Operand stackAddr = Local(OperandType.I64); + + Operation allocOp = new Operation(Instruction.StackAlloc, stackAddr, Const(source.Type.GetSizeInBytes())); + + node.List.AddBefore(node, allocOp); + + X86MemoryOperand memOp = new X86MemoryOperand(source.Type, stackAddr, null, Scale.x1, 0); + + Operation storeOp = new Operation(Instruction.Store, null, memOp, source); + + AddMemoryOperandUse(memOp, storeOp); + + node.List.AddBefore(node, storeOp); + + operation.SetSource(index, stackAddr); + } + } + + //Handle arguments passed on registers. + int argsCount = operation.SourcesCount - 1; + + int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs; + + if (argsCount > maxArgs) + { + argsCount = maxArgs; + } + + for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(index + 1); + + RegisterType regType = source.Type.ToRegisterType(); + + Operand argReg; + + int argIndex = index + retArgs; + + if (regType == RegisterType.Integer) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type); + } + else /* if (regType == RegisterType.Vector) */ + { + argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type); } - for (int index = 1; index < argsCount; index++) - { - Operand source = operation.GetSource(index); + Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source); - RegisterType regType = source.Type.ToRegisterType(); + node.List.AddBefore(node, srcCopyOp); - Operand argReg; + operation.SetSource(index + 1, argReg); + } - if (regType == RegisterType.Integer) - { - argReg = Gpr(CallingConvention.GetIntArgumentRegister(index - 1), source.Type); - } - else /* if (regType == RegisterType.Vector) */ - { - argReg = Xmm(CallingConvention.GetVecArgumentRegister(index - 1), source.Type); - } + //The remaining arguments (those that are not passed on registers) + //should be passed on the stack, we write them to the stack with "SpillArg". + for (int index = argsCount; index < operation.SourcesCount - 1; index++) + { + Operand source = operation.GetSource(index + 1); - Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source); + Operand offset = new Operand(index * 8); - node.List.AddBefore(node, srcCopyOp); + Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source); - operation.SetSource(index, argReg); - } + node.List.AddBefore(node, spillOp); - //The remaining arguments (those that are not passed on registers) - //should be passed on the stack, we write them to the stack with "SpillArg". - for (int index = argsCount; index < operation.SourcesCount; index++) - { - Operand source = operation.GetSource(index); + operation.SetSource(index + 1, new Operand(OperandKind.Undefined)); + } - Operand offset = new Operand((index - 1) * 8); - - Operation srcSpillOp = new Operation(Instruction.SpillArg, null, offset, source); - - node.List.AddBefore(node, srcSpillOp); - - operation.SetSource(index, new Operand(OperandKind.Undefined)); - } - - if (dest != null) + if (dest != null) + { + if (retValueAddr == null) { RegisterType regType = dest.Type.ToRegisterType(); @@ -452,6 +511,18 @@ namespace ARMeilleure.CodeGen.X86 operation.Dest = retReg; } + else + { + X86MemoryOperand memOp = new X86MemoryOperand(dest.Type, retValueAddr, null, Scale.x1, 0); + + Operation loadOp = new Operation(Instruction.Load, dest, memOp); + + AddMemoryOperandUse(memOp, loadOp); + + node.List.AddAfter(node, loadOp); + + operation.Dest = null; + } } } diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index 6538d07dab..8fbae89b0f 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -43,6 +43,7 @@ namespace ARMeilleure.CodeGen.X86 Imul, Imul128, Insertps, + Lea, Maxpd, Maxps, Maxsd, diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index fae2630ff0..c52ebdb0ab 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -415,16 +415,16 @@ namespace ARMeilleure.Decoders SetA64("0>0011100<100001110110xxxxxxxxxx", InstName.Scvtf_V, InstEmit.Scvtf_V, typeof(OpCodeSimd)); SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm)); SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm)); - SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, null, typeof(OpCodeSimd)); - SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, null, typeof(OpCodeSimdReg)); - SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, null, typeof(OpCodeSimdReg)); - SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, null, typeof(OpCodeSimd)); - SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, null, typeof(OpCodeSimdReg)); - SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, null, typeof(OpCodeSimd)); - SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, null, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, InstEmit.Sha1c_V, typeof(OpCodeSimdReg)); + SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, InstEmit.Sha1h_V, typeof(OpCodeSimd)); + SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, InstEmit.Sha1m_V, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, InstEmit.Sha1p_V, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, InstEmit.Sha1su0_V, typeof(OpCodeSimdReg)); + SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, InstEmit.Sha1su1_V, typeof(OpCodeSimd)); + SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, InstEmit.Sha256h_V, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, InstEmit.Sha256h2_V, typeof(OpCodeSimdReg)); + SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, InstEmit.Sha256su0_V, typeof(OpCodeSimd)); + SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, InstEmit.Sha256su1_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, typeof(OpCodeSimdReg)); SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, typeof(OpCodeSimdShImm)); SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm)); diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs index d157e20583..b9c30eb642 100644 --- a/ARMeilleure/Diagnostics/IRDumper.cs +++ b/ARMeilleure/Diagnostics/IRDumper.cs @@ -1,5 +1,4 @@ using ARMeilleure.IntermediateRepresentation; -using ARMeilleure.State; using ARMeilleure.Translation; using System; using System.Collections.Generic; @@ -140,7 +139,7 @@ namespace ARMeilleure.Diagnostics } else { - name = operand.ToString().ToLower(); + name = operand.Kind.ToString().ToLower(); } return GetTypeName(operand.Type) + " " + name; diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs index 3b2e1c797b..d27dc18a8d 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -3,7 +3,6 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; using System; -using System.Reflection; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper; @@ -31,7 +30,7 @@ namespace ARMeilleure.Instructions if (op is OpCodeSimdReg binOp) { - m = GetVec(op.Rn); + m = GetVec(binOp.Rm); } else { @@ -71,7 +70,7 @@ namespace ARMeilleure.Instructions if (op is OpCodeSimdReg binOp) { - m = GetVec(op.Rn); + m = GetVec(binOp.Rm); } else { @@ -80,7 +79,7 @@ namespace ARMeilleure.Instructions Instruction cmpInst = X86PcmpgtInstruction[op.Size]; - Operand res = context.AddIntrinsic(cmpInst, n, m); + Operand res = context.AddIntrinsic(cmpInst, m, n); Operand mask = X86GetAllElements(context, -1L); @@ -115,7 +114,7 @@ namespace ARMeilleure.Instructions if (op is OpCodeSimdReg binOp) { - m = GetVec(op.Rn); + m = GetVec(binOp.Rm); } else { @@ -702,4 +701,4 @@ namespace ARMeilleure.Instructions } } } -} +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitCrypto.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs similarity index 100% rename from ARMeilleure/Instructions/InstEmitCrypto.cs rename to ARMeilleure/Instructions/InstEmitSimdCrypto.cs diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs index ddc57e5ce3..9053c382ab 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -124,7 +124,7 @@ namespace ARMeilleure.Instructions res = n; } - res = context.AddIntrinsic(Instruction.X86Cvtps2pd, n); + res = context.AddIntrinsic(Instruction.X86Cvtps2pd, res); context.Copy(GetVec(op.Rd), res); } @@ -146,7 +146,7 @@ namespace ARMeilleure.Instructions Operand e = context.Call(info, ne); - res = context.VectorInsert(res, e, part + index); + res = context.VectorInsert(res, e, index); } else /* if (sizeF == 1) */ { @@ -154,7 +154,7 @@ namespace ARMeilleure.Instructions Operand e = context.ConvertToFP(OperandType.FP64, ne); - res = context.VectorInsert(res, e, part + index); + res = context.VectorInsert(res, e, index); } } @@ -298,7 +298,7 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_Gp_Fixed(EmitterContext context) { - EmitFcvt_s_Gp(context, (op1) => op1); + EmitFcvtzs_Gp_Fixed(context); } public static void Fcvtzs_S(EmitterContext context) @@ -344,7 +344,7 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_Gp_Fixed(EmitterContext context) { - EmitFcvt_u_Gp(context, (op1) => op1); + EmitFcvtzu_Gp_Fixed(context); } public static void Fcvtzu_S(EmitterContext context) @@ -658,8 +658,38 @@ namespace ARMeilleure.Instructions Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0); Operand res = signed - ? EmitScalarFcvts(context, emit(ne), op.FBits) - : EmitScalarFcvtu(context, emit(ne), op.FBits); + ? EmitScalarFcvts(context, emit(ne), 0) + : EmitScalarFcvtu(context, emit(ne), 0); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + res = context.Copy(Local(OperandType.I64), res); + } + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitFcvtzs_Gp_Fixed(EmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: true); + } + + private static void EmitFcvtzu_Gp_Fixed(EmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: false); + } + + private static void EmitFcvtz__Gp_Fixed(EmitterContext context, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + + Operand res = signed + ? EmitScalarFcvts(context, ne, op.FBits) + : EmitScalarFcvtu(context, ne, op.FBits); if (context.CurrOp.RegisterSize == RegisterSize.Int32) { diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs new file mode 100644 index 0000000000..91f90a0866 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs @@ -0,0 +1,169 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Sha1" + public static void Sha1c_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(OperandType.I32), 0); + + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose)); + + Operand res = context.Call(info, d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1h_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(OperandType.I32), 0); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate)); + + Operand res = context.Call(info, ne); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1m_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(OperandType.I32), 0); + + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority)); + + Operand res = context.Call(info, d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1p_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(OperandType.I32), 0); + + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)); + + Operand res = context.Call(info, d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su0_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1)); + + Operand res = context.Call(info, d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su1_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2)); + + Operand res = context.Call(info, d, n); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + +#region "Sha256" + public static void Sha256h_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)); + + Operand res = context.Call(info, d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256h2_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)); + + Operand res = context.Call(info, d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su0_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)); + + Operand res = context.Call(info, d, n); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su1_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)); + + Operand res = context.Call(info, d, n, m); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs index 7483c7cc95..2ebf55c547 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -132,7 +132,7 @@ namespace ARMeilleure.Instructions Instruction sllInst = X86PsllInstruction[op.Size + 1]; - res = context.AddIntrinsic(sllInst, res); + res = context.AddIntrinsic(sllInst, res, Const(shift)); context.Copy(GetVec(op.Rd), res); } @@ -615,7 +615,7 @@ namespace ARMeilleure.Instructions Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); - MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShlRegSatQ)); + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShlReg)); Operand e = context.Call(info, ne, me, Const(1), Const(op.Size)); diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs index e8b4f8a21b..1f5c726bcf 100644 --- a/ARMeilleure/Instructions/SoftFallback.cs +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -987,11 +987,252 @@ namespace ARMeilleure.Instructions #endregion #region "Sha1" + public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaChoose(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static uint FixedRotate(uint hash_e) + { + return hash_e.Rol(30); + } + + public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaMajority(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaParity(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11) + { + ulong t2 = w4_7.GetUInt64(0); + ulong t1 = w0_3.GetUInt64(1); + + V128 result = new V128(t1, t2); + + return result ^ (w0_3 ^ w8_11); + } + + public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15) + { + V128 t = tw0_3 ^ (w12_15 >> 32); + + uint tE0 = t.GetUInt32(0); + uint tE1 = t.GetUInt32(1); + uint tE2 = t.GetUInt32(2); + uint tE3 = t.GetUInt32(3); + + return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2)); + } + + private static void Rol32_160(ref uint y, ref V128 x) + { + uint xE3 = x.GetUInt32(3); + + x <<= 32; + x.Insert(0, y); + + y = xE3; + } + + private static uint ShaChoose(uint x, uint y, uint z) + { + return ((y ^ z) & x) ^ z; + } + + private static uint ShaMajority(uint x, uint y, uint z) + { + return (x & y) | ((x | y) & z); + } + + private static uint ShaParity(uint x, uint y, uint z) + { + return x ^ y ^ z; + } + + private static uint Rol(this uint value, int count) + { + return (value << count) | (value >> (32 - count)); + } #endregion #region "Sha256" + public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk) + { + return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true); + } + public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk) + { + return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false); + } + + public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7) + { + V128 result = new V128(); + + for (int e = 0; e <= 3; e++) + { + uint elt = (e <= 2 ? w0_3 : w4_7).GetUInt32(e <= 2 ? e + 1 : 0); + + elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3); + + elt += w0_3.GetUInt32(e); + + result.Insert(e, elt); + } + + return result; + } + + public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15) + { + V128 result = new V128(); + + ulong t1 = w12_15.GetUInt64(1); + + for (int e = 0; e <= 1; e++) + { + uint elt = t1.ULongPart(e); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.GetUInt32(e) + w8_11.GetUInt32(e + 1); + + result.Insert(e, elt); + } + + t1 = result.GetUInt64(0); + + for (int e = 2; e <= 3; e++) + { + uint elt = t1.ULongPart(e - 2); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.GetUInt32(e) + (e == 2 ? w8_11 : w12_15).GetUInt32(e == 2 ? 3 : 0); + + result.Insert(e, elt); + } + + return result; + } + + private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1) + { + for (int e = 0; e <= 3; e++) + { + uint chs = ShaChoose(y.GetUInt32(0), + y.GetUInt32(1), + y.GetUInt32(2)); + + uint maj = ShaMajority(x.GetUInt32(0), + x.GetUInt32(1), + x.GetUInt32(2)); + + uint t1 = y.GetUInt32(3) + ShaHashSigma1(y.GetUInt32(0)) + chs + w.GetUInt32(e); + + uint t2 = t1 + x.GetUInt32(3); + + x.Insert(3, t2); + + t2 = t1 + ShaHashSigma0(x.GetUInt32(0)) + maj; + + y.Insert(3, t2); + + Rol32_256(ref y, ref x); + } + + return part1 ? x : y; + } + + private static void Rol32_256(ref V128 y, ref V128 x) + { + uint yE3 = y.GetUInt32(3); + uint xE3 = x.GetUInt32(3); + + y <<= 32; + x <<= 32; + + y.Insert(0, xE3); + x.Insert(0, yE3); + } + + private static uint ShaHashSigma0(uint x) + { + return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22); + } + + private static uint ShaHashSigma1(uint x) + { + return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25); + } + + private static uint Ror(this uint value, int count) + { + return (value >> count) | (value << (32 - count)); + } + + private static uint Lsr(this uint value, int count) + { + return value >> count; + } + + private static uint ULongPart(this ulong value, int part) + { + return part == 0 + ? (uint)(value & 0xFFFFFFFFUL) + : (uint)(value >> 32); + } #endregion #region "Reverse" diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs index e828580805..c707f5c55f 100644 --- a/ARMeilleure/IntermediateRepresentation/Instruction.cs +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -51,6 +51,7 @@ namespace ARMeilleure.IntermediateRepresentation SignExtend32, Spill, SpillArg, + StackAlloc, Store, Store16, Store8, diff --git a/ARMeilleure/IntermediateRepresentation/OperandType.cs b/ARMeilleure/IntermediateRepresentation/OperandType.cs index 764d5eb8e4..bfdf5130cf 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandType.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandType.cs @@ -33,5 +33,19 @@ namespace ARMeilleure.IntermediateRepresentation throw new InvalidOperationException($"Invalid operand type \"{type}\"."); } + + public static int GetSizeInBytes(this OperandType type) + { + switch (type) + { + case OperandType.FP32: return 4; + case OperandType.FP64: return 8; + case OperandType.I32: return 4; + case OperandType.I64: return 8; + case OperandType.V128: return 16; + } + + throw new InvalidOperationException($"Invalid operand type \"{type}\"."); + } } } \ No newline at end of file diff --git a/ARMeilleure/State/V128.cs b/ARMeilleure/State/V128.cs index 8060f96422..412df9ae24 100644 --- a/ARMeilleure/State/V128.cs +++ b/ARMeilleure/State/V128.cs @@ -59,6 +59,30 @@ namespace ARMeilleure.State _e1 = (ulong)BitConverter.ToInt64(data, 8); } + public void Insert(int index, uint value) + { + switch (index) + { + case 0: _e0 = (_e0 & 0xffffffff00000000) | ((ulong)value << 0); break; + case 1: _e0 = (_e0 & 0x00000000ffffffff) | ((ulong)value << 32); break; + case 2: _e1 = (_e1 & 0xffffffff00000000) | ((ulong)value << 0); break; + case 3: _e1 = (_e1 & 0x00000000ffffffff) | ((ulong)value << 32); break; + + default: throw new ArgumentOutOfRangeException(nameof(index)); + } + } + + public void Insert(int index, ulong value) + { + switch (index) + { + case 0: _e0 = value; break; + case 1: _e1 = value; break; + + default: throw new ArgumentOutOfRangeException(nameof(index)); + } + } + public float AsFloat() { return GetFloat(0); @@ -144,6 +168,20 @@ namespace ARMeilleure.State return new V128(x._e0 ^ y._e0, x._e1 ^ y._e1); } + public static V128 operator <<(V128 x, int shift) + { + ulong shiftOut = x._e0 >> (64 - shift); + + return new V128(x._e0 << shift, (x._e1 << shift) | shiftOut); + } + + public static V128 operator >>(V128 x, int shift) + { + ulong shiftOut = x._e1 & ((1UL << shift) - 1); + + return new V128((x._e0 >> shift) | (shiftOut << (64 - shift)), x._e1 >> shift); + } + public static bool operator ==(V128 x, V128 y) { return x.Equals(y); diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index b00c9fdb0a..57e7cae956 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -135,6 +135,11 @@ namespace ARMeilleure.Translation return OperandType.FP64; } + if (type == typeof(V128)) + { + return OperandType.V128; + } + throw new ArgumentException($"Invalid type \"{type.Name}\"."); }