Add a new register allocator, generate higher quality code for hot functions (tier up), and other tweaks

gdkchan 2019-07-31 22:11:54 -03:00
parent 9f4e6815c7
commit ac5f4894ac
20 changed files with 698 additions and 627 deletions

View file

@ -1,579 +0,0 @@
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.CodeGen.RegisterAllocators
{
class FastLinearScan
{
private const int InstructionGap = 2;
private const int RegistersCount = 16;
private const int MaxIROperands = 4;
private class OperationInfo
{
public LinkedListNode<Node> Node { get; }
public int IntSpillUsedRegisters { get; set; }
public int VecSpillUsedRegisters { get; set; }
public OperationInfo(LinkedListNode<Node> node)
{
Node = node;
}
}
private List<OperationInfo> _operationNodes;
private int _intSpillTemps;
private int _vecSpillTemps;
private List<LiveInterval> _intervals;
private class CompareIntervalsEnd : IComparer<LiveInterval>
{
public int Compare(LiveInterval lhs, LiveInterval rhs)
{
if (lhs.GetEnd() == rhs.GetEnd())
{
return 0;
}
else if (lhs.GetEnd() < rhs.GetEnd())
{
return 1;
}
else
{
return -1;
}
}
}
public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
{
BuildIntervals(cfg, regMasks);
List<LiveInterval>[] fixedIntervals = new List<LiveInterval>[2];
fixedIntervals[0] = new List<LiveInterval>();
fixedIntervals[1] = new List<LiveInterval>();
int intUsedRegisters = 0;
int vecUsedRegisters = 0;
for (int index = 0; index < RegistersCount * 2; index++)
{
LiveInterval interval = _intervals[index];
if (!interval.IsEmpty)
{
if (interval.Register.Type == RegisterType.Integer)
{
intUsedRegisters |= 1 << interval.Register.Index;
}
else /* if (interval.Register.Type == RegisterType.Vector) */
{
vecUsedRegisters |= 1 << interval.Register.Index;
}
InsertSorted(fixedIntervals[index & 1], interval);
}
}
List<LiveInterval> activeIntervals = new List<LiveInterval>();
CompareIntervalsEnd comparer = new CompareIntervalsEnd();
int intFreeRegisters = regMasks.IntAvailableRegisters;
int vecFreeRegisters = regMasks.VecAvailableRegisters;
intFreeRegisters = ReserveSpillTemps(ref _intSpillTemps, intFreeRegisters);
vecFreeRegisters = ReserveSpillTemps(ref _vecSpillTemps, vecFreeRegisters);
for (int index = RegistersCount * 2; index < _intervals.Count; index++)
{
LiveInterval current = _intervals[index];
while (activeIntervals.Count != 0 &&
activeIntervals[activeIntervals.Count - 1].GetEnd() < current.GetStart())
{
int iIndex = activeIntervals.Count - 1;
LiveInterval interval = activeIntervals[iIndex];
if (interval.Register.Type == RegisterType.Integer)
{
intFreeRegisters |= 1 << interval.Register.Index;
}
else /* if (interval.Register.Type == RegisterType.Vector) */
{
vecFreeRegisters |= 1 << interval.Register.Index;
}
activeIntervals.RemoveAt(iIndex);
}
Operand local = current.Local;
bool localIsInteger = local.Type.IsInteger();
int freeMask = localIsInteger ? intFreeRegisters : vecFreeRegisters;
if (freeMask != 0)
{
List<LiveInterval> fixedIntervalsForType = fixedIntervals[localIsInteger ? 0 : 1];
for (int iIndex = 0; iIndex < fixedIntervalsForType.Count; iIndex++)
{
LiveInterval interval = fixedIntervalsForType[iIndex];
if (interval.GetStart() >= current.GetEnd())
{
break;
}
if (interval.Overlaps(current))
{
freeMask &= ~(1 << interval.Register.Index);
}
}
}
if (freeMask != 0)
{
int selectedReg = BitUtils.LowestBitSet(freeMask);
current.Register = new Register(selectedReg, local.Type.ToRegisterType());
int regMask = 1 << selectedReg;
if (localIsInteger)
{
intUsedRegisters |= regMask;
intFreeRegisters &= ~regMask;
}
else
{
vecUsedRegisters |= regMask;
vecFreeRegisters &= ~regMask;
}
}
else
{
// Spill the interval that will free the register for the longest
// amount of time, as long as there is no interference between the current
// interval and a fixed interval using the same register.
bool hasRegisterSelected = false;
RegisterType regType = current.Local.Type.ToRegisterType();
for (int iIndex = 0; iIndex < activeIntervals.Count; iIndex++)
{
LiveInterval spillCandidate = activeIntervals[iIndex];
if (spillCandidate.Register.Type != regType)
{
continue;
}
LiveInterval fixedInterval = _intervals[GetRegisterId(spillCandidate.Register)];
if (fixedInterval.IsEmpty || !fixedInterval.Overlaps(current))
{
current.Register = spillCandidate.Register;
spillCandidate.Spill(stackAlloc.Allocate(spillCandidate.Local.Type));
activeIntervals.RemoveAt(iIndex);
hasRegisterSelected = true;
break;
}
}
Debug.Assert(hasRegisterSelected, "Failure allocating register with spill.");
}
InsertSorted(activeIntervals, current, comparer);
}
for (int index = RegistersCount * 2; index < _intervals.Count; index++)
{
LiveInterval interval = _intervals[index];
if (interval.IsSpilled)
{
ReplaceLocalWithSpill(interval, ref intUsedRegisters, ref vecUsedRegisters);
}
else
{
ReplaceLocalWithRegister(interval);
}
}
return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
}
private int ReserveSpillTemps(ref int tempsMask, int availableRegs)
{
for (int index = 0; index < MaxIROperands; index++)
{
int selectedReg = BitUtils.HighestBitSet(availableRegs);
tempsMask |= 1 << selectedReg;
availableRegs &= ~(1 << selectedReg);
}
return availableRegs;
}
private void ReplaceLocalWithRegister(LiveInterval interval)
{
Operand register = GetRegister(interval);
foreach (int usePosition in interval.UsePositions())
{
Node operation = GetOperationInfo(usePosition).Node.Value;
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand source = operation.GetSource(srcIndex);
if (source == interval.Local)
{
operation.SetSource(srcIndex, register);
}
}
if (operation.Destination == interval.Local)
{
operation.Destination = register;
}
}
}
private static Operand GetRegister(LiveInterval interval)
{
Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
return new Operand(
interval.Register.Index,
interval.Register.Type,
interval.Local.Type);
}
private void ReplaceLocalWithSpill(
LiveInterval interval,
ref int intUsedRegisters,
ref int vecUsedRegisters)
{
Operand local = interval.Local;
int spillOffset = interval.SpillOffset;
foreach (int usePosition in interval.UsePositions())
{
OperationInfo info = GetOperationInfo(usePosition);
int tempReg = GetSpillTemp(info, local.Type);
if (local.Type.IsInteger())
{
intUsedRegisters |= 1 << tempReg;
}
else
{
vecUsedRegisters |= 1 << tempReg;
}
Operand temp = new Operand(tempReg, local.Type.ToRegisterType(), local.Type);
LinkedListNode<Node> node = info.Node;
Node operation = node.Value;
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand source = operation.GetSource(srcIndex);
if (source == local)
{
Operation fillOp = new Operation(Instruction.Fill, temp, Const(spillOffset));
node.List.AddBefore(node, fillOp);
operation.SetSource(srcIndex, temp);
}
}
if (operation.Destination == local)
{
Operation spillOp = new Operation(Instruction.Spill, null, Const(spillOffset), temp);
node.List.AddAfter(node, spillOp);
operation.Destination = temp;
}
}
}
private OperationInfo GetOperationInfo(int position)
{
return _operationNodes[position / InstructionGap];
}
private int GetSpillTemp(OperationInfo info, OperandType type)
{
int selectedReg;
if (type.IsInteger())
{
selectedReg = BitUtils.LowestBitSet(_intSpillTemps & ~info.IntSpillUsedRegisters);
info.IntSpillUsedRegisters |= 1 << selectedReg;
}
else
{
selectedReg = BitUtils.LowestBitSet(_vecSpillTemps & ~info.VecSpillUsedRegisters);
info.VecSpillUsedRegisters |= 1 << selectedReg;
}
Debug.Assert(selectedReg != -1, "Out of spill temporary registers. " + (info.Node.Value as Operation).Instruction);
return selectedReg;
}
private static void InsertSorted(
List<LiveInterval> list,
LiveInterval interval,
IComparer<LiveInterval> comparer = null)
{
int insertIndex = list.BinarySearch(interval, comparer);
if (insertIndex < 0)
{
insertIndex = ~insertIndex;
}
list.Insert(insertIndex, interval);
}
private void BuildIntervals(ControlFlowGraph cfg, RegisterMasks masks)
{
_operationNodes = new List<OperationInfo>();
_intervals = new List<LiveInterval>();
for (int index = 0; index < RegistersCount; index++)
{
_intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
_intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
}
HashSet<Operand> visited = new HashSet<Operand>();
LiveInterval GetOrAddInterval(Operand operand)
{
LiveInterval interval;
if (visited.Add(operand))
{
operand.NumberLocal(_intervals.Count);
interval = new LiveInterval(operand);
_intervals.Add(interval);
}
else
{
interval = _intervals[GetOperandId(operand)];
}
return interval;
}
int[] blockStarts = new int[cfg.Blocks.Count];
int operationPos = 0;
List<LiveRange> backwardsBranches = new List<LiveRange>();
for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
{
BasicBlock block = cfg.PostOrderBlocks[index];
blockStarts[block.Index] = operationPos;
for (LinkedListNode<Node> node = block.Operations.First; node != null; node = node.Next)
{
_operationNodes.Add(new OperationInfo(node));
Operation operation = node.Value as Operation;
// Note: For fixed intervals, we must process sources first, in
// order to extend the live range of the fixed interval to the last
// use, in case the register is both used and assigned on the same
// instruction.
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand source = operation.GetSource(srcIndex);
if (source.Kind == OperandKind.LocalVariable)
{
LiveInterval interval = GetOrAddInterval(source);
Debug.Assert(!interval.IsEmpty, "Interval is empty.");
interval.SetEnd(operationPos + 1);
interval.AddUsePosition(operationPos);
}
else if (source.Kind == OperandKind.Register)
{
int iIndex = GetRegisterId(source.GetRegister());
LiveInterval interval = _intervals[iIndex];
if (interval.IsEmpty)
{
interval.SetStart(operationPos + 1);
}
else if (interval.GetEnd() < operationPos + 1)
{
interval.SetEnd(operationPos + 1);
}
}
}
Operand dest = operation.Destination;
if (dest != null)
{
if (dest.Kind == OperandKind.LocalVariable)
{
LiveInterval interval = GetOrAddInterval(dest);
if (interval.IsEmpty)
{
interval.SetStart(operationPos + 1);
}
interval.AddUsePosition(operationPos);
}
else if (dest.Kind == OperandKind.Register)
{
int iIndex = GetRegisterId(dest.GetRegister());
_intervals[iIndex].AddRange(operationPos + 1, operationPos + InstructionGap);
}
}
if (operation.Instruction == Instruction.Call)
{
AddIntervalCallerSavedReg(masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
AddIntervalCallerSavedReg(masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
}
operationPos += InstructionGap;
}
foreach (BasicBlock successor in Successors(block))
{
int branchIndex = cfg.PostOrderMap[block.Index];
int targetIndex = cfg.PostOrderMap[successor.Index];
// Is the branch jumping backwards?
if (targetIndex >= branchIndex)
{
int targetPos = blockStarts[successor.Index];
backwardsBranches.Add(new LiveRange(targetPos, operationPos));
}
}
}
foreach (LiveRange backwardBranch in backwardsBranches)
{
for (int iIndex = RegistersCount * 2; iIndex < _intervals.Count; iIndex++)
{
LiveInterval interval = _intervals[iIndex];
int start = interval.GetStart();
int end = interval.GetEnd();
if (backwardBranch.Start >= start && backwardBranch.Start < end)
{
if (interval.GetEnd() < backwardBranch.End)
{
interval.SetEnd(backwardBranch.End);
}
}
if (start > backwardBranch.Start)
{
break;
}
}
}
}
private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
{
while (mask != 0)
{
int regIndex = BitUtils.LowestBitSet(mask);
Register callerSavedReg = new Register(regIndex, regType);
int rIndex = GetRegisterId(callerSavedReg);
_intervals[rIndex].AddRange(operationPos + 1, operationPos + InstructionGap);
mask &= ~(1 << regIndex);
}
}
private static int GetOperandId(Operand operand)
{
if (operand.Kind == OperandKind.LocalVariable)
{
return operand.AsInt32();
}
else if (operand.Kind == OperandKind.Register)
{
return GetRegisterId(operand.GetRegister());
}
else
{
throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
}
}
private static int GetRegisterId(Register register)
{
return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
}
private static IEnumerable<BasicBlock> Successors(BasicBlock block)
{
if (block.Next != null)
{
yield return block.Next;
}
if (block.Branch != null)
{
yield return block.Branch;
}
}
}
}

View file

@ -0,0 +1,416 @@
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.CodeGen.RegisterAllocators
{
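// Fast allocator used for low quality (tier 0) code. It first walks the blocks to
// collect per-block information (fixed registers, calls) and each local's block span,
// then reserves callee saved registers for the most used locals, keeps block-local
// values in whatever registers remain free inside a block, and spills everything else.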
class HybridAllocator : IRegisterAllocator
{
private const int RegistersCount = 16;
private const int MaxIROperands = 4;
private struct BlockInfo
{
public bool HasCall;
public int IntFixedRegisters;
public int VecFixedRegisters;
}
private class LocalInfo
{
public int Uses { get; set; }
public int UseCount { get; set; }
public bool PreAllocated { get; set; }
public int Register { get; set; }
public int SpillOffset { get; set; }
public int Sequence { get; set; }
public Operand Temp { get; set; }
public OperandType Type { get; }
private int _first;
private int _last;
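// True when every assignment and use of the local falls inside a single basic block.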
public bool IsBlockLocal => _first == _last;
public LocalInfo(OperandType type, int uses)
{
Uses = uses;
Type = type;
_first = -1;
_last = -1;
}
public void SetBlockIndex(int blkIndex)
{
if (_first == -1 || blkIndex < _first)
{
_first = blkIndex;
}
if (_last == -1 || blkIndex > _last)
{
_last = blkIndex;
}
}
}
public AllocationResult RunPass(
ControlFlowGraph cfg,
StackAllocator stackAlloc,
RegisterMasks regMasks)
{
int intUsedRegisters = 0;
int vecUsedRegisters = 0;
int intFreeRegisters = regMasks.IntAvailableRegisters;
int vecFreeRegisters = regMasks.VecAvailableRegisters;
BlockInfo[] blockInfo = new BlockInfo[cfg.Blocks.Count];
List<LocalInfo> locInfo = new List<LocalInfo>();
for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
{
BasicBlock block = cfg.PostOrderBlocks[index];
int intFixedRegisters = 0;
int vecFixedRegisters = 0;
bool hasCall = false;
foreach (Node node in block.Operations)
{
if (node is Operation operation && operation.Instruction == Instruction.Call)
{
hasCall = true;
}
for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
{
Operand source = node.GetSource(srcIndex);
if (source.Kind == OperandKind.LocalVariable)
{
locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index);
}
}
for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
{
Operand dest = node.GetDestination(dstIndex);
if (dest.Kind == OperandKind.LocalVariable)
{
LocalInfo info;
if (dest.Value != 0)
{
info = locInfo[dest.AsInt32() - 1];
}
else
{
dest.NumberLocal(locInfo.Count + 1);
info = new LocalInfo(dest.Type, UsesCount(dest));
locInfo.Add(info);
}
info.SetBlockIndex(block.Index);
}
else if (dest.Kind == OperandKind.Register)
{
if (dest.Type.IsInteger())
{
intFixedRegisters |= 1 << dest.GetRegister().Index;
}
else
{
vecFixedRegisters |= 1 << dest.GetRegister().Index;
}
}
}
}
blockInfo[block.Index] = new BlockInfo()
{
HasCall = hasCall,
IntFixedRegisters = intFixedRegisters,
VecFixedRegisters = vecFixedRegisters
};
}
int intReservedCount = 0;
int vecReservedCount = 0;
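// Reserve a callee saved register for each of the most used locals, up to 7 integer
// and 7 vector registers (14 total), so those values also survive calls.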
foreach (LocalInfo info in locInfo.OrderByDescending(x => x.Uses))
{
if (info.Type.IsInteger() && intReservedCount < 7)
{
int selectedReg = BitUtils.HighestBitSet(intFreeRegisters & ~regMasks.IntCallerSavedRegisters);
int mask = 1 << selectedReg;
intFreeRegisters &= ~mask;
intUsedRegisters |= mask;
info.PreAllocated = true;
info.Register = selectedReg;
intReservedCount++;
}
else if (!info.Type.IsInteger() && vecReservedCount < 7)
{
int selectedReg = BitUtils.HighestBitSet(vecFreeRegisters & ~regMasks.VecCallerSavedRegisters);
int mask = 1 << selectedReg;
vecFreeRegisters &= ~mask;
vecUsedRegisters |= mask;
info.PreAllocated = true;
info.Register = selectedReg;
vecReservedCount++;
}
if (intReservedCount + vecReservedCount == 14)
{
break;
}
}
int sequence = 0;
for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
{
BasicBlock block = cfg.PostOrderBlocks[index];
BlockInfo blkInfo = blockInfo[block.Index];
int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
int intSpillTempRegisters = SelectSpillTemps(intCallerSavedRegisters, intLocalFreeRegisters);
int vecSpillTempRegisters = SelectSpillTemps(vecCallerSavedRegisters, vecLocalFreeRegisters);
intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
for (LinkedListNode<Node> llNode = block.Operations.First; llNode != null; llNode = llNode.Next)
{
Node node = llNode.Value;
int intLocalUse = 0;
int vecLocalUse = 0;
for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
{
Operand source = node.GetSource(srcIndex);
if (source.Kind != OperandKind.LocalVariable)
{
continue;
}
LocalInfo info = locInfo[source.AsInt32() - 1];
info.UseCount++;
Debug.Assert(info.UseCount <= info.Uses);
if (info.Register != -1)
{
node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type));
if (info.UseCount == info.Uses && !info.PreAllocated)
{
if (source.Type.IsInteger())
{
intLocalFreeRegisters |= 1 << info.Register;
}
else
{
vecLocalFreeRegisters |= 1 << info.Register;
}
}
}
else
{
Operand temp = info.Temp;
if (temp == null || info.Sequence != sequence)
{
temp = source.Type.IsInteger()
? GetSpillTemp(source, intSpillTempRegisters, ref intLocalUse)
: GetSpillTemp(source, vecSpillTempRegisters, ref vecLocalUse);
info.Sequence = sequence;
info.Temp = temp;
}
node.SetSource(srcIndex, temp);
Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset));
block.Operations.AddBefore(llNode, fillOp);
}
}
int intLocalAsg = 0;
int vecLocalAsg = 0;
for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
{
Operand dest = node.GetDestination(dstIndex);
if (dest.Kind != OperandKind.LocalVariable)
{
continue;
}
LocalInfo info = locInfo[dest.AsInt32() - 1];
if (info.UseCount == 0 && !info.PreAllocated)
{
int mask = dest.Type.IsInteger()
? intLocalFreeRegisters
: vecLocalFreeRegisters;
if (info.IsBlockLocal && mask != 0)
{
int selectedReg = BitUtils.LowestBitSet(mask);
info.Register = selectedReg;
if (dest.Type.IsInteger())
{
intLocalFreeRegisters &= ~(1 << selectedReg);
intUsedRegisters |= 1 << selectedReg;
}
else
{
vecLocalFreeRegisters &= ~(1 << selectedReg);
vecUsedRegisters |= 1 << selectedReg;
}
}
else
{
info.Register = -1;
info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
}
}
info.UseCount++;
Debug.Assert(info.UseCount <= info.Uses);
if (info.Register != -1)
{
node.SetDestination(dstIndex, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
}
else
{
Operand temp = info.Temp;
if (temp == null || info.Sequence != sequence)
{
temp = dest.Type.IsInteger()
? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
: GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
info.Sequence = sequence;
info.Temp = temp;
}
node.SetDestination(dstIndex, temp);
Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp);
llNode = block.Operations.AddAfter(llNode, spillOp);
}
}
sequence++;
intUsedRegisters |= intLocalAsg | intLocalUse;
vecUsedRegisters |= vecLocalAsg | vecLocalUse;
}
}
return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
}
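// Picks MaxIROperands registers to serve as spill temporaries inside a block.
// Caller saved registers (mask0, only set when the block contains a call and thus
// already unusable for long lived values) are preferred over the block's free
// registers (mask1).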
private static int SelectSpillTemps(int mask0, int mask1)
{
int selection = 0;
int count = 0;
while (count < MaxIROperands && mask0 != 0)
{
int mask = mask0 & -mask0;
selection |= mask;
mask0 &= ~mask;
count++;
}
while (count < MaxIROperands && mask1 != 0)
{
int mask = mask1 & -mask1;
selection |= mask;
mask1 &= ~mask;
count++;
}
Debug.Assert(count == MaxIROperands, "Not enough registers for spill temps.");
return selection;
}
private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
{
int selectedReg = BitUtils.LowestBitSet(freeMask & ~useMask);
useMask |= 1 << selectedReg;
return Register(selectedReg, local.Type.ToRegisterType(), local.Type);
}
private static int UsesCount(Operand local)
{
return local.Assignments.Count + local.Uses.Count;
}
private static IEnumerable<BasicBlock> Successors(BasicBlock block)
{
if (block.Next != null)
{
yield return block.Next;
}
if (block.Branch != null)
{
yield return block.Branch;
}
}
}
}

View file

@ -0,0 +1,12 @@
using ARMeilleure.Translation;
namespace ARMeilleure.CodeGen.RegisterAllocators
{
interface IRegisterAllocator
{
AllocationResult RunPass(
ControlFlowGraph cfg,
StackAllocator stackAlloc,
RegisterMasks regMasks);
}
}

View file

@ -11,7 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
// Based on:
// "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
// http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
class LinearScan
class LinearScanAllocator : IRegisterAllocator
{
private const int InstructionGap = 2;
private const int InstructionGapMask = InstructionGap - 1;
@ -71,7 +71,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
public AllocationResult RunPass(
ControlFlowGraph cfg,
StackAllocator stackAlloc,
RegisterMasks regMasks)
{
NumberLocals(cfg);

View file

@ -103,7 +103,11 @@ namespace ARMeilleure.CodeGen.X86
Logger.StartPass(PassName.Optimization);
Optimizer.RunPass(cfg);
if ((cctx.Options & CompilerOptions.SsaForm) != 0 &&
(cctx.Options & CompilerOptions.Optimize) != 0)
{
Optimizer.RunPass(cfg);
}
Logger.EndPass(PassName.Optimization, cfg);
@ -117,9 +121,21 @@ namespace ARMeilleure.CodeGen.X86
Logger.StartPass(PassName.RegisterAllocation);
Ssa.Deconstruct(cfg);
if ((cctx.Options & CompilerOptions.SsaForm) != 0)
{
Ssa.Deconstruct(cfg);
}
LinearScan regAlloc = new LinearScan();
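// High quality (Lsra) code keeps using the linear scan allocator; the fast path
// uses the new hybrid allocator instead.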
IRegisterAllocator regAlloc;
if ((cctx.Options & CompilerOptions.Lsra) != 0)
{
regAlloc = new LinearScanAllocator();
}
else
{
regAlloc = new HybridAllocator();
}
RegisterMasks regMasks = new RegisterMasks(
CallingConvention.GetIntAvailableRegisters(),

View file

@ -40,7 +40,11 @@ namespace ARMeilleure.CodeGen.X86
OperandType[] argTypes = new OperandType[0];
GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(cfg, argTypes, OperandType.I64);
GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(
cfg,
argTypes,
OperandType.I64,
CompilerOptions.HighCq);
_featureInfo = getFeatureInfo();
}

View file

@ -374,6 +374,12 @@ namespace ARMeilleure.CodeGen.X86
if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
{
// FIXME: We should support the destination variable also being used as a source.
for (int srcIndex = 1; srcIndex < operation.SourcesCount; srcIndex++)
{
Debug.Assert(operation.GetSource(srcIndex) == dest);
}
Operation copyOp = new Operation(Instruction.Copy, dest, src1);
node.List.AddBefore(node, copyOp);
@ -382,8 +388,12 @@ namespace ARMeilleure.CodeGen.X86
}
else if (inst == Instruction.ConditionalSelect)
{
Operand src2 = operation.GetSource(1);
Operand src3 = operation.GetSource(2);
// FIXME: We should support the destination variable also being used as a source.
Debug.Assert(src1 == dest || src2 == dest);
Operation copyOp = new Operation(Instruction.Copy, dest, src3);
node.List.AddBefore(node, copyOp);

View file

@ -71,7 +71,7 @@ namespace ARMeilleure.Instructions
public static void Ret(ArmEmitterContext context)
{
context.Return(GetIntOrZR(context, RegisterAlias.Lr));
context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag)));
}
public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);

View file

@ -10,6 +10,8 @@ namespace ARMeilleure.Instructions
{
static class InstEmitFlowHelper
{
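// Bit 0 of the address returned to the dispatcher is used as a flag, set on calls,
// returns and virtual branches. The translator masks it off before looking the
// address up, and uses it to decide which functions get counted for tier up.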
public const ulong CallFlag = 1;
public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
{
if (cond != Condition.Al)
@ -140,7 +142,7 @@ namespace ARMeilleure.Instructions
public static void EmitCall(ArmEmitterContext context, ulong immediate)
{
context.Return(Const(immediate));
context.Return(Const(immediate | CallFlag));
}
public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
@ -155,7 +157,7 @@ namespace ARMeilleure.Instructions
private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump)
{
context.Return(target);
context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag)));
}
private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal)

View file

@ -49,8 +49,8 @@ namespace ARMeilleure.Instructions
{
switch (op.Size)
{
case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(0x01010101)); break;
case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(0x00010001)); break;
case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;
case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;
case 2: n = context.ZeroExtend32(n.Type, n); break;
}

View file

@ -66,7 +66,11 @@ namespace ARMeilleure.Memory
OperandType.V128
};
_compareExchange128 = Compiler.Compile<CompareExchange128>(cfg, argTypes, OperandType.V128);
_compareExchange128 = Compiler.Compile<CompareExchange128>(
cfg,
argTypes,
OperandType.V128,
CompilerOptions.HighCq);
}
}
}

View file

@ -12,21 +12,8 @@ namespace ARMeilleure.Translation
public static T Compile<T>(
ControlFlowGraph cfg,
OperandType[] funcArgTypes,
OperandType funcReturnType)
{
CompilerContext cctx = GetCompilerContext(cfg, funcArgTypes, funcReturnType);
CompiledFunction func = CodeGenerator.Generate(cctx);
IntPtr codePtr = JitCache.Map(func);
return Marshal.GetDelegateForFunctionPointer<T>(codePtr);
}
private static CompilerContext GetCompilerContext(
ControlFlowGraph cfg,
OperandType[] funcArgTypes,
OperandType funcReturnType)
OperandType funcReturnType,
CompilerOptions options)
{
Logger.StartPass(PassName.Dominance);
@ -37,11 +24,24 @@ namespace ARMeilleure.Translation
Logger.StartPass(PassName.SsaConstruction);
Ssa.Construct(cfg);
if ((options & CompilerOptions.SsaForm) != 0)
{
Ssa.Construct(cfg);
}
else
{
RegisterToLocal.Rename(cfg);
}
Logger.EndPass(PassName.SsaConstruction, cfg);
return new CompilerContext(cfg, funcArgTypes, funcReturnType);
CompilerContext cctx = new CompilerContext(cfg, funcArgTypes, funcReturnType, options);
CompiledFunction func = CodeGenerator.Generate(cctx);
IntPtr codePtr = JitCache.Map(func);
return Marshal.GetDelegateForFunctionPointer<T>(codePtr);
}
}
}

View file

@ -9,14 +9,18 @@ namespace ARMeilleure.Translation
public OperandType[] FuncArgTypes { get; }
public OperandType FuncReturnType { get; }
public CompilerOptions Options { get; }
public CompilerContext(
ControlFlowGraph cfg,
OperandType[] funcArgTypes,
OperandType funcReturnType)
OperandType funcReturnType,
CompilerOptions options)
{
Cfg = cfg;
FuncArgTypes = funcArgTypes;
FuncReturnType = funcReturnType;
Options = options;
}
}
}

View file

@ -0,0 +1,16 @@
using System;
namespace ARMeilleure.Translation
{
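// Controls how much work the compiler does. Tier 0 code is compiled with None
// (no SSA, no optimizations, hybrid register allocation); hot functions are later
// recompiled in the background with HighCq.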
[Flags]
enum CompilerOptions
{
None = 0,
SsaForm = 1 << 0,
Optimize = 1 << 1,
Lsra = 1 << 2,
MediumCq = SsaForm | Optimize,
HighCq = SsaForm | Optimize | Lsra
}
}

View file

@ -0,0 +1,39 @@
using System.Collections.Concurrent;
namespace ARMeilleure.Translation
{
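// Minimal multi-level queue: one ConcurrentQueue per priority level, always
// dequeuing from the lowest index (highest priority) first. Used to feed the
// background translator thread with addresses of functions to recompile.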
class PriorityQueue<T>
{
private ConcurrentQueue<T>[] _queues;
public PriorityQueue(int priorities)
{
_queues = new ConcurrentQueue<T>[priorities];
for (int index = 0; index < priorities; index++)
{
_queues[index] = new ConcurrentQueue<T>();
}
}
public void Enqueue(int priority, T value)
{
_queues[priority].Enqueue(value);
}
public bool TryDequeue(out T value)
{
for (int index = 0; index < _queues.Length; index++)
{
if (_queues[index].TryDequeue(out value))
{
return true;
}
}
value = default(T);
return false;
}
}
}

View file

@ -0,0 +1,52 @@
using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
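// Maps each guest register operand to a single local variable. This replaces SSA
// construction on the fast (tier 0) path: no phi nodes are created, every register
// simply becomes one local.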
static class RegisterToLocal
{
public static void Rename(ControlFlowGraph cfg)
{
Dictionary<Register, Operand> registerToLocalMap = new Dictionary<Register, Operand>();
Operand GetLocal(Operand op)
{
Register register = op.GetRegister();
if (!registerToLocalMap.TryGetValue(register, out Operand local))
{
local = Local(op.Type);
registerToLocalMap.Add(register, local);
}
return local;
}
foreach (BasicBlock block in cfg.Blocks)
{
foreach (Node node in block.Operations)
{
Operand dest = node.Destination;
if (dest != null && dest.Kind == OperandKind.Register)
{
node.Destination = GetLocal(dest);
}
for (int index = 0; index < node.SourcesCount; index++)
{
Operand source = node.GetSource(index);
if (source.Kind == OperandKind.Register)
{
node.SetSource(index, GetLocal(source));
}
}
}
}
}
}
}

View file

@ -68,7 +68,7 @@ namespace ARMeilleure.Translation
}
}
public static void RunPass(ControlFlowGraph cfg)
public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction)
{
// Compute local register inputs and outputs used inside blocks.
RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count];
@ -218,8 +218,8 @@ namespace ARMeilleure.Translation
if (EndsWithReturn(block) || hasContextStore)
{
StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer);
StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector);
StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction);
StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, isCompleteFunction);
}
}
}
@ -311,9 +311,9 @@ namespace ARMeilleure.Translation
block.Operations.AddFirst(loadArg0);
}
private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType)
private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction)
{
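// Skipping the store of callee saved registers is only valid when a whole guest
// function was translated; a partial (tier 0) translation can exit in the middle
// of a guest function, where those registers may hold modified values.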
if (Optimizations.AssumeStrictAbiCompliance)
if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction)
{
if (baseType == RegisterType.Integer || baseType == RegisterType.Flag)
{

View file

@ -284,7 +284,7 @@ namespace ARMeilleure.Translation
{
return new Register(id - RegisterConsts.IntRegsCount, RegisterType.Vector);
}
else /* if (key < RegisterConsts.TotalCount) */
else /* if (id < RegisterConsts.TotalCount) */
{
return new Register(id - RegisterConsts.IntAndVecRegsCount, RegisterType.Flag);
}

View file

@ -1,19 +1,30 @@
using ARMeilleure.State;
using System.Threading;
namespace ARMeilleure.Translation
{
class TranslatedFunction
{
private const int MinCallsForRejit = 100;
private GuestFunction _func;
public TranslatedFunction(GuestFunction func)
private bool _rejit;
private int _callCount;
public TranslatedFunction(GuestFunction func, bool rejit)
{
_func = func;
_func = func;
_rejit = rejit;
}
public ulong Execute(ExecutionContext context)
public ulong Execute(State.ExecutionContext context)
{
return _func(context.NativeContextPtr);
}
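// True exactly once, on the MinCallsForRejit-th call, and only for functions
// compiled without the high quality options, so a hot function is queued for
// background recompilation a single time.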
public bool ShouldRejit()
{
return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
}
}
}

View file

@ -6,6 +6,7 @@ using ARMeilleure.Memory;
using ARMeilleure.State;
using System;
using System.Collections.Concurrent;
using System.Threading;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@ -13,19 +14,56 @@ namespace ARMeilleure.Translation
{
public class Translator
{
private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
private MemoryManager _memory;
private ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
private PriorityQueue<ulong> _backgroundQueue;
private AutoResetEvent _backgroundTranslatorEvent;
private volatile int _threadCount;
public Translator(MemoryManager memory)
{
_memory = memory;
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
_backgroundQueue = new PriorityQueue<ulong>(2);
_backgroundTranslatorEvent = new AutoResetEvent(false);
}
public void Execute(ExecutionContext context, ulong address)
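// Runs on a single, lowest priority background thread: dequeues addresses of hot
// functions and replaces their tier 0 translation with a high quality (HighCq) one.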
private void TranslateQueuedSubs()
{
while (_threadCount != 0)
{
if (_backgroundQueue.TryDequeue(out ulong address))
{
TranslatedFunction func = Translate(address, ExecutionMode.Aarch64, highCq: true);
_funcs.AddOrUpdate(address, func, (key, oldFunc) => func);
}
else
{
_backgroundTranslatorEvent.WaitOne();
}
}
}
public void Execute(State.ExecutionContext context, ulong address)
{
if (Interlocked.Increment(ref _threadCount) == 1)
{
Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs);
backgroundTranslatorThread.Priority = ThreadPriority.Lowest;
backgroundTranslatorThread.Start();
}
Statistics.InitializeTimer();
NativeInterface.RegisterThread(context, _memory);
@ -37,9 +75,14 @@ namespace ARMeilleure.Translation
while (context.Running && address != 0);
NativeInterface.UnregisterThread();
if (Interlocked.Decrement(ref _threadCount) == 0)
{
_backgroundTranslatorEvent.Set();
}
}
public ulong ExecuteSingle(ExecutionContext context, ulong address)
public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
{
TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
@ -54,23 +97,37 @@ namespace ARMeilleure.Translation
private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
// TODO: Investigate how we should handle code at unaligned addresses.
// Currently, those low bits are used to store special flags.
bool isCallTarget = (address & CallFlag) != 0;
address &= ~CallFlag;
if (!_funcs.TryGetValue(address, out TranslatedFunction func))
{
func = Translate(address, mode);
func = Translate(address, mode, highCq: false);
_funcs.TryAdd(address, func);
}
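// Only addresses tagged with CallFlag (calls, returns and virtual branches) count
// towards tier up, so ordinary block-to-block transitions never trigger a rejit.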
else if (isCallTarget && func.ShouldRejit())
{
_backgroundQueue.Enqueue(0, address);
_backgroundTranslatorEvent.Set();
}
return func;
}
private TranslatedFunction Translate(ulong address, ExecutionMode mode)
private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
{
ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
Block[] blocks = Decoder.DecodeFunction(_memory, address, mode);
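// Tier 0 translates one basic block at a time; the high quality rejit decodes and
// compiles the whole function.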
Block[] blocks = highCq
? Decoder.DecodeFunction (_memory, address, mode)
: Decoder.DecodeBasicBlock(_memory, address, mode);
Logger.EndPass(PassName.Decoding);
@ -89,15 +146,19 @@ namespace ARMeilleure.Translation
Logger.StartPass(PassName.RegisterUsage);
RegisterUsage.RunPass(cfg);
RegisterUsage.RunPass(cfg, isCompleteFunction: false);
Logger.EndPass(PassName.RegisterUsage);
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
GuestFunction func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64);
CompilerOptions options = highCq
? CompilerOptions.HighCq
: CompilerOptions.None;
return new TranslatedFunction(func);
GuestFunction func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);
return new TranslatedFunction(func, rejit: !highCq);
}
private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks)