Add optimizations related to caller/callee saved registers and thread synchronization, and disable tier 0
This commit is contained in:
parent
5001f78b1d
commit
b73a8ef01d
13 changed files with 204 additions and 124 deletions
|
@ -60,6 +60,8 @@ namespace ChocolArm64.Instructions
|
||||||
{
|
{
|
||||||
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
|
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
|
||||||
|
|
||||||
|
context.HasIndirectJump = true;
|
||||||
|
|
||||||
context.EmitStoreState();
|
context.EmitStoreState();
|
||||||
context.EmitLdintzr(op.Rn);
|
context.EmitLdintzr(op.Rn);
|
||||||
|
|
||||||
|
|
|
@ -86,7 +86,11 @@ namespace ChocolArm64.Instructions
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||||
context.EmitLdtmp();
|
context.EmitLdtmp();
|
||||||
|
|
||||||
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));
|
string name = isJump
|
||||||
|
? nameof(Translator.GetOrTranslateVirtualSubroutineForJump)
|
||||||
|
: nameof(Translator.GetOrTranslateVirtualSubroutine);
|
||||||
|
|
||||||
|
context.EmitPrivateCall(typeof(Translator), name);
|
||||||
|
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||||
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
|
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
|
||||||
|
|
|
@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86;
|
||||||
|
|
||||||
public static class Optimizations
|
public static class Optimizations
|
||||||
{
|
{
|
||||||
internal static bool FastFP = true;
|
public static bool AssumeStrictAbiCompliance { get; set; } = true;
|
||||||
|
|
||||||
private static bool _useAllSseIfAvailable = true;
|
public static bool FastFP { get; set; } = true;
|
||||||
|
|
||||||
private static bool _useSseIfAvailable = true;
|
private const bool UseAllSseIfAvailable = true;
|
||||||
private static bool _useSse2IfAvailable = true;
|
|
||||||
private static bool _useSse3IfAvailable = true;
|
|
||||||
private static bool _useSsse3IfAvailable = true;
|
|
||||||
private static bool _useSse41IfAvailable = true;
|
|
||||||
private static bool _useSse42IfAvailable = true;
|
|
||||||
|
|
||||||
internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
|
public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
|
||||||
internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
|
public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
|
||||||
internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported;
|
public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
|
||||||
internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
|
public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
|
||||||
internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
|
public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
|
||||||
internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;
|
public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
|
||||||
|
|
||||||
|
internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
|
||||||
|
internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
|
||||||
|
internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
|
||||||
|
internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
|
||||||
|
internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
|
||||||
|
internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
|
||||||
}
|
}
|
|
@ -6,11 +6,11 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
public long IntInputs { get; private set; }
|
public long IntInputs { get; private set; }
|
||||||
public long IntOutputs { get; private set; }
|
public long IntOutputs { get; private set; }
|
||||||
public long IntAwOutputs { get; private set; }
|
private long _intAwOutputs;
|
||||||
|
|
||||||
public long VecInputs { get; private set; }
|
public long VecInputs { get; private set; }
|
||||||
public long VecOutputs { get; private set; }
|
public long VecOutputs { get; private set; }
|
||||||
public long VecAwOutputs { get; private set; }
|
private long _vecAwOutputs;
|
||||||
|
|
||||||
public bool HasStateStore { get; private set; }
|
public bool HasStateStore { get; private set; }
|
||||||
|
|
||||||
|
@ -34,16 +34,16 @@ namespace ChocolArm64.Translation
|
||||||
//opcodes emitted by each ARM instruction.
|
//opcodes emitted by each ARM instruction.
|
||||||
//We can only consider the new outputs for doing input elimination
|
//We can only consider the new outputs for doing input elimination
|
||||||
//after all the CIL opcodes used by the instruction being emitted.
|
//after all the CIL opcodes used by the instruction being emitted.
|
||||||
IntAwOutputs = IntOutputs;
|
_intAwOutputs = IntOutputs;
|
||||||
VecAwOutputs = VecOutputs;
|
_vecAwOutputs = VecOutputs;
|
||||||
}
|
}
|
||||||
else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
|
else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
|
||||||
{
|
{
|
||||||
switch (ld.IoType)
|
switch (ld.IoType)
|
||||||
{
|
{
|
||||||
case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break;
|
case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break;
|
||||||
case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break;
|
case IoType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break;
|
||||||
case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break;
|
case IoType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
|
else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
|
||||||
|
|
|
@ -31,6 +31,8 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
|
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
|
||||||
|
|
||||||
|
public bool HasIndirectJump { get; set; }
|
||||||
|
|
||||||
private Dictionary<Block, ILBlock> _visitedBlocks;
|
private Dictionary<Block, ILBlock> _visitedBlocks;
|
||||||
|
|
||||||
private Queue<Block> _branchTargets;
|
private Queue<Block> _branchTargets;
|
||||||
|
@ -91,7 +93,12 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
ResetBlockState();
|
ResetBlockState();
|
||||||
|
|
||||||
AdvanceOpCode();
|
if (AdvanceOpCode())
|
||||||
|
{
|
||||||
|
EmitSynchronization();
|
||||||
|
|
||||||
|
_ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int GetIntTempIndex()
|
public static int GetIntTempIndex()
|
||||||
|
@ -127,10 +134,18 @@ namespace ChocolArm64.Translation
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_opcIndex == 0)
|
int opcIndex = _opcIndex;
|
||||||
|
|
||||||
|
if (opcIndex == 0)
|
||||||
{
|
{
|
||||||
MarkLabel(GetLabel(_currBlock.Position));
|
MarkLabel(GetLabel(_currBlock.Position));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
|
||||||
|
|
||||||
|
if (isLastOp && CurrBlock.Branch != null &&
|
||||||
|
(ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
|
||||||
|
{
|
||||||
EmitSynchronization();
|
EmitSynchronization();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,7 +176,7 @@ namespace ChocolArm64.Translation
|
||||||
//of the next instruction to be executed (in the case that the condition
|
//of the next instruction to be executed (in the case that the condition
|
||||||
//is false, and the branch was not taken, as all basic blocks should end with
|
//is false, and the branch was not taken, as all basic blocks should end with
|
||||||
//some kind of branch).
|
//some kind of branch).
|
||||||
if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null)
|
if (isLastOp && CurrBlock.Next == null)
|
||||||
{
|
{
|
||||||
EmitStoreState();
|
EmitStoreState();
|
||||||
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
|
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
|
||||||
|
@ -285,7 +300,7 @@ namespace ChocolArm64.Translation
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
|
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1, isComplete: true));
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool TryOptEmitSubroutineCall()
|
public bool TryOptEmitSubroutineCall()
|
||||||
|
|
|
@ -6,7 +6,7 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
private bool _hasLabel;
|
private bool _hasLabel;
|
||||||
|
|
||||||
private Label _lbl;
|
private Label _label;
|
||||||
|
|
||||||
public void Emit(ILMethodBuilder context)
|
public void Emit(ILMethodBuilder context)
|
||||||
{
|
{
|
||||||
|
@ -17,12 +17,12 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
if (!_hasLabel)
|
if (!_hasLabel)
|
||||||
{
|
{
|
||||||
_lbl = context.Generator.DefineLabel();
|
_label = context.Generator.DefineLabel();
|
||||||
|
|
||||||
_hasLabel = true;
|
_hasLabel = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return _lbl;
|
return _label;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -18,17 +18,29 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
private string _subName;
|
private string _subName;
|
||||||
|
|
||||||
|
public bool IsAarch64 { get; }
|
||||||
|
|
||||||
|
public bool IsSubComplete { get; }
|
||||||
|
|
||||||
private int _localsCount;
|
private int _localsCount;
|
||||||
|
|
||||||
public ILMethodBuilder(ILBlock[] ilBlocks, string subName)
|
public ILMethodBuilder(
|
||||||
|
ILBlock[] ilBlocks,
|
||||||
|
string subName,
|
||||||
|
bool isAarch64,
|
||||||
|
bool isSubComplete = false)
|
||||||
{
|
{
|
||||||
_ilBlocks = ilBlocks;
|
_ilBlocks = ilBlocks;
|
||||||
_subName = subName;
|
_subName = subName;
|
||||||
|
IsAarch64 = isAarch64;
|
||||||
|
IsSubComplete = isSubComplete;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TranslatedSub GetSubroutine(TranslationTier tier)
|
public TranslatedSub GetSubroutine(TranslationTier tier)
|
||||||
{
|
{
|
||||||
LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
|
LocalAlloc = new LocalAlloc();
|
||||||
|
|
||||||
|
LocalAlloc.BuildUses(_ilBlocks[0]);
|
||||||
|
|
||||||
DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
|
DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
|
||||||
|
|
||||||
|
@ -40,8 +52,6 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
_localsCount = 0;
|
_localsCount = 0;
|
||||||
|
|
||||||
new ILOpCodeLoadState(_ilBlocks[0]).Emit(this);
|
|
||||||
|
|
||||||
foreach (ILBlock ilBlock in _ilBlocks)
|
foreach (ILBlock ilBlock in _ilBlocks)
|
||||||
{
|
{
|
||||||
ilBlock.Emit(this);
|
ilBlock.Emit(this);
|
||||||
|
|
|
@ -7,9 +7,12 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
private ILBlock _block;
|
private ILBlock _block;
|
||||||
|
|
||||||
public ILOpCodeLoadState(ILBlock block)
|
private bool _isSubEntry;
|
||||||
|
|
||||||
|
public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
|
||||||
{
|
{
|
||||||
_block = block;
|
_block = block;
|
||||||
|
_isSubEntry = isSubEntry;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Emit(ILMethodBuilder context)
|
public void Emit(ILMethodBuilder context)
|
||||||
|
@ -17,6 +20,12 @@ namespace ChocolArm64.Translation
|
||||||
long intInputs = context.LocalAlloc.GetIntInputs(_block);
|
long intInputs = context.LocalAlloc.GetIntInputs(_block);
|
||||||
long vecInputs = context.LocalAlloc.GetVecInputs(_block);
|
long vecInputs = context.LocalAlloc.GetVecInputs(_block);
|
||||||
|
|
||||||
|
if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
|
||||||
|
{
|
||||||
|
intInputs = LocalAlloc.ClearCallerSavedIntRegs(intInputs, context.IsAarch64);
|
||||||
|
vecInputs = LocalAlloc.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64);
|
||||||
|
}
|
||||||
|
|
||||||
LoadLocals(context, intInputs, RegisterType.Int);
|
LoadLocals(context, intInputs, RegisterType.Int);
|
||||||
LoadLocals(context, vecInputs, RegisterType.Vector);
|
LoadLocals(context, vecInputs, RegisterType.Vector);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,12 @@ namespace ChocolArm64.Translation
|
||||||
long intOutputs = context.LocalAlloc.GetIntOutputs(_block);
|
long intOutputs = context.LocalAlloc.GetIntOutputs(_block);
|
||||||
long vecOutputs = context.LocalAlloc.GetVecOutputs(_block);
|
long vecOutputs = context.LocalAlloc.GetVecOutputs(_block);
|
||||||
|
|
||||||
|
if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
|
||||||
|
{
|
||||||
|
intOutputs = LocalAlloc.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64);
|
||||||
|
vecOutputs = LocalAlloc.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64);
|
||||||
|
}
|
||||||
|
|
||||||
StoreLocals(context, intOutputs, RegisterType.Int);
|
StoreLocals(context, intOutputs, RegisterType.Int);
|
||||||
StoreLocals(context, vecOutputs, RegisterType.Vector);
|
StoreLocals(context, vecOutputs, RegisterType.Vector);
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,11 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
class LocalAlloc
|
class LocalAlloc
|
||||||
{
|
{
|
||||||
|
public const long CallerSavedIntRegistersMask = 0x7fL << 9;
|
||||||
|
public const long PStateNzcvFlagsMask = 0xfL << 60;
|
||||||
|
|
||||||
|
public const long CallerSavedVecRegistersMask = 0xffffL << 16;
|
||||||
|
|
||||||
private class PathIo
|
private class PathIo
|
||||||
{
|
{
|
||||||
private Dictionary<ILBlock, long> _allInputs;
|
private Dictionary<ILBlock, long> _allInputs;
|
||||||
|
@ -57,15 +62,40 @@ namespace ChocolArm64.Translation
|
||||||
private Dictionary<ILBlock, PathIo> _intPaths;
|
private Dictionary<ILBlock, PathIo> _intPaths;
|
||||||
private Dictionary<ILBlock, PathIo> _vecPaths;
|
private Dictionary<ILBlock, PathIo> _vecPaths;
|
||||||
|
|
||||||
|
private HashSet<ILBlock> _entryBlocks;
|
||||||
|
|
||||||
private struct BlockIo
|
private struct BlockIo
|
||||||
{
|
{
|
||||||
public ILBlock Block;
|
public ILBlock Block { get; }
|
||||||
public ILBlock Entry;
|
public ILBlock Entry { get; }
|
||||||
|
|
||||||
public long IntInputs;
|
public long IntInputs { get; set; }
|
||||||
public long VecInputs;
|
public long VecInputs { get; set; }
|
||||||
public long IntOutputs;
|
public long IntOutputs { get; set; }
|
||||||
public long VecOutputs;
|
public long VecOutputs { get; set; }
|
||||||
|
|
||||||
|
public BlockIo(ILBlock block, ILBlock entry)
|
||||||
|
{
|
||||||
|
Block = block;
|
||||||
|
Entry = entry;
|
||||||
|
|
||||||
|
IntInputs = IntOutputs = 0;
|
||||||
|
VecInputs = VecOutputs = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockIo(
|
||||||
|
ILBlock block,
|
||||||
|
ILBlock entry,
|
||||||
|
long intInputs,
|
||||||
|
long vecInputs,
|
||||||
|
long intOutputs,
|
||||||
|
long vecOutputs) : this(block, entry)
|
||||||
|
{
|
||||||
|
IntInputs = intInputs;
|
||||||
|
VecInputs = vecInputs;
|
||||||
|
IntOutputs = intOutputs;
|
||||||
|
VecOutputs = vecOutputs;
|
||||||
|
}
|
||||||
|
|
||||||
public override bool Equals(object obj)
|
public override bool Equals(object obj)
|
||||||
{
|
{
|
||||||
|
@ -98,25 +128,15 @@ namespace ChocolArm64.Translation
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private const int MaxOptGraphLength = 40;
|
public LocalAlloc()
|
||||||
|
|
||||||
public LocalAlloc(ILBlock[] graph, ILBlock entry)
|
|
||||||
{
|
{
|
||||||
_intPaths = new Dictionary<ILBlock, PathIo>();
|
_intPaths = new Dictionary<ILBlock, PathIo>();
|
||||||
_vecPaths = new Dictionary<ILBlock, PathIo>();
|
_vecPaths = new Dictionary<ILBlock, PathIo>();
|
||||||
|
|
||||||
if (graph.Length > 1 &&
|
_entryBlocks = new HashSet<ILBlock>();
|
||||||
graph.Length < MaxOptGraphLength)
|
|
||||||
{
|
|
||||||
InitializeOptimal(graph, entry);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
InitializeFast(graph);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void InitializeOptimal(ILBlock[] graph, ILBlock entry)
|
public void BuildUses(ILBlock entry)
|
||||||
{
|
{
|
||||||
//This will go through all possible paths on the graph,
|
//This will go through all possible paths on the graph,
|
||||||
//and store all inputs/outputs for each block. A register
|
//and store all inputs/outputs for each block. A register
|
||||||
|
@ -133,19 +153,15 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
void Enqueue(BlockIo block)
|
void Enqueue(BlockIo block)
|
||||||
{
|
{
|
||||||
if (!visited.Contains(block))
|
if (visited.Add(block))
|
||||||
{
|
{
|
||||||
unvisited.Enqueue(block);
|
unvisited.Enqueue(block);
|
||||||
|
|
||||||
visited.Add(block);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Enqueue(new BlockIo()
|
_entryBlocks.Add(entry);
|
||||||
{
|
|
||||||
Block = entry,
|
Enqueue(new BlockIo(entry, entry));
|
||||||
Entry = entry
|
|
||||||
});
|
|
||||||
|
|
||||||
while (unvisited.Count > 0)
|
while (unvisited.Count > 0)
|
||||||
{
|
{
|
||||||
|
@ -177,19 +193,23 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
void EnqueueFromCurrent(ILBlock block, bool retTarget)
|
void EnqueueFromCurrent(ILBlock block, bool retTarget)
|
||||||
{
|
{
|
||||||
BlockIo blockIo = new BlockIo() { Block = block };
|
BlockIo blockIo;
|
||||||
|
|
||||||
if (retTarget)
|
if (retTarget)
|
||||||
{
|
{
|
||||||
blockIo.Entry = block;
|
blockIo = new BlockIo(block, block);
|
||||||
|
|
||||||
|
_entryBlocks.Add(block);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
blockIo.Entry = current.Entry;
|
blockIo = new BlockIo(
|
||||||
blockIo.IntInputs = current.IntInputs;
|
block,
|
||||||
blockIo.VecInputs = current.VecInputs;
|
current.Entry,
|
||||||
blockIo.IntOutputs = current.IntOutputs;
|
current.IntInputs,
|
||||||
blockIo.VecOutputs = current.VecOutputs;
|
current.VecInputs,
|
||||||
|
current.IntOutputs,
|
||||||
|
current.VecOutputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
Enqueue(blockIo);
|
Enqueue(blockIo);
|
||||||
|
@ -207,38 +227,6 @@ namespace ChocolArm64.Translation
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void InitializeFast(ILBlock[] graph)
|
|
||||||
{
|
|
||||||
//This is WAY faster than InitializeOptimal, but results in
|
|
||||||
//unneeded loads and stores, so the resulting code will be slower.
|
|
||||||
long intInputs = 0, intOutputs = 0;
|
|
||||||
long vecInputs = 0, vecOutputs = 0;
|
|
||||||
|
|
||||||
foreach (ILBlock block in graph)
|
|
||||||
{
|
|
||||||
intInputs |= block.IntInputs;
|
|
||||||
intOutputs |= block.IntOutputs;
|
|
||||||
vecInputs |= block.VecInputs;
|
|
||||||
vecOutputs |= block.VecOutputs;
|
|
||||||
}
|
|
||||||
|
|
||||||
//It's possible that not all code paths write to those output registers,
|
|
||||||
//in those cases, if we attempt to write an output register that was
|
|
||||||
//not written, we will be just writing zero and messing up the old register value.
|
|
||||||
//So we just need to ensure that all outputs are loaded.
|
|
||||||
if (graph.Length > 1)
|
|
||||||
{
|
|
||||||
intInputs |= intOutputs;
|
|
||||||
vecInputs |= vecOutputs;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (ILBlock block in graph)
|
|
||||||
{
|
|
||||||
_intPaths.Add(block, new PathIo(block, intInputs, intOutputs));
|
|
||||||
_vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
|
public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
|
||||||
public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
|
public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
|
||||||
|
|
||||||
|
@ -256,5 +244,29 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
|
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
|
||||||
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
|
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
|
||||||
|
|
||||||
|
public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
|
||||||
|
{
|
||||||
|
//TODO: ARM32 support.
|
||||||
|
if (isAarch64)
|
||||||
|
{
|
||||||
|
mask &= ~CallerSavedIntRegistersMask;
|
||||||
|
mask &= ~PStateNzcvFlagsMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
|
||||||
|
{
|
||||||
|
//TODO: ARM32 support.
|
||||||
|
if (isAarch64)
|
||||||
|
{
|
||||||
|
mask &= ~CallerSavedVecRegistersMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -80,16 +80,21 @@ namespace ChocolArm64.Translation
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internal ArmSubroutine GetOrTranslateVirtualSubroutineForJump(CpuThreadState state, long position)
|
||||||
|
{
|
||||||
|
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: true);
|
||||||
|
}
|
||||||
|
|
||||||
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
|
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
|
||||||
|
{
|
||||||
|
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ArmSubroutine GetOrTranslateVirtualSubroutineImpl(CpuThreadState state, long position, bool isJump)
|
||||||
{
|
{
|
||||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
||||||
{
|
{
|
||||||
sub = TranslateLowCq(position, state.GetExecutionMode());
|
sub = TranslateHighCq(position, state.GetExecutionMode(), !isJump);
|
||||||
}
|
|
||||||
|
|
||||||
if (sub.Tier == TranslationTier.Tier0)
|
|
||||||
{
|
|
||||||
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return sub.Delegate;
|
return sub.Delegate;
|
||||||
|
@ -99,7 +104,7 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
|
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
|
||||||
{
|
{
|
||||||
subroutine = TranslateLowCq(position, state.GetExecutionMode());
|
subroutine = TranslateHighCq(position, state.GetExecutionMode(), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
return subroutine;
|
return subroutine;
|
||||||
|
@ -124,7 +129,7 @@ namespace ChocolArm64.Translation
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
TranslateHighCq(item.Position, item.Mode);
|
TranslateHighCq(item.Position, item.Mode, item.IsComplete);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -142,14 +147,16 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
string subName = GetSubroutineName(position);
|
string subName = GetSubroutineName(position);
|
||||||
|
|
||||||
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
|
bool isAarch64 = mode == ExecutionMode.Aarch64;
|
||||||
|
|
||||||
|
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
|
||||||
|
|
||||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
|
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
|
||||||
|
|
||||||
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
|
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void TranslateHighCq(long position, ExecutionMode mode)
|
private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
|
||||||
{
|
{
|
||||||
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
|
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
|
||||||
|
|
||||||
|
@ -159,7 +166,11 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
string subName = GetSubroutineName(position);
|
string subName = GetSubroutineName(position);
|
||||||
|
|
||||||
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
|
bool isAarch64 = mode == ExecutionMode.Aarch64;
|
||||||
|
|
||||||
|
isComplete &= !context.HasIndirectJump;
|
||||||
|
|
||||||
|
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
|
||||||
|
|
||||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
|
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
|
||||||
|
|
||||||
|
@ -173,6 +184,8 @@ namespace ChocolArm64.Translation
|
||||||
_cache.AddOrUpdate(position, subroutine, ilOpCount);
|
_cache.AddOrUpdate(position, subroutine, ilOpCount);
|
||||||
|
|
||||||
ForceAheadOfTimeCompilation(subroutine);
|
ForceAheadOfTimeCompilation(subroutine);
|
||||||
|
|
||||||
|
return subroutine;
|
||||||
}
|
}
|
||||||
|
|
||||||
private string GetSubroutineName(long position)
|
private string GetSubroutineName(long position)
|
||||||
|
|
|
@ -10,11 +10,18 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
public TranslationTier Tier { get; }
|
public TranslationTier Tier { get; }
|
||||||
|
|
||||||
public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
|
public bool IsComplete { get; }
|
||||||
|
|
||||||
|
public TranslatorQueueItem(
|
||||||
|
long position,
|
||||||
|
ExecutionMode mode,
|
||||||
|
TranslationTier tier,
|
||||||
|
bool isComplete = false)
|
||||||
{
|
{
|
||||||
Position = position;
|
Position = position;
|
||||||
Mode = mode;
|
Mode = mode;
|
||||||
Tier = tier;
|
Tier = tier;
|
||||||
|
IsComplete = isComplete;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Add table
Add a link
Reference in a new issue