Do not use a separate thread for translation; implement two-tier translation

This commit is contained in:
gdkchan 2018-03-02 15:25:25 -03:00
parent 4c1fe61dfe
commit 3768c13b35
8 changed files with 205 additions and 194 deletions

View file

@ -14,8 +14,6 @@ namespace ChocolArm64
private AA64Subroutine ExecDelegate;
private bool HasDelegate;
public static int StateArgIdx { get; private set; }
public static int MemoryArgIdx { get; private set; }
@ -27,7 +25,13 @@ namespace ChocolArm64
private HashSet<long> Callees;
public bool NeedsReJit { get; private set; }
private ATranslatedSubType Type;
private int CallCount;
private bool NeedsReJit;
private int MinCallCountForReJit = 5000;
public ATranslatedSub(DynamicMethod Method, List<ARegister> Params, HashSet<long> Callees)
{
@ -49,6 +53,8 @@ namespace ChocolArm64
this.Method = Method;
this.Params = Params.AsReadOnly();
this.Callees = Callees;
PrepareDelegate();
}
static ATranslatedSub()
@ -76,38 +82,53 @@ namespace ChocolArm64
}
}
public long Execute(AThreadState ThreadState, AMemory Memory)
private void PrepareDelegate()
{
if (!HasDelegate)
string Name = $"{Method.Name}_Dispatch";
DynamicMethod Mthd = new DynamicMethod(Name, typeof(long), FixedArgTypes);
ILGenerator Generator = Mthd.GetILGenerator();
Generator.EmitLdargSeq(FixedArgTypes.Length);
foreach (ARegister Reg in Params)
{
string Name = $"{Method.Name}_Dispatch";
Generator.EmitLdarg(StateArgIdx);
DynamicMethod Mthd = new DynamicMethod(Name, typeof(long), FixedArgTypes);
ILGenerator Generator = Mthd.GetILGenerator();
Generator.EmitLdargSeq(FixedArgTypes.Length);
foreach (ARegister Reg in Params)
{
Generator.EmitLdarg(StateArgIdx);
Generator.Emit(OpCodes.Ldfld, Reg.GetField());
}
Generator.Emit(OpCodes.Call, Method);
Generator.Emit(OpCodes.Ret);
ExecDelegate = (AA64Subroutine)Mthd.CreateDelegate(typeof(AA64Subroutine));
HasDelegate = true;
Generator.Emit(OpCodes.Ldfld, Reg.GetField());
}
Generator.Emit(OpCodes.Call, Method);
Generator.Emit(OpCodes.Ret);
ExecDelegate = (AA64Subroutine)Mthd.CreateDelegate(typeof(AA64Subroutine));
}
//Tells the caller whether this subroutine should be translated again.
//Tier 0 subroutines count their calls (the counter stops growing once it
//reaches MinCallCountForReJit) and report true when the counter equals that
//threshold. Tier 1 subroutines report true only after MarkForReJit was called.
//Any other type never requests a new translation.
public bool ShouldReJit()
{
    if (Type != ATranslatedSubType.SubTier0)
    {
        return NeedsReJit && Type == ATranslatedSubType.SubTier1;
    }

    if (CallCount >= MinCallCountForReJit)
    {
        //Counter is saturated, no further increments happen.
        return CallCount == MinCallCountForReJit;
    }

    return ++CallCount == MinCallCountForReJit;
}
//Invokes the dispatch delegate with the given thread state and memory,
//and hands back the value produced by the translated code.
public long Execute(AThreadState ThreadState, AMemory Memory) => ExecDelegate(ThreadState, Memory);
public void SetType(ATranslatedSubType Type) => this.Type = Type;
public bool HasCallee(long Position) => Callees.Contains(Position);
public void MarkForReJit() => NeedsReJit = true;
public void MarkForReJit() => NeedsReJit = true;
}
}

View file

@ -0,0 +1,17 @@
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64
{
//Classification attached to an ATranslatedSub through SetType,
//and read back by ATranslatedSub.ShouldReJit.
enum ATranslatedSubType
{
//Assigned by TranslateTier0 when the translated position was previously
//recorded in the translator's SubBlocks set (the address right after a block
//whose last opcode is neither Ret nor Br). ShouldReJit is always false for it.
SubBlock,
//Assigned by TranslateTier0 for every other position. ShouldReJit counts
//the calls and reports true once MinCallCountForReJit is reached.
SubTier0,
//Assigned by TranslateTier1. ShouldReJit reports true only after the
//subroutine was flagged with MarkForReJit.
SubTier1
}
}

View file

@ -7,6 +7,7 @@ using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Reflection.Emit;
using System.Runtime.CompilerServices;
using System.Threading;
namespace ChocolArm64
@ -15,11 +16,7 @@ namespace ChocolArm64
{
private Thread AsyncTranslation;
private HashSet<long> DoNotReJit;
private HashSet<long> WaitingForTranslation;
private ConcurrentQueue<long> TranslationQueue;
private HashSet<long> SubBlocks;
private ConcurrentDictionary<long, ATranslatedSub> CachedSubs;
@ -33,11 +30,7 @@ namespace ChocolArm64
public ATranslator(IReadOnlyDictionary<long, string> SymbolTable = null)
{
DoNotReJit = new HashSet<long>();
WaitingForTranslation = new HashSet<long>();
TranslationQueue = new ConcurrentQueue<long>();
SubBlocks = new HashSet<long>();
CachedSubs = new ConcurrentDictionary<long, ATranslatedSub>();
@ -71,12 +64,12 @@ namespace ChocolArm64
if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub))
{
Sub = Translate(Thread.Memory, Position);
Sub = TranslateTier0(Thread.Memory, Position);
}
if (Sub.NeedsReJit)
if (Sub.ShouldReJit())
{
TranslateAsync(Thread.Memory, Position);
TranslateTier1(Thread.Memory, Position);
}
Position = Sub.Execute(Thread.ThreadState, Thread.Memory);
@ -106,7 +99,7 @@ namespace ChocolArm64
return CachedSubs.ContainsKey(Position);
}
private ATranslatedSub Translate(AMemory Memory, long Position)
private ATranslatedSub TranslateTier0(AMemory Memory, long Position)
{
ABlock Block = ADecoder.DecodeBasicBlock(this, Memory, Position);
@ -124,88 +117,66 @@ namespace ChocolArm64
ATranslatedSub Subroutine = Context.GetSubroutine();
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
if (!DoNotReJit.Contains(Position))
if (SubBlocks.Contains(Position))
{
TranslateAsync(Memory, Position);
SubBlocks.Remove(Position);
Subroutine.SetType(ATranslatedSubType.SubBlock);
}
else
{
Subroutine.SetType(ATranslatedSubType.SubTier0);
}
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
AOpCode LastOp = Block.GetLastOp();
if (LastOp.Emitter != AInstEmit.Ret &&
LastOp.Emitter != AInstEmit.Br)
{
DoNotReJit.Add(LastOp.Position + 4);
SubBlocks.Add(LastOp.Position + 4);
}
return Subroutine;
}
private void TranslateAsync(AMemory Memory, long Position)
private void TranslateTier1(AMemory Memory, long Position)
{
lock (WaitingForTranslation)
(ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position);
string SubName = GetSubName(Position);
PropagateName(Cfg.Graph, SubName);
AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName);
if (Context.CurrBlock.Position != Position)
{
if (!WaitingForTranslation.Add(Position))
Context.Emit(OpCodes.Br, Context.GetLabel(Position));
}
do
{
Context.EmitOpCode();
}
while (Context.AdvanceOpCode());
//Mark all methods that call this method for ReJiting,
//since we can now call it directly, which is faster.
foreach (ATranslatedSub TS in CachedSubs.Values)
{
if (TS.HasCallee(Position))
{
return;
TS.MarkForReJit();
}
}
TranslationQueue.Enqueue(Position);
ATranslatedSub Subroutine = Context.GetSubroutine();
if (AsyncTranslation == null || !AsyncTranslation.IsAlive)
{
AsyncTranslation = new Thread(() => TranslateAsyncWork(Memory));
Subroutine.SetType(ATranslatedSubType.SubTier1);
AsyncTranslation.Priority = ThreadPriority.Lowest;
AsyncTranslation.Start();
}
}
private void TranslateAsyncWork(AMemory Memory)
{
while (TranslationQueue.TryDequeue(out long Position))
{
(ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position);
string SubName = GetSubName(Position);
PropagateName(Cfg.Graph, SubName);
AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName);
if (Context.CurrBlock.Position != Position)
{
Context.Emit(OpCodes.Br, Context.GetLabel(Position));
}
do
{
Context.EmitOpCode();
}
while (Context.AdvanceOpCode());
//Mark all methods that call this method for ReJiting,
//since we can now call it directly, which is faster.
foreach (ATranslatedSub TS in CachedSubs.Values)
{
if (TS.HasCallee(Position))
{
TS.MarkForReJit();
}
}
ATranslatedSub Subroutine = Context.GetSubroutine();
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
lock (WaitingForTranslation)
{
WaitingForTranslation.Remove(Position);
}
}
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
}
private string GetSubName(long Position)

View file

@ -13,8 +13,6 @@ namespace ChocolArm64.Decoder
private static ConcurrentDictionary<Type, OpActivator> OpActivators;
private const int MaxGraphLength = 40;
static ADecoder()
{
OpActivators = new ConcurrentDictionary<Type, OpActivator>();
@ -46,11 +44,6 @@ namespace ChocolArm64.Decoder
{
if (!Visited.TryGetValue(Position, out ABlock Output))
{
if (Visited.Count >= MaxGraphLength)
{
return null;
}
Output = new ABlock(Position);
Blocks.Enqueue(Output);
@ -91,8 +84,8 @@ namespace ChocolArm64.Decoder
}
}
if ((!(LastOp is AOpCodeBImmAl) &&
!(LastOp is AOpCodeBReg)) || HasCachedSub)
if (!((LastOp is AOpCodeBImmAl) ||
(LastOp is AOpCodeBReg)) || HasCachedSub)
{
Current.Next = Enqueue(Current.EndPosition);
}

View file

@ -42,7 +42,7 @@ namespace ChocolArm64.Instruction
{
Context.EmitLdc_I8(Op.Position + 4);
Context.Emit(OpCodes.Br, Context.ExitLabel);
Context.Emit(OpCodes.Ret);
}
}
@ -71,7 +71,7 @@ namespace ChocolArm64.Instruction
{
Context.EmitLdc_I8(Op.Position + 4);
Context.Emit(OpCodes.Br, Context.ExitLabel);
Context.Emit(OpCodes.Ret);
}
}
}

View file

@ -20,7 +20,7 @@ namespace ChocolArm64.Instruction
Context.EmitStoreState();
Context.EmitLdc_I8(Op.Imm);
Context.Emit(OpCodes.Br, Context.ExitLabel);
Context.Emit(OpCodes.Ret);
}
}
@ -28,24 +28,7 @@ namespace ChocolArm64.Instruction
{
AOpCodeBImmCond Op = (AOpCodeBImmCond)Context.CurrOp;
AILLabel LblTaken;
if (Context.CurrBlock.Branch != null)
{
LblTaken = Context.GetLabel(Op.Imm);
}
else
{
LblTaken = new AILLabel();
}
Context.EmitCondBranch(LblTaken, Op.Cond);
if (Context.CurrBlock.Next == null ||
Context.CurrBlock.Branch == null)
{
EmitBranchPaths(Context, LblTaken);
}
EmitBranch(Context, Op.Cond);
}
public static void Bl(AILEmitterCtx Context)
@ -145,66 +128,62 @@ namespace ChocolArm64.Instruction
EmitBranch(Context, ILOp);
}
//Emits the IL for a conditional branch on the given condition code.
//When the current block has both a Next and a Branch block, only a
//conditional jump to the label of the branch target is emitted.
//Otherwise the state is stored and each outcome returns an address:
//Op.Position + 4 when the branch is not taken, Op.Imm when it is taken.
private static void EmitBranch(AILEmitterCtx Context, ACond Cond)
{
    AOpCodeBImm BranchOp = (AOpCodeBImm)Context.CurrOp;

    bool HasBothSuccessors = Context.CurrBlock.Next   != null &&
                             Context.CurrBlock.Branch != null;

    if (HasBothSuccessors)
    {
        Context.EmitCondBranch(Context.GetLabel(BranchOp.Imm), Cond);

        return;
    }

    Context.EmitStoreState();

    AILLabel TakenLabel = new AILLabel();

    Context.EmitCondBranch(TakenLabel, Cond);

    //Not taken path, return the address of the following instruction.
    Context.EmitLdc_I8(BranchOp.Position + 4);
    Context.Emit(OpCodes.Ret);

    //Taken path, return the branch target address.
    Context.MarkLabel(TakenLabel);

    Context.EmitLdc_I8(BranchOp.Imm);
    Context.Emit(OpCodes.Ret);
}
private static void EmitBranch(AILEmitterCtx Context, OpCode ILOp)
{
AOpCodeBImm Op = (AOpCodeBImm)Context.CurrOp;
AILLabel LblTaken;
if (Context.CurrBlock.Branch != null)
if (Context.CurrBlock.Next != null &&
Context.CurrBlock.Branch != null)
{
LblTaken = Context.GetLabel(Op.Imm);
Context.Emit(ILOp, Context.GetLabel(Op.Imm));
}
else
{
LblTaken = new AILLabel();
}
Context.EmitStoreState();
Context.Emit(ILOp, LblTaken);
AILLabel LblTaken = new AILLabel();
if (Context.CurrBlock.Next == null ||
Context.CurrBlock.Branch == null)
{
EmitBranchPaths(Context, LblTaken);
}
}
Context.Emit(ILOp, LblTaken);
private static void EmitBranchPaths(AILEmitterCtx Context, AILLabel LblTaken)
{
AOpCodeBImm Op = (AOpCodeBImm)Context.CurrOp;
Context.EmitLdc_I8(Op.Position + 4);
AILLabel LblEnd = null;
Context.Emit(OpCodes.Ret);
if (Context.CurrBlock.Next == null)
{
EmitBranchExit(Context, Op.Position + 4);
}
else
{
LblEnd = new AILLabel();
Context.Emit(OpCodes.Br, LblEnd);
}
if (Context.CurrBlock.Branch == null)
{
Context.MarkLabel(LblTaken);
EmitBranchExit(Context, Op.Imm);
Context.EmitLdc_I8(Op.Imm);
Context.Emit(OpCodes.Ret);
}
if (LblEnd != null)
{
Context.MarkLabel(LblEnd);
}
}
private static void EmitBranchExit(AILEmitterCtx Context, long Imm)
{
Context.EmitStoreState();
Context.EmitLdc_I8(Imm);
Context.Emit(OpCodes.Br, Context.ExitLabel);
}
}
}

View file

@ -16,10 +16,6 @@ namespace ChocolArm64.Translation
private Dictionary<long, AILLabel> Labels;
public AILLabel ExitLabel { get; private set; }
private bool HasExit;
private int BlkIndex;
private int OpcIndex;
@ -74,8 +70,6 @@ namespace ChocolArm64.Translation
Labels = new Dictionary<long, AILLabel>();
ExitLabel = new AILLabel();
Emitter = new AILEmitter(Graph, Root, SubName);
ILBlock = Emitter.GetILBlock(0);
@ -98,15 +92,6 @@ namespace ChocolArm64.Translation
if (OpcIndex + 1 == CurrBlock.OpCodes.Count &&
BlkIndex + 1 == Graph.Length)
{
if (!HasExit)
{
MarkLabel(ExitLabel);
Emit(OpCodes.Ret);
HasExit = true;
}
return false;
}

View file

@ -67,9 +67,25 @@ namespace ChocolArm64.Translation
public long VecOutputs;
}
private const int MaxOptGraphLength = 55;
private const int MaxOptGraphLength = 40;
//Creates the path tables and fills them using one of two strategies.
//Graphs with more than one block and fewer than MaxOptGraphLength blocks
//go through InitializeOptimal, everything else goes through InitializeFast.
public ALocalAlloc(AILBlock[] Graph, AILBlock Root)
{
    IntPaths = new Dictionary<AILBlock, PathIo>();
    VecPaths = new Dictionary<AILBlock, PathIo>();

    bool UseFastPath = Graph.Length <= 1 ||
                       Graph.Length >= MaxOptGraphLength;

    if (UseFastPath)
    {
        InitializeFast(Graph);

        return;
    }

    InitializeOptimal(Graph, Root);
}
private void InitializeOptimal(AILBlock[] Graph, AILBlock Root)
{
//This will go through all possible paths on the graph,
//and store all inputs/outputs for each block. A register
@ -80,9 +96,6 @@ namespace ChocolArm64.Translation
//when doing input elimination. Each block chain has a root, which is where
//the code starts executing. Roots are present at the subroutine start point,
//and at call return points too (address written to X30 by BL).
IntPaths = new Dictionary<AILBlock, PathIo>();
VecPaths = new Dictionary<AILBlock, PathIo>();
HashSet<BlockIo> Visited = new HashSet<BlockIo>();
Queue<BlockIo> Unvisited = new Queue<BlockIo>();
@ -163,6 +176,38 @@ namespace ChocolArm64.Translation
}
}
//Cheap alternative to InitializeOptimal: every block gets the same
//input/output masks, built by OR-ing the masks of all blocks on the graph.
//This is much faster to compute, but causes unneeded loads and stores,
//so the generated code ends up slower.
private void InitializeFast(AILBlock[] Graph)
{
    long AllIntIn  = 0;
    long AllIntOut = 0;
    long AllVecIn  = 0;
    long AllVecOut = 0;

    for (int Index = 0; Index < Graph.Length; Index++)
    {
        AILBlock Curr = Graph[Index];

        AllIntIn  |= Curr.IntInputs;
        AllIntOut |= Curr.IntOutputs;
        AllVecIn  |= Curr.VecInputs;
        AllVecOut |= Curr.VecOutputs;
    }

    //Some code paths may not write to every output register. Storing such
    //a register without loading it first would write zero over its old value,
    //so with more than one block all outputs are also treated as inputs.
    if (Graph.Length > 1)
    {
        AllIntIn |= AllIntOut;
        AllVecIn |= AllVecOut;
    }

    for (int Index = 0; Index < Graph.Length; Index++)
    {
        AILBlock Curr = Graph[Index];

        IntPaths.Add(Curr, new PathIo(Curr, AllIntIn, AllIntOut));
        VecPaths.Add(Curr, new PathIo(Curr, AllVecIn, AllVecOut));
    }
}
public long GetIntInputs(AILBlock Root) => GetInputsImpl(Root, IntPaths.Values);
public long GetVecInputs(AILBlock Root) => GetInputsImpl(Root, VecPaths.Values);