From 4c1fe61dfeb7a5324e341e1c4ad5932c6d5344ad Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 1 Mar 2018 23:02:20 -0300 Subject: [PATCH] Add background translation to the CPU --- ChocolArm64/ATranslatedSub.cs | 31 ++-- ChocolArm64/ATranslator.cs | 141 ++++++++++++++---- ChocolArm64/Decoder/ADecoder.cs | 19 +++ ChocolArm64/Instruction/AInstEmitException.cs | 13 ++ ChocolArm64/Instruction/AInstEmitFlow.cs | 102 ++++++++++++- ChocolArm64/Translation/AILEmitter.cs | 42 +++--- ChocolArm64/Translation/AILEmitterCtx.cs | 65 ++++++-- ChocolArm64/Translation/ALocalAlloc.cs | 49 +----- 8 files changed, 338 insertions(+), 124 deletions(-) diff --git a/ChocolArm64/ATranslatedSub.cs b/ChocolArm64/ATranslatedSub.cs index 71a6793a2a..61754c4e75 100644 --- a/ChocolArm64/ATranslatedSub.cs +++ b/ChocolArm64/ATranslatedSub.cs @@ -2,6 +2,7 @@ using ChocolArm64.Memory; using ChocolArm64.State; using System; using System.Collections.Generic; +using System.Collections.ObjectModel; using System.Reflection; using System.Reflection.Emit; @@ -15,33 +16,39 @@ namespace ChocolArm64 private bool HasDelegate; - public static Type[] FixedArgTypes { get; private set; } - public static int StateArgIdx { get; private set; } public static int MemoryArgIdx { get; private set; } + public static Type[] FixedArgTypes { get; private set; } + public DynamicMethod Method { get; private set; } - public HashSet SubCalls { get; private set; } + public ReadOnlyCollection Params { get; private set; } - public List Params { get; private set; } + private HashSet Callees; public bool NeedsReJit { get; private set; } - public ATranslatedSub() + public ATranslatedSub(DynamicMethod Method, List Params, HashSet Callees) { - SubCalls = new HashSet(); - } + if (Method == null) + { + throw new ArgumentNullException(nameof(Method)); + } - public ATranslatedSub(DynamicMethod Method, List Params) : this() - { if (Params == null) { throw new ArgumentNullException(nameof(Params)); } - this.Method = Method; - this.Params = Params; + if (Callees == null) + { + throw new ArgumentNullException(nameof(Callees)); + } + + this.Method = Method; + this.Params = Params.AsReadOnly(); + this.Callees = Callees; } static ATranslatedSub() @@ -99,6 +106,8 @@ namespace ChocolArm64 return ExecDelegate(ThreadState, Memory); } + public bool HasCallee(long Position) => Callees.Contains(Position); + public void MarkForReJit() => NeedsReJit = true; } } \ No newline at end of file diff --git a/ChocolArm64/ATranslator.cs b/ChocolArm64/ATranslator.cs index 2daf7bbc96..60278d7977 100644 --- a/ChocolArm64/ATranslator.cs +++ b/ChocolArm64/ATranslator.cs @@ -7,11 +7,20 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Reflection.Emit; +using System.Threading; namespace ChocolArm64 { public class ATranslator { + private Thread AsyncTranslation; + + private HashSet DoNotReJit; + + private HashSet WaitingForTranslation; + + private ConcurrentQueue TranslationQueue; + private ConcurrentDictionary CachedSubs; private ConcurrentDictionary SymbolTable; @@ -24,6 +33,12 @@ namespace ChocolArm64 public ATranslator(IReadOnlyDictionary SymbolTable = null) { + DoNotReJit = new HashSet(); + + WaitingForTranslation = new HashSet(); + + TranslationQueue = new ConcurrentQueue(); + CachedSubs = new ConcurrentDictionary(); if (SymbolTable != null) @@ -38,9 +53,9 @@ namespace ChocolArm64 KeepRunning = true; } - public void StopExecution() => KeepRunning = false; + internal void StopExecution() => KeepRunning = false; - public void ExecuteSubroutine(AThread Thread, long Position) + internal void ExecuteSubroutine(AThread Thread, long Position) { do { @@ -54,9 +69,14 @@ namespace ChocolArm64 CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName)); } - if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub) || Sub.NeedsReJit) + if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub)) { - Sub = TranslateSubroutine(Thread.Memory, Position); + Sub = Translate(Thread.Memory, Position); + } + + if (Sub.NeedsReJit) + { + TranslateAsync(Thread.Memory, Position); } Position = Sub.Execute(Thread.ThreadState, Thread.Memory); @@ -86,24 +106,15 @@ namespace ChocolArm64 return CachedSubs.ContainsKey(Position); } - private ATranslatedSub TranslateSubroutine(AMemory Memory, long Position) + private ATranslatedSub Translate(AMemory Memory, long Position) { - (ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position); + ABlock Block = ADecoder.DecodeBasicBlock(this, Memory, Position); - string SubName = SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}"); + ABlock[] Graph = new ABlock[] { Block }; - PropagateName(Cfg.Graph, SubName); + string SubName = GetSubName(Position); - AILEmitterCtx Context = new AILEmitterCtx( - this, - Cfg.Graph, - Cfg.Root, - SubName); - - if (Context.CurrBlock.Position != Position) - { - Context.Emit(OpCodes.Br, Context.GetLabel(Position)); - } + AILEmitterCtx Context = new AILEmitterCtx(this, Graph, Block, SubName); do { @@ -111,23 +122,97 @@ namespace ChocolArm64 } while (Context.AdvanceOpCode()); - //Mark all methods that calls this method for ReJiting, - //since we can now call it directly which is faster. - foreach (ATranslatedSub TS in CachedSubs.Values) - { - if (TS.SubCalls.Contains(Position)) - { - TS.MarkForReJit(); - } - } - ATranslatedSub Subroutine = Context.GetSubroutine(); CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine); + if (!DoNotReJit.Contains(Position)) + { + TranslateAsync(Memory, Position); + } + + AOpCode LastOp = Block.GetLastOp(); + + if (LastOp.Emitter != AInstEmit.Ret && + LastOp.Emitter != AInstEmit.Br) + { + DoNotReJit.Add(LastOp.Position + 4); + } + return Subroutine; } + private void TranslateAsync(AMemory Memory, long Position) + { + lock (WaitingForTranslation) + { + if (!WaitingForTranslation.Add(Position)) + { + return; + } + } + + TranslationQueue.Enqueue(Position); + + if (AsyncTranslation == null || !AsyncTranslation.IsAlive) + { + AsyncTranslation = new Thread(() => TranslateAsyncWork(Memory)); + + AsyncTranslation.Priority = ThreadPriority.Lowest; + + AsyncTranslation.Start(); + } + } + + private void TranslateAsyncWork(AMemory Memory) + { + while (TranslationQueue.TryDequeue(out long Position)) + { + (ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position); + + string SubName = GetSubName(Position); + + PropagateName(Cfg.Graph, SubName); + + AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName); + + if (Context.CurrBlock.Position != Position) + { + Context.Emit(OpCodes.Br, Context.GetLabel(Position)); + } + + do + { + Context.EmitOpCode(); + } + while (Context.AdvanceOpCode()); + + //Mark all methods that calls this method for ReJiting, + //since we can now call it directly which is faster. + foreach (ATranslatedSub TS in CachedSubs.Values) + { + if (TS.HasCallee(Position)) + { + TS.MarkForReJit(); + } + } + + ATranslatedSub Subroutine = Context.GetSubroutine(); + + CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine); + + lock (WaitingForTranslation) + { + WaitingForTranslation.Remove(Position); + } + } + } + + private string GetSubName(long Position) + { + return SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}"); + } + private void PropagateName(ABlock[] Graph, string Name) { foreach (ABlock Block in Graph) diff --git a/ChocolArm64/Decoder/ADecoder.cs b/ChocolArm64/Decoder/ADecoder.cs index 4430229034..0e28ec0417 100644 --- a/ChocolArm64/Decoder/ADecoder.cs +++ b/ChocolArm64/Decoder/ADecoder.cs @@ -13,11 +13,25 @@ namespace ChocolArm64.Decoder private static ConcurrentDictionary OpActivators; + private const int MaxGraphLength = 40; + static ADecoder() { OpActivators = new ConcurrentDictionary(); } + public static ABlock DecodeBasicBlock( + ATranslator Translator, + AMemory Memory, + long Start) + { + ABlock Block = new ABlock(Start); + + FillBlock(Memory, Block); + + return Block; + } + public static (ABlock[] Graph, ABlock Root) DecodeSubroutine( ATranslator Translator, AMemory Memory, @@ -32,6 +46,11 @@ namespace ChocolArm64.Decoder { if (!Visited.TryGetValue(Position, out ABlock Output)) { + if (Visited.Count >= MaxGraphLength) + { + return null; + } + Output = new ABlock(Position); Blocks.Enqueue(Output); diff --git a/ChocolArm64/Instruction/AInstEmitException.cs b/ChocolArm64/Instruction/AInstEmitException.cs index 209ba56f5b..2117958bb0 100644 --- a/ChocolArm64/Instruction/AInstEmitException.cs +++ b/ChocolArm64/Instruction/AInstEmitException.cs @@ -2,6 +2,7 @@ using ChocolArm64.Decoder; using ChocolArm64.State; using ChocolArm64.Translation; using System.Reflection; +using System.Reflection.Emit; namespace ChocolArm64.Instruction { @@ -37,6 +38,12 @@ namespace ChocolArm64.Instruction { Context.EmitLoadState(Context.CurrBlock.Next); } + else + { + Context.EmitLdc_I8(Op.Position + 4); + + Context.Emit(OpCodes.Br, Context.ExitLabel); + } } public static void Und(AILEmitterCtx Context) @@ -60,6 +67,12 @@ namespace ChocolArm64.Instruction { Context.EmitLoadState(Context.CurrBlock.Next); } + else + { + Context.EmitLdc_I8(Op.Position + 4); + + Context.Emit(OpCodes.Br, Context.ExitLabel); + } } } } \ No newline at end of file diff --git a/ChocolArm64/Instruction/AInstEmitFlow.cs b/ChocolArm64/Instruction/AInstEmitFlow.cs index be68aa6c97..03e7505ef2 100644 --- a/ChocolArm64/Instruction/AInstEmitFlow.cs +++ b/ChocolArm64/Instruction/AInstEmitFlow.cs @@ -11,14 +11,41 @@ namespace ChocolArm64.Instruction { AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp; - Context.Emit(OpCodes.Br, Context.GetLabel(Op.Imm)); + if (Context.CurrBlock.Branch != null) + { + Context.Emit(OpCodes.Br, Context.GetLabel(Op.Imm)); + } + else + { + Context.EmitStoreState(); + Context.EmitLdc_I8(Op.Imm); + + Context.Emit(OpCodes.Br, Context.ExitLabel); + } } public static void B_Cond(AILEmitterCtx Context) { AOpCodeBImmCond Op = (AOpCodeBImmCond)Context.CurrOp; - Context.EmitCondBranch(Context.GetLabel(Op.Imm), Op.Cond); + AILLabel LblTaken; + + if (Context.CurrBlock.Branch != null) + { + LblTaken = Context.GetLabel(Op.Imm); + } + else + { + LblTaken = new AILLabel(); + } + + Context.EmitCondBranch(LblTaken, Op.Cond); + + if (Context.CurrBlock.Next == null || + Context.CurrBlock.Branch == null) + { + EmitBranchPaths(Context, LblTaken); + } } public static void Bl(AILEmitterCtx Context) @@ -48,10 +75,7 @@ namespace ChocolArm64.Instruction Context.Emit(OpCodes.Pop); - if (Context.CurrBlock.Next != null) - { - Context.EmitLoadState(Context.CurrBlock.Next); - } + Context.EmitLoadState(Context.CurrBlock.Next); } else { @@ -93,7 +117,7 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rt); Context.EmitLdc_I(0); - Context.Emit(ILOp, Context.GetLabel(Op.Imm)); + EmitBranch(Context, ILOp); } public static void Ret(AILEmitterCtx Context) @@ -118,7 +142,69 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I(0); - Context.Emit(ILOp, Context.GetLabel(Op.Imm)); + EmitBranch(Context, ILOp); + } + + private static void EmitBranch(AILEmitterCtx Context, OpCode ILOp) + { + AOpCodeBImm Op = (AOpCodeBImm)Context.CurrOp; + + AILLabel LblTaken; + + if (Context.CurrBlock.Branch != null) + { + LblTaken = Context.GetLabel(Op.Imm); + } + else + { + LblTaken = new AILLabel(); + } + + Context.Emit(ILOp, LblTaken); + + if (Context.CurrBlock.Next == null || + Context.CurrBlock.Branch == null) + { + EmitBranchPaths(Context, LblTaken); + } + } + + private static void EmitBranchPaths(AILEmitterCtx Context, AILLabel LblTaken) + { + AOpCodeBImm Op = (AOpCodeBImm)Context.CurrOp; + + AILLabel LblEnd = null; + + if (Context.CurrBlock.Next == null) + { + EmitBranchExit(Context, Op.Position + 4); + } + else + { + LblEnd = new AILLabel(); + + Context.Emit(OpCodes.Br, LblEnd); + } + + if (Context.CurrBlock.Branch == null) + { + Context.MarkLabel(LblTaken); + + EmitBranchExit(Context, Op.Imm); + } + + if (LblEnd != null) + { + Context.MarkLabel(LblEnd); + } + } + + private static void EmitBranchExit(AILEmitterCtx Context, long Imm) + { + Context.EmitStoreState(); + Context.EmitLdc_I8(Imm); + + Context.Emit(OpCodes.Br, Context.ExitLabel); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/AILEmitter.cs b/ChocolArm64/Translation/AILEmitter.cs index 8f6e1210f0..af37a6c752 100644 --- a/ChocolArm64/Translation/AILEmitter.cs +++ b/ChocolArm64/Translation/AILEmitter.cs @@ -58,11 +58,13 @@ namespace ChocolArm64.Translation this.Root = ILBlocks[Array.IndexOf(Graph, Root)]; } - public ATranslatedSub GetSubroutine() + public AILBlock GetILBlock(int Index) => ILBlocks[Index]; + + public ATranslatedSub GetSubroutine(HashSet Callees) { LocalAlloc = new ALocalAlloc(ILBlocks, Root); - InitSubroutine(); + InitSubroutine(Callees); InitLocals(); foreach (AILBlock ILBlock in ILBlocks) @@ -73,24 +75,7 @@ namespace ChocolArm64.Translation return Subroutine; } - public AILBlock GetILBlock(int Index) => ILBlocks[Index]; - - private void InitLocals() - { - int ParamsStart = ATranslatedSub.FixedArgTypes.Length; - - Locals = new Dictionary(); - - for (int Index = 0; Index < Subroutine.Params.Count; Index++) - { - ARegister Reg = Subroutine.Params[Index]; - - Generator.EmitLdarg(Index + ParamsStart); - Generator.EmitStloc(GetLocalIndex(Reg)); - } - } - - private void InitSubroutine() + private void InitSubroutine(HashSet Callees) { List Params = new List(); @@ -114,9 +99,24 @@ namespace ChocolArm64.Translation Generator = Mthd.GetILGenerator(); - Subroutine = new ATranslatedSub(Mthd, Params); + Subroutine = new ATranslatedSub(Mthd, Params, Callees); } + private void InitLocals() + { + int ParamsStart = ATranslatedSub.FixedArgTypes.Length; + + Locals = new Dictionary(); + + for (int Index = 0; Index < Subroutine.Params.Count; Index++) + { + ARegister Reg = Subroutine.Params[Index]; + + Generator.EmitLdarg(Index + ParamsStart); + Generator.EmitStloc(GetLocalIndex(Reg)); + } + } + private Type[] GetParamTypes(IList Params) { Type[] FixedArgs = ATranslatedSub.FixedArgTypes; diff --git a/ChocolArm64/Translation/AILEmitterCtx.cs b/ChocolArm64/Translation/AILEmitterCtx.cs index ffcfa851ae..8eda820fdb 100644 --- a/ChocolArm64/Translation/AILEmitterCtx.cs +++ b/ChocolArm64/Translation/AILEmitterCtx.cs @@ -12,14 +12,13 @@ namespace ChocolArm64.Translation { private ATranslator Translator; - private Dictionary Labels; + private HashSet Callees; - private AILEmitter Emitter; + private Dictionary Labels; - private AILBlock ILBlock; + public AILLabel ExitLabel { get; private set; } - private AOpCode OptOpLastCompare; - private AOpCode OptOpLastFlagSet; + private bool HasExit; private int BlkIndex; private int OpcIndex; @@ -29,6 +28,13 @@ namespace ChocolArm64.Translation public ABlock CurrBlock => Graph[BlkIndex]; public AOpCode CurrOp => Graph[BlkIndex].OpCodes[OpcIndex]; + private AILEmitter Emitter; + + private AILBlock ILBlock; + + private AOpCode OptOpLastCompare; + private AOpCode OptOpLastFlagSet; + //This is the index of the temporary register, used to store temporary //values needed by some functions, since IL doesn't have a swap instruction. //You can use any value here as long it doesn't conflict with the indices @@ -45,35 +51,67 @@ namespace ChocolArm64.Translation ABlock Root, string SubName) { + if (Translator == null) + { + throw new ArgumentNullException(nameof(Translator)); + } + + if (Graph == null) + { + throw new ArgumentNullException(nameof(Graph)); + } + + if (Root == null) + { + throw new ArgumentNullException(nameof(Root)); + } + this.Translator = Translator; this.Graph = Graph; this.Root = Root; + Callees = new HashSet(); + Labels = new Dictionary(); + ExitLabel = new AILLabel(); + Emitter = new AILEmitter(Graph, Root, SubName); ILBlock = Emitter.GetILBlock(0); OpcIndex = -1; - if (!AdvanceOpCode()) + if (Graph.Length == 0 || !AdvanceOpCode()) { throw new ArgumentException(nameof(Graph)); } } - public ATranslatedSub GetSubroutine() => Emitter.GetSubroutine(); + public ATranslatedSub GetSubroutine() + { + return Emitter.GetSubroutine(Callees); + } public bool AdvanceOpCode() { - while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0)) + if (OpcIndex + 1 == CurrBlock.OpCodes.Count && + BlkIndex + 1 == Graph.Length) { - if (BlkIndex + 1 >= Graph.Length) + if (!HasExit) { - return false; + MarkLabel(ExitLabel); + + Emit(OpCodes.Ret); + + HasExit = true; } + return false; + } + + while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0)) + { BlkIndex++; OpcIndex = -1; @@ -100,6 +138,13 @@ namespace ChocolArm64.Translation public bool TryOptEmitSubroutineCall() { + Callees.Add(((AOpCodeBImm)CurrOp).Imm); + + if (CurrBlock.Next == null) + { + return false; + } + if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub)) { return false; diff --git a/ChocolArm64/Translation/ALocalAlloc.cs b/ChocolArm64/Translation/ALocalAlloc.cs index f23af9c767..8edb38b538 100644 --- a/ChocolArm64/Translation/ALocalAlloc.cs +++ b/ChocolArm64/Translation/ALocalAlloc.cs @@ -70,21 +70,6 @@ namespace ChocolArm64.Translation private const int MaxOptGraphLength = 55; public ALocalAlloc(AILBlock[] Graph, AILBlock Root) - { - IntPaths = new Dictionary(); - VecPaths = new Dictionary(); - - if (Graph.Length < MaxOptGraphLength) - { - InitializeOptimal(Graph, Root); - } - else - { - InitializeFast(Graph); - } - } - - private void InitializeOptimal(AILBlock[] Graph, AILBlock Root) { //This will go through all possible paths on the graph, //and store all inputs/outputs for each block. A register @@ -95,6 +80,9 @@ namespace ChocolArm64.Translation //when doing input elimination. Each block chain have a root, that's where //the code starts executing. They are present on the subroutine start point, //and on call return points too (address written to X30 by BL). + IntPaths = new Dictionary(); + VecPaths = new Dictionary(); + HashSet Visited = new HashSet(); Queue Unvisited = new Queue(); @@ -175,37 +163,6 @@ namespace ChocolArm64.Translation } } - private void InitializeFast(AILBlock[] Graph) - { - //This is WAY faster than InitializeOptimal, but results in - //uneeded loads and stores, so the resulting code will be slower. - long IntInputs = 0; - long IntOutputs = 0; - long VecInputs = 0; - long VecOutputs = 0; - - foreach (AILBlock Block in Graph) - { - IntInputs |= Block.IntInputs; - IntOutputs |= Block.IntOutputs; - VecInputs |= Block.VecInputs; - VecOutputs |= Block.VecOutputs; - } - - //It's possible that not all code paths writes to those output registers, - //in those cases if we attempt to write an output registers that was - //not written, we will be just writing zero and messing up the old register value. - //So we just need to ensure that all outputs are loaded. - IntInputs |= IntOutputs; - VecInputs |= VecOutputs; - - foreach (AILBlock Block in Graph) - { - IntPaths.Add(Block, new PathIo(Block, IntInputs, IntOutputs)); - VecPaths.Add(Block, new PathIo(Block, VecInputs, VecOutputs)); - } - } - public long GetIntInputs(AILBlock Root) => GetInputsImpl(Root, IntPaths.Values); public long GetVecInputs(AILBlock Root) => GetInputsImpl(Root, VecPaths.Values);