Implement speculative translation on the CPU, and change the way branches to unknown or untranslated addresses work

This commit is contained in:
gdkchan 2019-01-27 01:43:15 -02:00
parent f5b4f6ccc4
commit fa87345938
18 changed files with 380 additions and 144 deletions

View file

@ -28,11 +28,7 @@ namespace ChocolArm64.Decoders
return block;
}
public static Block DecodeSubroutine(
TranslatorCache cache,
MemoryManager memory,
long start,
ExecutionMode mode)
public static Block DecodeSubroutine(MemoryManager memory, long start, ExecutionMode mode)
{
Dictionary<long, Block> visited = new Dictionary<long, Block>();
Dictionary<long, Block> visitedEnd = new Dictionary<long, Block>();
@ -67,23 +63,17 @@ namespace ChocolArm64.Decoders
//(except BL/BLR that are sub calls) or end of executable, Next is null.
if (current.OpCodes.Count > 0)
{
bool hasCachedSub = false;
OpCode64 lastOp = current.GetLastOp();
if (lastOp is IOpCodeBImm op)
bool isCall = lastOp.Emitter == InstEmit.Bl ||
lastOp.Emitter == InstEmit.Blr;
if (lastOp is IOpCodeBImm op && !isCall)
{
if (op.Emitter == InstEmit.Bl)
{
hasCachedSub = cache.HasSubroutine(op.Imm);
}
else
{
current.Branch = Enqueue(op.Imm);
}
current.Branch = Enqueue(op.Imm);
}
if (!IsUnconditionalBranch(lastOp) || hasCachedSub)
if (!IsUnconditionalBranch(lastOp) || isCall)
{
current.Next = Enqueue(current.EndPosition);
}

View file

@ -17,6 +17,8 @@ namespace ChocolArm64.Instructions
}
else
{
//context.TranslateAhead(op.Imm);
context.EmitStoreState();
context.EmitLdc_I8(op.Imm);
@ -35,6 +37,8 @@ namespace ChocolArm64.Instructions
{
OpCodeBImmAl64 op = (OpCodeBImmAl64)context.CurrOp;
//context.TranslateAhead(op.Position + 4);
context.EmitLdc_I(op.Position + 4);
context.EmitStint(RegisterAlias.Lr);
context.EmitStoreState();
@ -46,12 +50,14 @@ namespace ChocolArm64.Instructions
{
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
//context.TranslateAhead(op.Position + 4);
context.EmitLdintzr(op.Rn);
context.EmitLdc_I(op.Position + 4);
context.EmitStint(RegisterAlias.Lr);
context.EmitStoreState();
context.Emit(OpCodes.Ret);
InstEmitFlowHelper.EmitCallVirtual(context);
}
public static void Br(ILEmitterCtx context)
@ -113,6 +119,8 @@ namespace ChocolArm64.Instructions
}
else
{
//BranchTranslateAhead(context, op);
context.EmitStoreState();
ILLabel lblTaken = new ILLabel();
@ -142,6 +150,8 @@ namespace ChocolArm64.Instructions
}
else
{
//BranchTranslateAhead(context, op);
context.EmitStoreState();
ILLabel lblTaken = new ILLabel();
@ -159,5 +169,11 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Ret);
}
}
//Requests ahead-of-time translation for both possible successors of an
//immediate branch: first the instruction that follows it, then its target.
private static void BranchTranslateAhead(ILEmitterCtx context, OpCodeBImm64 op)
{
    long fallThroughAddress = op.Position + 4;
    long takenAddress       = op.Imm;

    context.TranslateAhead(fallThroughAddress);
    context.TranslateAhead(takenAddress);
}
}
}

View file

@ -1,4 +1,6 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
@ -7,12 +9,63 @@ namespace ChocolArm64.Instructions
{
public static void EmitCall(ILEmitterCtx context, long imm)
{
if (context.TryOptEmitSubroutineCall())
if (!context.TryOptEmitSubroutineCall())
{
context.TranslateAhead(imm);
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdc_I8(imm);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitCall(typeof(TranslatedSub), nameof(TranslatedSub.Execute));
}
EmitContinueOrReturnCheck(context);
}
//Emits IL for a call whose target address is only known at run time.
//The caller is expected to have left the target address on the evaluation
//stack (the BLR emitter loads the Rn register before calling this method).
//The emitted code asks the current translator for the subroutine at that
//address, executes it, and then runs the continue-or-return check.
public static void EmitCallVirtual(ILEmitterCtx context)
{
//Presumably moves the value on top of the stack (the target address) into
//a temporary, so the translator instance can be pushed below it.
context.EmitSttmp();
//Push state.CurrentTranslator; the field is internal, hence NonPublic.
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
//Push the arguments of GetOrTranslateVirtualSubroutine(state, memory, position),
//where position is the address saved in the temporary above.
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));
//The TranslatedSub returned by the translator is now on the stack;
//call Execute(state, memory) on it.
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitCall(typeof(TranslatedSub), nameof(TranslatedSub.Execute));
EmitContinueOrReturnCheck(context);
}
private static void EmitContinueOrReturnCheck(ILEmitterCtx context)
{
//Note: The return value of the called method will be placed
//at the Stack, the return value is always a Int64 with the
//return address of the function. We check if the address is
//correct, if it isn't we keep returning until we reach the dispatcher.
if (context.CurrBlock.Next != null)
{
//Note: the return value of the called method will be placed
//at the Stack, the return value is always a Int64 with the
//return address of the function. We check if the address is
//correct, if it isn't we keep returning until we reach the dispatcher.
context.Emit(OpCodes.Dup);
context.EmitLdc_I8(context.CurrOp.Position + 4);
@ -30,8 +83,6 @@ namespace ChocolArm64.Instructions
}
else
{
context.EmitLdc_I8(imm);
context.Emit(OpCodes.Ret);
}
}

View file

@ -82,6 +82,8 @@ namespace ChocolArm64.State
private static double _hostTickFreq;
internal Translator CurrentTranslator;
static CpuThreadState()
{
_hostTickFreq = 1.0 / Stopwatch.Frequency;

View file

@ -3,7 +3,6 @@ using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Reflection;
using System.Reflection.Emit;
@ -11,11 +10,9 @@ namespace ChocolArm64
{
class TranslatedSub
{
private delegate long Aa64Subroutine(CpuThreadState register, MemoryManager memory);
private delegate long ArmSubroutine(CpuThreadState register, MemoryManager memory);
private const int MinCallCountForReJit = 250;
private Aa64Subroutine _execDelegate;
private ArmSubroutine _execDelegate;
public static int StateArgIdx { get; private set; }
public static int MemoryArgIdx { get; private set; }
@ -26,27 +23,21 @@ namespace ChocolArm64
public ReadOnlyCollection<Register> SubArgs { get; private set; }
private HashSet<long> _callers;
public TranslationTier Tier { get; private set; }
private TranslatedSubType _type;
private int _callCount;
private bool _needsReJit;
public TranslatedSub(DynamicMethod method, List<Register> subArgs)
public TranslatedSub(DynamicMethod method, List<Register> subArgs, TranslationTier tier)
{
Method = method ?? throw new ArgumentNullException(nameof(method));;
SubArgs = subArgs?.AsReadOnly() ?? throw new ArgumentNullException(nameof(subArgs));
_callers = new HashSet<long>();
Tier = tier;
PrepareDelegate();
}
static TranslatedSub()
{
MethodInfo mthdInfo = typeof(Aa64Subroutine).GetMethod("Invoke");
MethodInfo mthdInfo = typeof(ArmSubroutine).GetMethod("Invoke");
ParameterInfo[] Params = mthdInfo.GetParameters();
@ -54,15 +45,15 @@ namespace ChocolArm64
for (int index = 0; index < Params.Length; index++)
{
Type paramType = Params[index].ParameterType;
Type argType = Params[index].ParameterType;
FixedArgTypes[index] = paramType;
FixedArgTypes[index] = argType;
if (paramType == typeof(CpuThreadState))
if (argType == typeof(CpuThreadState))
{
StateArgIdx = index;
}
else if (paramType == typeof(MemoryManager))
else if (argType == typeof(MemoryManager))
{
MemoryArgIdx = index;
}
@ -89,52 +80,12 @@ namespace ChocolArm64
generator.Emit(OpCodes.Call, Method);
generator.Emit(OpCodes.Ret);
_execDelegate = (Aa64Subroutine)mthd.CreateDelegate(typeof(Aa64Subroutine));
}
public bool ShouldReJit()
{
if (_needsReJit && _callCount < MinCallCountForReJit)
{
_callCount++;
return false;
}
return _needsReJit;
_execDelegate = (ArmSubroutine)mthd.CreateDelegate(typeof(ArmSubroutine));
}
public long Execute(CpuThreadState threadState, MemoryManager memory)
{
return _execDelegate(threadState, memory);
}
public void AddCaller(long position)
{
lock (_callers)
{
_callers.Add(position);
}
}
public long[] GetCallerPositions()
{
lock (_callers)
{
return _callers.ToArray();
}
}
public void SetType(TranslatedSubType type)
{
_type = type;
if (type == TranslatedSubType.SubTier0)
{
_needsReJit = true;
}
}
public void MarkForReJit() => _needsReJit = true;
}
}

View file

@ -1,8 +0,0 @@
namespace ChocolArm64
{
enum TranslatedSubType
{
SubTier0,
SubTier1
}
}

View file

@ -11,6 +11,7 @@ namespace ChocolArm64.Translation
class ILEmitterCtx
{
private TranslatorCache _cache;
private TranslatorQueue _queue;
private Dictionary<long, ILLabel> _labels;
@ -47,9 +48,10 @@ namespace ChocolArm64.Translation
private const int VecTmp1Index = -5;
private const int VecTmp2Index = -6;
public ILEmitterCtx(TranslatorCache cache, Block graph)
public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, Block graph)
{
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_queue = queue ?? throw new ArgumentNullException(nameof(queue));
_currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
_labels = new Dictionary<long, ILLabel>();
@ -243,6 +245,16 @@ namespace ChocolArm64.Translation
return new ILBlock();
}
//Queues a Tier1 translation request for the code at the given position,
//unless the cache already holds a translation for it above Tier0.
public void TranslateAhead(long position, ExecutionMode mode = ExecutionMode.Aarch64)
{
    bool hasOptimizedSub = _cache.TryGetSubroutine(position, out TranslatedSub cachedSub) &&
                           cachedSub.Tier != TranslationTier.Tier0;

    if (!hasOptimizedSub)
    {
        _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
    }
}
public bool TryOptEmitSubroutineCall()
{
if (_currBlock.Next == null)
@ -277,8 +289,6 @@ namespace ChocolArm64.Translation
EmitCall(subroutine.Method);
subroutine.AddCaller(_subPosition);
return true;
}
@ -463,7 +473,12 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILOpCodeBranch(ilOp, label));
}
public void Emit(string text)
//Appends an IL operation that loads the value of the given field
//to the IL block that is currently being built.
public void EmitFieldLoad(FieldInfo info)
{
    ILOpCodeLoadField loadField = new ILOpCodeLoadField(info);

    _ilBlock.Add(loadField);
}
public void EmitPrint(string text)
{
_ilBlock.Add(new ILOpCodeLog(text));
}

View file

@ -26,7 +26,7 @@ namespace ChocolArm64.Translation
_subName = subName;
}
public TranslatedSub GetSubroutine()
public TranslatedSub GetSubroutine(TranslationTier tier)
{
LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
@ -52,7 +52,7 @@ namespace ChocolArm64.Translation
Generator = method.GetILGenerator();
TranslatedSub subroutine = new TranslatedSub(method, subArgs);
TranslatedSub subroutine = new TranslatedSub(method, subArgs, tier);
int argsStart = TranslatedSub.FixedArgTypes.Length;

View file

@ -5,16 +5,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeCall : IILEmit
{
private MethodInfo _mthdInfo;
public MethodInfo Info { get; private set; }
public ILOpCodeCall(MethodInfo mthdInfo)
public ILOpCodeCall(MethodInfo info)
{
_mthdInfo = mthdInfo;
Info = info;
}
public void Emit(ILMethodBuilder context)
{
context.Generator.Emit(OpCodes.Call, _mthdInfo);
context.Generator.Emit(OpCodes.Call, Info);
}
}
}

View file

@ -0,0 +1,20 @@
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    //IL operation that emits a "ldfld" instruction for a given field.
    struct ILOpCodeLoadField : IILEmit
    {
        private readonly FieldInfo _info;

        //Field whose value is loaded by the emitted instruction.
        public FieldInfo Info => _info;

        public ILOpCodeLoadField(FieldInfo info)
        {
            _info = info;
        }

        //Writes the ldfld instruction to the generator of the method being built.
        public void Emit(ILMethodBuilder context) => context.Generator.Emit(OpCodes.Ldfld, _info);
    }
}

View file

@ -0,0 +1,11 @@
namespace ChocolArm64
{
    //Tier attached to translated subroutines and to translation requests.
    //The numeric value is also used as an index into the per-tier request
    //queues, which are drained starting from index 0.
    enum TranslationTier
    {
        Tier0 = 0,
        Tier1 = 1,
        Tier2 = 2,

        //Number of tiers above; used to size per-tier arrays, not a real tier.
        Count = 3
    }
}

View file

@ -4,29 +4,59 @@ using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Threading;
namespace ChocolArm64
{
public class Translator
{
private MemoryManager _memory;
private CpuThreadState _dummyThreadState;
private TranslatorCache _cache;
private TranslatorQueue _queue;
private Thread _backgroundTranslator;
public event EventHandler<CpuTraceEventArgs> CpuTrace;
public bool EnableCpuTrace { get; set; }
public Translator()
private volatile int _threadCount;
//Creates a translator. The given memory manager is the one the background
//translation thread reads guest code from.
public Translator(MemoryManager memory)
{
    _memory = memory;

    //Thread state that is never marked as running; it is only handed to
    //freshly translated subroutines by ForceAheadOfTimeCompilation.
    _dummyThreadState = new CpuThreadState { Running = false };

    _cache = new TranslatorCache();
    _queue = new TranslatorQueue();
}
//Runs guest code on the calling thread, starting at the given position,
//until the inner execution loop returns.
//The number of threads currently executing is tracked in _threadCount:
//the first one to enter starts the background translation thread, and the
//last one to leave signals the queue so that thread can stop.
internal void ExecuteSubroutine(CpuThread thread, long position)
{
    if (Interlocked.Increment(ref _threadCount) == 1)
    {
        _backgroundTranslator = new Thread(TranslateQueuedSubs);
        _backgroundTranslator.Start();
    }

    try
    {
        ExecuteSubroutine(thread.ThreadState, thread.Memory, position);
    }
    finally
    {
        //Always undo the increment, even when guest execution throws.
        //Otherwise the counter never reaches zero again and the background
        //translator keeps waiting for work forever.
        if (Interlocked.Decrement(ref _threadCount) == 0)
        {
            _queue.ForceSignal();
        }
    }
}
private void ExecuteSubroutine(CpuThreadState state, MemoryManager memory, long position)
{
state.CurrentTranslator = this;
do
{
if (EnableCpuTrace)
@ -34,50 +64,88 @@ namespace ChocolArm64
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
}
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateTier0(memory, position, state.GetExecutionMode());
}
TranslatedSub subroutine = GetOrTranslateSubroutine(state, memory, position);
if (sub.ShouldReJit())
{
TranslateTier1(memory, position, state.GetExecutionMode());
}
position = sub.Execute(state, memory);
position = subroutine.Execute(state, memory);
}
while (position != 0 && state.Running);
state.CurrentTranslator = null;
}
internal bool HasCachedSub(long position)
internal TranslatedSub GetOrTranslateVirtualSubroutine(CpuThreadState state, MemoryManager memory, long position)
{
return _cache.HasSubroutine(position);
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier2));
subroutine = TranslateLowCq(memory, position, state.GetExecutionMode());
}
return subroutine;
}
private TranslatedSub TranslateTier0(MemoryManager memory, long position, ExecutionMode mode)
//Returns the cached subroutine for the given position, or performs a
//low quality translation on the calling thread when none is cached.
//Note: this method is also called from emitted IL (it is looked up by
//name), so its name and parameter list must not change.
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, MemoryManager memory, long position)
{
    if (_cache.TryGetSubroutine(position, out TranslatedSub cachedSub))
    {
        return cachedSub;
    }

    return TranslateLowCq(memory, position, state.GetExecutionMode());
}
//Body of the background translation thread. Keeps draining the request
//queue for as long as at least one thread is executing guest code.
private void TranslateQueuedSubs()
{
    while (_threadCount != 0)
    {
        if (!_queue.TryDequeue(out TranslatorQueueItem request))
        {
            //Nothing pending, sleep until something is enqueued
            //(or until the queue is force signaled).
            _queue.WaitForItems();

            continue;
        }

        //Skip requests for code that is already cached at the
        //requested tier or at a higher one.
        if (_cache.TryGetSubroutine(request.Position, out TranslatedSub cachedSub) &&
            request.Tier <= cachedSub.Tier)
        {
            continue;
        }

        if (request.Tier != TranslationTier.Tier0)
        {
            TranslateHighCq(_memory, request.Position, request.Mode);
        }
        else
        {
            TranslateLowCq(_memory, request.Position, request.Mode);
        }
    }
}
private TranslatedSub TranslateLowCq(MemoryManager memory, long position, ExecutionMode mode)
{
Block block = Decoder.DecodeBasicBlock(memory, position, mode);
ILEmitterCtx context = new ILEmitterCtx(_cache, block);
ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, block);
string subName = GetSubroutineName(position);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine();
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
subroutine.SetType(TranslatedSubType.SubTier0);
TranslatedSub cacheSub = _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
_cache.AddOrUpdate(position, subroutine, block.OpCodes.Count);
return subroutine;
return cacheSub;
}
private void TranslateTier1(MemoryManager memory, long position, ExecutionMode mode)
private void TranslateHighCq(MemoryManager memory, long position, ExecutionMode mode)
{
Block graph = Decoder.DecodeSubroutine(_cache, memory, position, mode);
Block graph = Decoder.DecodeSubroutine(memory, position, mode);
ILEmitterCtx context = new ILEmitterCtx(_cache, graph);
ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, graph);
ILBlock[] ilBlocks = context.GetILBlocks();
@ -85,9 +153,7 @@ namespace ChocolArm64
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine();
subroutine.SetType(TranslatedSubType.SubTier1);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
int ilOpCount = 0;
@ -98,23 +164,17 @@ namespace ChocolArm64
_cache.AddOrUpdate(position, subroutine, ilOpCount);
//Mark all methods that calls this method for ReJiting,
//since we can now call it directly which is faster.
if (_cache.TryGetSubroutine(position, out TranslatedSub oldSub))
{
foreach (long callerPos in oldSub.GetCallerPositions())
{
if (_cache.TryGetSubroutine(position, out TranslatedSub callerSub))
{
callerSub.MarkForReJit();
}
}
}
ForceAheadOfTimeCompilation(subroutine);
}
private string GetSubroutineName(long position)
{
return $"Sub{position:x16}";
}
//Invokes the subroutine once with the dummy thread state (whose Running
//flag is false) and without a memory manager. Judging by the name, the
//goal is to have the runtime compile the method before guest code needs it.
//NOTE(review): relies on the emitted code returning before it touches
//memory when the state is not running - confirm.
private void ForceAheadOfTimeCompilation(TranslatedSub subroutine) =>
    subroutine.Execute(_dummyThreadState, null);
}
}

View file

@ -58,6 +58,31 @@ namespace ChocolArm64
_sortedCache = new LinkedList<long>();
}
//Adds the subroutine to the cache unless an entry already exists for the
//given position. Returns the subroutine that ends up stored in the cache,
//which may be a previously added one rather than the argument.
public TranslatedSub GetOrAdd(long position, TranslatedSub subroutine, int size)
{
    ClearCacheIfNeeded();

    lock (_sortedCache)
    {
        //The list node is created up front because the bucket needs it;
        //it is taken out again below if the bucket loses to an existing entry.
        LinkedListNode<long> newNode = _sortedCache.AddLast(position);

        CacheBucket candidate = new CacheBucket(subroutine, newNode, size);

        CacheBucket stored = _cache.GetOrAdd(position, candidate);

        if (stored.Node != newNode)
        {
            _sortedCache.Remove(newNode);

            return stored.Subroutine;
        }

        _totalSize += size;

        return stored.Subroutine;
    }
}
public void AddOrUpdate(long position, TranslatedSub subroutine, int size)
{
ClearCacheIfNeeded();

View file

@ -0,0 +1,81 @@
using System.Collections.Concurrent;
using System.Threading;
namespace ChocolArm64
{
    //Holds pending translation requests, one stack per translation tier.
    //Requests are taken from the lowest tier index first, and in LIFO order
    //within a tier. A consumer can block on WaitForItems until something is
    //enqueued or until ForceSignal is called.
    class TranslatorQueue
    {
        //Soft limit for the amount of pending requests on each tier.
        private const int MaxQueueSize = 1024;

        private readonly ConcurrentStack<TranslatorQueueItem>[] _translationQueue;

        private readonly ManualResetEvent _queueDataReceivedEvent;

        //Set once by ForceSignal; from then on the event is never reset.
        //Only accessed while holding the lock on the event.
        private bool _signaled;

        public TranslatorQueue()
        {
            _translationQueue = new ConcurrentStack<TranslatorQueueItem>[(int)TranslationTier.Count];

            for (int prio = 0; prio < _translationQueue.Length; prio++)
            {
                _translationQueue[prio] = new ConcurrentStack<TranslatorQueueItem>();
            }

            _queueDataReceivedEvent = new ManualResetEvent(false);
        }

        //Pushes a request on the stack of its tier and wakes up the consumer.
        //When the stack is at the size limit, one entry is popped first
        //(this being a stack, that is the most recently pushed one).
        public void Enqueue(TranslatorQueueItem item)
        {
            ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];

            if (queue.Count >= MaxQueueSize)
            {
                queue.TryPop(out _);
            }

            queue.Push(item);

            _queueDataReceivedEvent.Set();
        }

        //Takes one pending request, trying the tiers in index order.
        //Returns false, with item set to its default value, when all
        //stacks are empty.
        public bool TryDequeue(out TranslatorQueueItem item)
        {
            for (int prio = 0; prio < _translationQueue.Length; prio++)
            {
                if (_translationQueue[prio].TryPop(out item))
                {
                    return true;
                }
            }

            item = default(TranslatorQueueItem);

            return false;
        }

        //Blocks until Enqueue or ForceSignal sets the event, then resets the
        //event so that the next call blocks again. After ForceSignal the
        //event is left set, so this method returns immediately from then on.
        public void WaitForItems()
        {
            _queueDataReceivedEvent.WaitOne();

            lock (_queueDataReceivedEvent)
            {
                if (!_signaled)
                {
                    _queueDataReceivedEvent.Reset();
                }
            }
        }

        //Permanently signals the event, releasing any thread blocked on
        //WaitForItems.
        //The event is intentionally not closed here: the consumer thread may
        //still be translating (and calling Enqueue), or be about to call
        //WaitForItems, and using a closed wait handle throws
        //ObjectDisposedException. The handle is released by its finalizer.
        public void ForceSignal()
        {
            lock (_queueDataReceivedEvent)
            {
                _signaled = true;

                _queueDataReceivedEvent.Set();
            }
        }
    }
}

View file

@ -0,0 +1,20 @@
using ChocolArm64.State;
namespace ChocolArm64
{
    //Immutable description of a translation request: what to translate,
    //in which execution mode, and for which tier.
    struct TranslatorQueueItem
    {
        private readonly long            _position;
        private readonly ExecutionMode   _mode;
        private readonly TranslationTier _tier;

        //Guest address where the code to translate starts.
        public long Position => _position;

        //Execution mode the code should be decoded with.
        public ExecutionMode Mode => _mode;

        //Tier of the request; also selects the queue the item is placed on.
        public TranslationTier Tier => _tier;

        public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
        {
            _position = position;
            _mode     = mode;
            _tier     = tier;
        }
    }
}

View file

@ -789,7 +789,7 @@ namespace Ryujinx.Graphics.Graphics3d
GalVertexAttribType Type = (GalVertexAttribType)((Packed >> 27) & 0x7);
bool IsRgba = ((Packed >> 31) & 1) != 0;
// Check vertex array is enabled to avoid out of bounds exception when reading bytes
bool Enable = (ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + ArrayIndex * 4) & 0x1000) != 0;

View file

@ -109,7 +109,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
_threads = new LinkedList<KThread>();
Translator = new Translator();
Translator = new Translator(CpuMemory);
Translator.CpuTrace += CpuTraceHandler;

View file

@ -48,10 +48,12 @@ namespace Ryujinx.Tests.Cpu
_entryPoint = Position;
Translator translator = new Translator();
_ramPointer = Marshal.AllocHGlobal(new IntPtr(_size));
_memory = new MemoryManager(_ramPointer);
_memory.Map(Position, 0, _size);
Translator translator = new Translator(_memory);
_thread = new CpuThread(translator, _memory, _entryPoint);
if (_unicornAvailable)