Enable tier 0 while fixing some perf issues related to tier 0
This commit is contained in:
parent
4ab1fb031c
commit
35c6b4e35d
7 changed files with 96 additions and 74 deletions
|
@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
|
||||||
{
|
{
|
||||||
if (context.Tier == TranslationTier.Tier0)
|
if (context.Tier == TranslationTier.Tier0)
|
||||||
{
|
{
|
||||||
|
context.EmitStoreState();
|
||||||
|
|
||||||
context.TranslateAhead(imm);
|
context.TranslateAhead(imm);
|
||||||
|
|
||||||
context.EmitLdc_I8(imm);
|
context.EmitLdc_I8(imm);
|
||||||
|
@ -22,6 +24,8 @@ namespace ChocolArm64.Instructions
|
||||||
|
|
||||||
if (!context.TryOptEmitSubroutineCall())
|
if (!context.TryOptEmitSubroutineCall())
|
||||||
{
|
{
|
||||||
|
context.HasSlowCall = true;
|
||||||
|
|
||||||
context.EmitStoreState();
|
context.EmitStoreState();
|
||||||
|
|
||||||
context.TranslateAhead(imm);
|
context.TranslateAhead(imm);
|
||||||
|
@ -34,6 +38,7 @@ namespace ChocolArm64.Instructions
|
||||||
|
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||||
context.EmitLdc_I8(imm);
|
context.EmitLdc_I8(imm);
|
||||||
|
context.EmitLdc_I4((int)CallType.Call);
|
||||||
|
|
||||||
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
|
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
|
||||||
|
|
||||||
|
@ -60,20 +65,6 @@ namespace ChocolArm64.Instructions
|
||||||
{
|
{
|
||||||
if (context.Tier == TranslationTier.Tier0)
|
if (context.Tier == TranslationTier.Tier0)
|
||||||
{
|
{
|
||||||
context.Emit(OpCodes.Dup);
|
|
||||||
|
|
||||||
context.EmitSttmp();
|
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
|
||||||
|
|
||||||
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
|
|
||||||
BindingFlags.Instance |
|
|
||||||
BindingFlags.NonPublic));
|
|
||||||
|
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
|
||||||
context.EmitLdtmp();
|
|
||||||
|
|
||||||
context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
|
|
||||||
|
|
||||||
context.Emit(OpCodes.Ret);
|
context.Emit(OpCodes.Ret);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -87,12 +78,11 @@ namespace ChocolArm64.Instructions
|
||||||
|
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||||
context.EmitLdtmp();
|
context.EmitLdtmp();
|
||||||
|
context.EmitLdc_I4(isJump
|
||||||
|
? (int)CallType.VirtualJump
|
||||||
|
: (int)CallType.VirtualCall);
|
||||||
|
|
||||||
string name = isJump
|
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
|
||||||
? nameof(Translator.GetOrTranslateVirtualSubroutineForJump)
|
|
||||||
: nameof(Translator.GetOrTranslateVirtualSubroutine);
|
|
||||||
|
|
||||||
context.EmitPrivateCall(typeof(Translator), name);
|
|
||||||
|
|
||||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||||
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
|
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
|
||||||
|
|
9
ChocolArm64/Translation/CallType.cs
Normal file
9
ChocolArm64/Translation/CallType.cs
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
namespace ChocolArm64.Translation
|
||||||
|
{
|
||||||
|
enum CallType
|
||||||
|
{
|
||||||
|
Call,
|
||||||
|
VirtualCall,
|
||||||
|
VirtualJump
|
||||||
|
}
|
||||||
|
}
|
|
@ -33,6 +33,8 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
public bool HasIndirectJump { get; set; }
|
public bool HasIndirectJump { get; set; }
|
||||||
|
|
||||||
|
public bool HasSlowCall { get; set; }
|
||||||
|
|
||||||
private Dictionary<Block, ILBlock> _visitedBlocks;
|
private Dictionary<Block, ILBlock> _visitedBlocks;
|
||||||
|
|
||||||
private Queue<Block> _branchTargets;
|
private Queue<Block> _branchTargets;
|
||||||
|
@ -300,22 +302,31 @@ namespace ChocolArm64.Translation
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1, isComplete: true));
|
_queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool TryOptEmitSubroutineCall()
|
public bool TryOptEmitSubroutineCall()
|
||||||
{
|
{
|
||||||
|
//Calls should always have a next block, unless
|
||||||
|
//we're translating a single basic block.
|
||||||
if (_currBlock.Next == null)
|
if (_currBlock.Next == null)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CurrOp.Emitter != InstEmit.Bl)
|
if (!(CurrOp is IOpCodeBImm op))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub sub))
|
if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//It's not worth calling a Tier0 method, because it
|
||||||
|
//contains slow code for only part of the function, rather than the entire function.
|
||||||
|
if (sub.Tier == TranslationTier.Tier0)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,7 @@ namespace ChocolArm64.Translation
|
||||||
IsSubComplete = isSubComplete;
|
IsSubComplete = isSubComplete;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TranslatedSub GetSubroutine(TranslationTier tier)
|
public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
|
||||||
{
|
{
|
||||||
RegUsage = new RegisterUsage();
|
RegUsage = new RegisterUsage();
|
||||||
|
|
||||||
|
@ -50,7 +50,12 @@ namespace ChocolArm64.Translation
|
||||||
long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
|
long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
|
||||||
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
|
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
|
||||||
|
|
||||||
TranslatedSub subroutine = new TranslatedSub(method, tier, intNiRegsMask, vecNiRegsMask);
|
TranslatedSub subroutine = new TranslatedSub(
|
||||||
|
method,
|
||||||
|
intNiRegsMask,
|
||||||
|
vecNiRegsMask,
|
||||||
|
tier,
|
||||||
|
isWorthOptimizing);
|
||||||
|
|
||||||
_locals = new Dictionary<Register, int>();
|
_locals = new Dictionary<Register, int>();
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,11 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
class TranslatedSub
|
class TranslatedSub
|
||||||
{
|
{
|
||||||
|
//This is the minimum number of calls needed for the method
|
||||||
|
//to be retranslated with higher-quality code. It's only worth
|
||||||
|
//doing that for hot code.
|
||||||
|
private const int MinCallCountForOpt = 30;
|
||||||
|
|
||||||
public ArmSubroutine Delegate { get; private set; }
|
public ArmSubroutine Delegate { get; private set; }
|
||||||
|
|
||||||
public static int StateArgIdx { get; }
|
public static int StateArgIdx { get; }
|
||||||
|
@ -24,16 +29,22 @@ namespace ChocolArm64.Translation
|
||||||
public long IntNiRegsMask { get; }
|
public long IntNiRegsMask { get; }
|
||||||
public long VecNiRegsMask { get; }
|
public long VecNiRegsMask { get; }
|
||||||
|
|
||||||
|
private bool _isWorthOptimizing;
|
||||||
|
|
||||||
|
private int _callCount;
|
||||||
|
|
||||||
public TranslatedSub(
|
public TranslatedSub(
|
||||||
DynamicMethod method,
|
DynamicMethod method,
|
||||||
TranslationTier tier,
|
|
||||||
long intNiRegsMask,
|
long intNiRegsMask,
|
||||||
long vecNiRegsMask)
|
long vecNiRegsMask,
|
||||||
|
TranslationTier tier,
|
||||||
|
bool isWorthOptimizing)
|
||||||
{
|
{
|
||||||
Method = method ?? throw new ArgumentNullException(nameof(method));;
|
Method = method ?? throw new ArgumentNullException(nameof(method));;
|
||||||
Tier = tier;
|
IntNiRegsMask = intNiRegsMask;
|
||||||
IntNiRegsMask = intNiRegsMask;
|
VecNiRegsMask = vecNiRegsMask;
|
||||||
VecNiRegsMask = vecNiRegsMask;
|
_isWorthOptimizing = isWorthOptimizing;
|
||||||
|
Tier = tier;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TranslatedSub()
|
static TranslatedSub()
|
||||||
|
@ -70,5 +81,24 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
return Delegate(threadState, memory);
|
return Delegate(threadState, memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public bool IsWorthOptimizing()
|
||||||
|
{
|
||||||
|
if (!_isWorthOptimizing)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_callCount++ < MinCallCountForOpt)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Only return true once, so that it is
|
||||||
|
//added to the queue only once.
|
||||||
|
_isWorthOptimizing = false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -63,53 +63,36 @@ namespace ChocolArm64.Translation
|
||||||
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
|
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
|
||||||
}
|
}
|
||||||
|
|
||||||
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
|
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
||||||
|
{
|
||||||
|
sub = TranslateLowCq(position, state.GetExecutionMode());
|
||||||
|
}
|
||||||
|
|
||||||
position = subroutine.Execute(state, _memory);
|
position = sub.Execute(state, _memory);
|
||||||
}
|
}
|
||||||
while (position != 0 && state.Running);
|
while (position != 0 && state.Running);
|
||||||
|
|
||||||
state.CurrentTranslator = null;
|
state.CurrentTranslator = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
|
internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
|
||||||
{
|
|
||||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
|
|
||||||
{
|
|
||||||
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
internal ArmSubroutine GetOrTranslateVirtualSubroutineForJump(CpuThreadState state, long position)
|
|
||||||
{
|
|
||||||
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: true);
|
|
||||||
}
|
|
||||||
|
|
||||||
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
|
|
||||||
{
|
|
||||||
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: false);
|
|
||||||
}
|
|
||||||
|
|
||||||
private ArmSubroutine GetOrTranslateVirtualSubroutineImpl(CpuThreadState state, long position, bool isJump)
|
|
||||||
{
|
{
|
||||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
||||||
{
|
{
|
||||||
sub = TranslateHighCq(position, state.GetExecutionMode(), !isJump);
|
sub = TranslateLowCq(position, state.GetExecutionMode());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sub.IsWorthOptimizing())
|
||||||
|
{
|
||||||
|
bool isComplete = cs == CallType.Call ||
|
||||||
|
cs == CallType.VirtualCall;
|
||||||
|
|
||||||
|
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
|
||||||
}
|
}
|
||||||
|
|
||||||
return sub.Delegate;
|
return sub.Delegate;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
|
|
||||||
{
|
|
||||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
|
|
||||||
{
|
|
||||||
subroutine = TranslateHighCq(position, state.GetExecutionMode(), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
return subroutine;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void TranslateQueuedSubs()
|
private void TranslateQueuedSubs()
|
||||||
{
|
{
|
||||||
while (_threadCount != 0)
|
while (_threadCount != 0)
|
||||||
|
@ -151,7 +134,7 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
|
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
|
||||||
|
|
||||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
|
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
|
||||||
|
|
||||||
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
|
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
|
||||||
}
|
}
|
||||||
|
@ -172,7 +155,7 @@ namespace ChocolArm64.Translation
|
||||||
|
|
||||||
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
|
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
|
||||||
|
|
||||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
|
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
|
||||||
|
|
||||||
int ilOpCount = 0;
|
int ilOpCount = 0;
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
using ChocolArm64.State;
|
||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
|
|
||||||
|
@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
|
||||||
{
|
{
|
||||||
class TranslatorQueue
|
class TranslatorQueue
|
||||||
{
|
{
|
||||||
//This is the maximum number of functions to be translated that the queue can hold.
|
|
||||||
//The value may need some tuning to find the sweet spot.
|
|
||||||
private const int MaxQueueSize = 1024;
|
|
||||||
|
|
||||||
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
|
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
|
||||||
|
|
||||||
private ManualResetEvent _queueDataReceivedEvent;
|
private ManualResetEvent _queueDataReceivedEvent;
|
||||||
|
@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
|
||||||
_queueDataReceivedEvent = new ManualResetEvent(false);
|
_queueDataReceivedEvent = new ManualResetEvent(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Enqueue(TranslatorQueueItem item)
|
public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
|
||||||
{
|
{
|
||||||
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];
|
TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
|
||||||
|
|
||||||
if (queue.Count >= MaxQueueSize)
|
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
|
||||||
{
|
|
||||||
queue.TryPop(out _);
|
|
||||||
}
|
|
||||||
|
|
||||||
queue.Push(item);
|
queue.Push(item);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue