Enable tier 0 while fixing some perf issues related to tier 0

This commit is contained in:
Gabriel 2019-02-26 17:00:15 -03:00
parent 4ab1fb031c
commit 35c6b4e35d
7 changed files with 96 additions and 74 deletions

View file

@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
context.EmitStoreState();
context.TranslateAhead(imm);
context.EmitLdc_I8(imm);
@ -22,6 +24,8 @@ namespace ChocolArm64.Instructions
if (!context.TryOptEmitSubroutineCall())
{
context.HasSlowCall = true;
context.EmitStoreState();
context.TranslateAhead(imm);
@ -34,6 +38,7 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm);
context.EmitLdc_I4((int)CallType.Call);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
@ -60,20 +65,6 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
context.Emit(OpCodes.Dup);
context.EmitSttmp();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
context.Emit(OpCodes.Ret);
}
else
@ -87,12 +78,11 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
context.EmitLdc_I4(isJump
? (int)CallType.VirtualJump
: (int)CallType.VirtualCall);
string name = isJump
? nameof(Translator.GetOrTranslateVirtualSubroutineForJump)
: nameof(Translator.GetOrTranslateVirtualSubroutine);
context.EmitPrivateCall(typeof(Translator), name);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);

View file

@ -0,0 +1,9 @@
namespace ChocolArm64.Translation
{
    //Kind of call site that is requesting a subroutine.
    //Callers cast these members to int, so the numeric
    //values are spelled out explicitly here.
    enum CallType
    {
        //Non-virtual call.
        Call = 0,

        //Virtual call that is not a jump.
        VirtualCall = 1,

        //Virtual jump.
        VirtualJump = 2
    }
}

View file

@ -33,6 +33,8 @@ namespace ChocolArm64.Translation
public bool HasIndirectJump { get; set; }
public bool HasSlowCall { get; set; }
private Dictionary<Block, ILBlock> _visitedBlocks;
private Queue<Block> _branchTargets;
@ -300,22 +302,31 @@ namespace ChocolArm64.Translation
return;
}
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1, isComplete: true));
_queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
}
public bool TryOptEmitSubroutineCall()
{
//Calls should always have a next block, unless
//we're translating a single basic block.
if (_currBlock.Next == null)
{
return false;
}
if (CurrOp.Emitter != InstEmit.Bl)
if (!(CurrOp is IOpCodeBImm op))
{
return false;
}
if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub sub))
if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
{
return false;
}
//It's not worth calling a Tier0 method, because it contains
//slow code and does not cover the entire function.
if (sub.Tier == TranslationTier.Tier0)
{
return false;
}

View file

@ -39,7 +39,7 @@ namespace ChocolArm64.Translation
IsSubComplete = isSubComplete;
}
public TranslatedSub GetSubroutine(TranslationTier tier)
public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
{
RegUsage = new RegisterUsage();
@ -50,7 +50,12 @@ namespace ChocolArm64.Translation
long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
TranslatedSub subroutine = new TranslatedSub(method, tier, intNiRegsMask, vecNiRegsMask);
TranslatedSub subroutine = new TranslatedSub(
method,
intNiRegsMask,
vecNiRegsMask,
tier,
isWorthOptimizing);
_locals = new Dictionary<Register, int>();

View file

@ -10,6 +10,11 @@ namespace ChocolArm64.Translation
class TranslatedSub
{
//This is the minimum number of calls needed for the method
//to be retranslated with higher quality code. It's only worth
//doing that for hot code.
private const int MinCallCountForOpt = 30;
public ArmSubroutine Delegate { get; private set; }
public static int StateArgIdx { get; }
@ -24,16 +29,22 @@ namespace ChocolArm64.Translation
public long IntNiRegsMask { get; }
public long VecNiRegsMask { get; }
private bool _isWorthOptimizing;
private int _callCount;
public TranslatedSub(
DynamicMethod method,
TranslationTier tier,
long intNiRegsMask,
long vecNiRegsMask)
long vecNiRegsMask,
TranslationTier tier,
bool isWorthOptimizing)
{
Method = method ?? throw new ArgumentNullException(nameof(method));;
Tier = tier;
IntNiRegsMask = intNiRegsMask;
VecNiRegsMask = vecNiRegsMask;
Method = method ?? throw new ArgumentNullException(nameof(method));;
IntNiRegsMask = intNiRegsMask;
VecNiRegsMask = vecNiRegsMask;
_isWorthOptimizing = isWorthOptimizing;
Tier = tier;
}
static TranslatedSub()
@ -70,5 +81,24 @@ namespace ChocolArm64.Translation
{
return Delegate(threadState, memory);
}
//Records one call to this subroutine and reports whether it should
//now be queued for retranslation. Returns false while the flag is
//unset or while the call counter is still below MinCallCountForOpt.
//When it returns true the flag is cleared, so later calls return false.
public bool IsWorthOptimizing()
{
    //Short-circuit evaluation: the counter is only advanced
    //while the flag is still set.
    bool thresholdReached = _isWorthOptimizing && _callCount++ >= MinCallCountForOpt;

    if (thresholdReached)
    {
        //Only report true once, so that the subroutine
        //is added to the queue only once.
        _isWorthOptimizing = false;
    }

    return thresholdReached;
}
}
}

View file

@ -63,53 +63,36 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
}
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
position = subroutine.Execute(state, _memory);
position = sub.Execute(state, _memory);
}
while (position != 0 && state.Running);
state.CurrentTranslator = null;
}
//Queues a Tier1 translation for the given position when the cache has
//no subroutine for it, or when the cached subroutine is still Tier0.
internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
{
    bool isCached = _cache.TryGetSubroutine(position, out TranslatedSub cachedSub);

    //Nothing to do when a subroutine above Tier0 is already cached.
    if (isCached && cachedSub.Tier != TranslationTier.Tier0)
    {
        return;
    }

    ExecutionMode mode = state.GetExecutionMode();

    _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
}
//Jump variant: forwards to GetOrTranslateVirtualSubroutineImpl with isJump set.
internal ArmSubroutine GetOrTranslateVirtualSubroutineForJump(CpuThreadState state, long position)
    => GetOrTranslateVirtualSubroutineImpl(state, position, isJump: true);
//Call variant: forwards to GetOrTranslateVirtualSubroutineImpl with isJump cleared.
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
    => GetOrTranslateVirtualSubroutineImpl(state, position, isJump: false);
private ArmSubroutine GetOrTranslateVirtualSubroutineImpl(CpuThreadState state, long position, bool isJump)
internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateHighCq(position, state.GetExecutionMode(), !isJump);
sub = TranslateLowCq(position, state.GetExecutionMode());
}
if (sub.IsWorthOptimizing())
{
bool isComplete = cs == CallType.Call ||
cs == CallType.VirtualCall;
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
}
return sub.Delegate;
}
//Returns the cached subroutine for the given position, or translates
//it with TranslateHighCq when the cache has no entry for it.
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
{
    if (_cache.TryGetSubroutine(position, out TranslatedSub cached))
    {
        return cached;
    }

    //Cache miss: translate now, using the thread's current execution mode.
    return TranslateHighCq(position, state.GetExecutionMode(), true);
}
private void TranslateQueuedSubs()
{
while (_threadCount != 0)
@ -151,7 +134,7 @@ namespace ChocolArm64.Translation
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
}
@ -172,7 +155,7 @@ namespace ChocolArm64.Translation
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0;

View file

@ -1,3 +1,4 @@
using ChocolArm64.State;
using System.Collections.Concurrent;
using System.Threading;
@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
{
class TranslatorQueue
{
//This is the maximum number of functions to be translated that the queue can hold.
//The value may need some tuning to find the sweet spot.
private const int MaxQueueSize = 1024;
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
private ManualResetEvent _queueDataReceivedEvent;
@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
_queueDataReceivedEvent = new ManualResetEvent(false);
}
public void Enqueue(TranslatorQueueItem item)
public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
{
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];
TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
if (queue.Count >= MaxQueueSize)
{
queue.TryPop(out _);
}
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
queue.Push(item);