Enable tier 0 while fixing some perf issues related to tier 0

This commit is contained in:
Gabriel 2019-02-26 17:00:15 -03:00
commit 35c6b4e35d
7 changed files with 96 additions and 74 deletions

View file

@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
{ {
if (context.Tier == TranslationTier.Tier0) if (context.Tier == TranslationTier.Tier0)
{ {
context.EmitStoreState();
context.TranslateAhead(imm); context.TranslateAhead(imm);
context.EmitLdc_I8(imm); context.EmitLdc_I8(imm);
@ -22,6 +24,8 @@ namespace ChocolArm64.Instructions
if (!context.TryOptEmitSubroutineCall()) if (!context.TryOptEmitSubroutineCall())
{ {
context.HasSlowCall = true;
context.EmitStoreState(); context.EmitStoreState();
context.TranslateAhead(imm); context.TranslateAhead(imm);
@ -34,6 +38,7 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm); context.EmitLdc_I8(imm);
context.EmitLdc_I4((int)CallType.Call);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
@ -60,20 +65,6 @@ namespace ChocolArm64.Instructions
{ {
if (context.Tier == TranslationTier.Tier0) if (context.Tier == TranslationTier.Tier0)
{ {
context.Emit(OpCodes.Dup);
context.EmitSttmp();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
BindingFlags.Instance |
BindingFlags.NonPublic));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
context.Emit(OpCodes.Ret); context.Emit(OpCodes.Ret);
} }
else else
@ -87,12 +78,11 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp(); context.EmitLdtmp();
context.EmitLdc_I4(isJump
? (int)CallType.VirtualJump
: (int)CallType.VirtualCall);
string name = isJump context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
? nameof(Translator.GetOrTranslateVirtualSubroutineForJump)
: nameof(Translator.GetOrTranslateVirtualSubroutine);
context.EmitPrivateCall(typeof(Translator), name);
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdarg(TranslatedSub.MemoryArgIdx);

View file

@ -0,0 +1,9 @@
namespace ChocolArm64.Translation
{
    //Identifies the kind of guest branch that asked the translator
    //for a subroutine. The value is emitted as an int constant by the
    //instruction emitters and passed to Translator.GetOrTranslateSubroutine.
    enum CallType
    {
        //Call whose target address is an immediate value.
        Call = 0,

        //Call whose target address is only known at run time
        //(selected by the emitter when "isJump" is false).
        VirtualCall = 1,

        //Jump whose target address is only known at run time
        //(selected by the emitter when "isJump" is true).
        //NOTE(review): the translator appears to queue this kind as a
        //non-complete translation, unlike the two call kinds - confirm.
        VirtualJump = 2
    }
}

View file

@ -33,6 +33,8 @@ namespace ChocolArm64.Translation
public bool HasIndirectJump { get; set; } public bool HasIndirectJump { get; set; }
public bool HasSlowCall { get; set; }
private Dictionary<Block, ILBlock> _visitedBlocks; private Dictionary<Block, ILBlock> _visitedBlocks;
private Queue<Block> _branchTargets; private Queue<Block> _branchTargets;
@ -300,22 +302,31 @@ namespace ChocolArm64.Translation
return; return;
} }
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1, isComplete: true)); _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
} }
public bool TryOptEmitSubroutineCall() public bool TryOptEmitSubroutineCall()
{ {
//Calls should always have a next block, unless
//we're translating a single basic block.
if (_currBlock.Next == null) if (_currBlock.Next == null)
{ {
return false; return false;
} }
if (CurrOp.Emitter != InstEmit.Bl) if (!(CurrOp is IOpCodeBImm op))
{ {
return false; return false;
} }
if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub sub)) if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
{
return false;
}
//It's not worth calling a Tier0 method, because it
//contains slow code, rather than the entire function.
if (sub.Tier == TranslationTier.Tier0)
{ {
return false; return false;
} }

View file

@ -39,7 +39,7 @@ namespace ChocolArm64.Translation
IsSubComplete = isSubComplete; IsSubComplete = isSubComplete;
} }
public TranslatedSub GetSubroutine(TranslationTier tier) public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
{ {
RegUsage = new RegisterUsage(); RegUsage = new RegisterUsage();
@ -50,7 +50,12 @@ namespace ChocolArm64.Translation
long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]); long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]); long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
TranslatedSub subroutine = new TranslatedSub(method, tier, intNiRegsMask, vecNiRegsMask); TranslatedSub subroutine = new TranslatedSub(
method,
intNiRegsMask,
vecNiRegsMask,
tier,
isWorthOptimizing);
_locals = new Dictionary<Register, int>(); _locals = new Dictionary<Register, int>();

View file

@ -10,6 +10,11 @@ namespace ChocolArm64.Translation
class TranslatedSub class TranslatedSub
{ {
//This is the minimum number of calls needed for the method
//to be retranslated with higher quality code. It's only worth
//doing that for hot code.
private const int MinCallCountForOpt = 30;
public ArmSubroutine Delegate { get; private set; } public ArmSubroutine Delegate { get; private set; }
public static int StateArgIdx { get; } public static int StateArgIdx { get; }
@ -24,16 +29,22 @@ namespace ChocolArm64.Translation
public long IntNiRegsMask { get; } public long IntNiRegsMask { get; }
public long VecNiRegsMask { get; } public long VecNiRegsMask { get; }
private bool _isWorthOptimizing;
private int _callCount;
public TranslatedSub( public TranslatedSub(
DynamicMethod method, DynamicMethod method,
TranslationTier tier,
long intNiRegsMask, long intNiRegsMask,
long vecNiRegsMask) long vecNiRegsMask,
TranslationTier tier,
bool isWorthOptimizing)
{ {
Method = method ?? throw new ArgumentNullException(nameof(method));; Method = method ?? throw new ArgumentNullException(nameof(method));;
Tier = tier; IntNiRegsMask = intNiRegsMask;
IntNiRegsMask = intNiRegsMask; VecNiRegsMask = vecNiRegsMask;
VecNiRegsMask = vecNiRegsMask; _isWorthOptimizing = isWorthOptimizing;
Tier = tier;
} }
static TranslatedSub() static TranslatedSub()
@ -70,5 +81,24 @@ namespace ChocolArm64.Translation
{ {
return Delegate(threadState, memory); return Delegate(threadState, memory);
} }
//Counts one call and reports whether the method should now be
//queued for retranslation with higher quality code.
//Returns false while the method is not flagged as worth optimizing
//(the call counter is left untouched in that case), and false for
//each of the first MinCallCountForOpt counted calls. The next call
//returns true and clears the flag, so that every later call returns
//false and the method is added to the queue only once.
public bool IsWorthOptimizing()
{
    //The increment only runs when the flag is still set, because
    //the && operator short-circuits on a false left operand.
    bool reachedThreshold = _isWorthOptimizing && _callCount++ >= MinCallCountForOpt;

    if (reachedThreshold)
    {
        _isWorthOptimizing = false;
    }

    return reachedThreshold;
}
} }
} }

View file

@ -63,53 +63,36 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
} }
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
position = subroutine.Execute(state, _memory); position = sub.Execute(state, _memory);
} }
while (position != 0 && state.Running); while (position != 0 && state.Running);
state.CurrentTranslator = null; state.CurrentTranslator = null;
} }
internal void TranslateVirtualSubroutine(CpuThreadState state, long position) internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
{
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
}
}
internal ArmSubroutine GetOrTranslateVirtualSubroutineForJump(CpuThreadState state, long position)
{
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: true);
}
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
{
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: false);
}
private ArmSubroutine GetOrTranslateVirtualSubroutineImpl(CpuThreadState state, long position, bool isJump)
{ {
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{ {
sub = TranslateHighCq(position, state.GetExecutionMode(), !isJump); sub = TranslateLowCq(position, state.GetExecutionMode());
}
if (sub.IsWorthOptimizing())
{
bool isComplete = cs == CallType.Call ||
cs == CallType.VirtualCall;
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
} }
return sub.Delegate; return sub.Delegate;
} }
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
{
subroutine = TranslateHighCq(position, state.GetExecutionMode(), true);
}
return subroutine;
}
private void TranslateQueuedSubs() private void TranslateQueuedSubs()
{ {
while (_threadCount != 0) while (_threadCount != 0)
@ -151,7 +134,7 @@ namespace ChocolArm64.Translation
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64); ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
} }
@ -172,7 +155,7 @@ namespace ChocolArm64.Translation
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete); ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0; int ilOpCount = 0;

View file

@ -1,3 +1,4 @@
using ChocolArm64.State;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Threading; using System.Threading;
@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
{ {
class TranslatorQueue class TranslatorQueue
{ {
//This is the maximum number of functions to be translated that the queue can hold.
//The value may need some tuning to find the sweet spot.
private const int MaxQueueSize = 1024;
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue; private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
private ManualResetEvent _queueDataReceivedEvent; private ManualResetEvent _queueDataReceivedEvent;
@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
_queueDataReceivedEvent = new ManualResetEvent(false); _queueDataReceivedEvent = new ManualResetEvent(false);
} }
public void Enqueue(TranslatorQueueItem item) public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
{ {
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier]; TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
if (queue.Count >= MaxQueueSize) ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
{
queue.TryPop(out _);
}
queue.Push(item); queue.Push(item);