Enable tier 0 while fixing some perf issues related to tier 0
This commit is contained in:
parent
4ab1fb031c
commit
35c6b4e35d
7 changed files with 96 additions and 74 deletions
|
@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
if (context.Tier == TranslationTier.Tier0)
|
||||
{
|
||||
context.EmitStoreState();
|
||||
|
||||
context.TranslateAhead(imm);
|
||||
|
||||
context.EmitLdc_I8(imm);
|
||||
|
@ -22,6 +24,8 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (!context.TryOptEmitSubroutineCall())
|
||||
{
|
||||
context.HasSlowCall = true;
|
||||
|
||||
context.EmitStoreState();
|
||||
|
||||
context.TranslateAhead(imm);
|
||||
|
@ -34,6 +38,7 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||
context.EmitLdc_I8(imm);
|
||||
context.EmitLdc_I4((int)CallType.Call);
|
||||
|
||||
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
|
||||
|
||||
|
@ -60,20 +65,6 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
if (context.Tier == TranslationTier.Tier0)
|
||||
{
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitSttmp();
|
||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||
|
||||
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
|
||||
BindingFlags.Instance |
|
||||
BindingFlags.NonPublic));
|
||||
|
||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||
context.EmitLdtmp();
|
||||
|
||||
context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
|
||||
|
||||
context.Emit(OpCodes.Ret);
|
||||
}
|
||||
else
|
||||
|
@ -87,12 +78,11 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||
context.EmitLdtmp();
|
||||
context.EmitLdc_I4(isJump
|
||||
? (int)CallType.VirtualJump
|
||||
: (int)CallType.VirtualCall);
|
||||
|
||||
string name = isJump
|
||||
? nameof(Translator.GetOrTranslateVirtualSubroutineForJump)
|
||||
: nameof(Translator.GetOrTranslateVirtualSubroutine);
|
||||
|
||||
context.EmitPrivateCall(typeof(Translator), name);
|
||||
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
|
||||
|
||||
context.EmitLdarg(TranslatedSub.StateArgIdx);
|
||||
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
|
||||
|
|
9
ChocolArm64/Translation/CallType.cs
Normal file
9
ChocolArm64/Translation/CallType.cs
Normal file
|
@ -0,0 +1,9 @@
|
|||
namespace ChocolArm64.Translation
{
    //Kind of branch on whose behalf a subroutine is requested from the translator.
    //The emitters cast these members to int and load them as Int32 constants,
    //so the numeric values are spelled out explicitly.
    enum CallType
    {
        //Emitted together with an immediate target address.
        Call = 0,

        //Emitted when the target comes from a temporary and the branch is not a jump.
        VirtualCall = 1,

        //Emitted when the target comes from a temporary and the branch is a jump.
        VirtualJump = 2
    }
}
|
|
@ -33,6 +33,8 @@ namespace ChocolArm64.Translation
|
|||
|
||||
public bool HasIndirectJump { get; set; }
|
||||
|
||||
public bool HasSlowCall { get; set; }
|
||||
|
||||
private Dictionary<Block, ILBlock> _visitedBlocks;
|
||||
|
||||
private Queue<Block> _branchTargets;
|
||||
|
@ -300,22 +302,31 @@ namespace ChocolArm64.Translation
|
|||
return;
|
||||
}
|
||||
|
||||
_queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1, isComplete: true));
|
||||
_queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
|
||||
}
|
||||
|
||||
public bool TryOptEmitSubroutineCall()
|
||||
{
|
||||
//Calls should always have a next block, unless
|
||||
//we're translating a single basic block.
|
||||
if (_currBlock.Next == null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (CurrOp.Emitter != InstEmit.Bl)
|
||||
if (!(CurrOp is IOpCodeBImm op))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub sub))
|
||||
if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//It's not worth calling a Tier0 method, because
|
||||
//it only contains slow code for part of the function, not the entire function.
|
||||
if (sub.Tier == TranslationTier.Tier0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ namespace ChocolArm64.Translation
|
|||
IsSubComplete = isSubComplete;
|
||||
}
|
||||
|
||||
public TranslatedSub GetSubroutine(TranslationTier tier)
|
||||
public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
|
||||
{
|
||||
RegUsage = new RegisterUsage();
|
||||
|
||||
|
@ -50,7 +50,12 @@ namespace ChocolArm64.Translation
|
|||
long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
|
||||
long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
|
||||
|
||||
TranslatedSub subroutine = new TranslatedSub(method, tier, intNiRegsMask, vecNiRegsMask);
|
||||
TranslatedSub subroutine = new TranslatedSub(
|
||||
method,
|
||||
intNiRegsMask,
|
||||
vecNiRegsMask,
|
||||
tier,
|
||||
isWorthOptimizing);
|
||||
|
||||
_locals = new Dictionary<Register, int>();
|
||||
|
||||
|
|
|
@ -10,6 +10,11 @@ namespace ChocolArm64.Translation
|
|||
|
||||
class TranslatedSub
|
||||
{
|
||||
//This is the minimum number of calls needed for the method
|
||||
//to be retranslated with higher quality code. It's only worth
|
||||
//doing that for hot code.
|
||||
private const int MinCallCountForOpt = 30;
|
||||
|
||||
public ArmSubroutine Delegate { get; private set; }
|
||||
|
||||
public static int StateArgIdx { get; }
|
||||
|
@ -24,16 +29,22 @@ namespace ChocolArm64.Translation
|
|||
public long IntNiRegsMask { get; }
|
||||
public long VecNiRegsMask { get; }
|
||||
|
||||
private bool _isWorthOptimizing;
|
||||
|
||||
private int _callCount;
|
||||
|
||||
public TranslatedSub(
|
||||
DynamicMethod method,
|
||||
TranslationTier tier,
|
||||
long intNiRegsMask,
|
||||
long vecNiRegsMask)
|
||||
long vecNiRegsMask,
|
||||
TranslationTier tier,
|
||||
bool isWorthOptimizing)
|
||||
{
|
||||
Method = method ?? throw new ArgumentNullException(nameof(method));;
|
||||
Tier = tier;
|
||||
IntNiRegsMask = intNiRegsMask;
|
||||
VecNiRegsMask = vecNiRegsMask;
|
||||
Method = method ?? throw new ArgumentNullException(nameof(method));;
|
||||
IntNiRegsMask = intNiRegsMask;
|
||||
VecNiRegsMask = vecNiRegsMask;
|
||||
_isWorthOptimizing = isWorthOptimizing;
|
||||
Tier = tier;
|
||||
}
|
||||
|
||||
static TranslatedSub()
|
||||
|
@ -70,5 +81,24 @@ namespace ChocolArm64.Translation
|
|||
{
|
||||
return Delegate(threadState, memory);
|
||||
}
|
||||
|
||||
//Counts one call to this subroutine and tells the caller whether the
//subroutine has just become hot enough to be retranslated.
//Returns true exactly once: on the first call made after MinCallCountForOpt
//calls have already been counted, and only if the subroutine was created
//as worth optimizing. Every other call returns false.
public bool IsWorthOptimizing()
{
    if (!_isWorthOptimizing)
    {
        return false;
    }

    //The counter is incremented atomically so that, when several threads
    //call this at the same time, each one observes a distinct value.
    int callCount = System.Threading.Interlocked.Increment(ref _callCount);

    //Only the caller that observes the first value past the threshold
    //gets true. Callers that raced past it (or have not reached it yet)
    //get false, so the subroutine is added to the queue only once.
    if (callCount != MinCallCountForOpt + 1)
    {
        return false;
    }

    //Stop counting from now on.
    _isWorthOptimizing = false;

    return true;
}
|
||||
}
|
||||
}
|
|
@ -63,53 +63,36 @@ namespace ChocolArm64.Translation
|
|||
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
|
||||
}
|
||||
|
||||
TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
|
||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
||||
{
|
||||
sub = TranslateLowCq(position, state.GetExecutionMode());
|
||||
}
|
||||
|
||||
position = subroutine.Execute(state, _memory);
|
||||
position = sub.Execute(state, _memory);
|
||||
}
|
||||
while (position != 0 && state.Running);
|
||||
|
||||
state.CurrentTranslator = null;
|
||||
}
|
||||
|
||||
internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
|
||||
{
|
||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
|
||||
{
|
||||
_queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
|
||||
}
|
||||
}
|
||||
|
||||
internal ArmSubroutine GetOrTranslateVirtualSubroutineForJump(CpuThreadState state, long position)
|
||||
{
|
||||
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: true);
|
||||
}
|
||||
|
||||
internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
|
||||
{
|
||||
return GetOrTranslateVirtualSubroutineImpl(state, position, isJump: false);
|
||||
}
|
||||
|
||||
private ArmSubroutine GetOrTranslateVirtualSubroutineImpl(CpuThreadState state, long position, bool isJump)
|
||||
internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
|
||||
{
|
||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
|
||||
{
|
||||
sub = TranslateHighCq(position, state.GetExecutionMode(), !isJump);
|
||||
sub = TranslateLowCq(position, state.GetExecutionMode());
|
||||
}
|
||||
|
||||
if (sub.IsWorthOptimizing())
|
||||
{
|
||||
bool isComplete = cs == CallType.Call ||
|
||||
cs == CallType.VirtualCall;
|
||||
|
||||
_queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
|
||||
}
|
||||
|
||||
return sub.Delegate;
|
||||
}
|
||||
|
||||
internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
|
||||
{
|
||||
if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
|
||||
{
|
||||
subroutine = TranslateHighCq(position, state.GetExecutionMode(), true);
|
||||
}
|
||||
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
private void TranslateQueuedSubs()
|
||||
{
|
||||
while (_threadCount != 0)
|
||||
|
@ -151,7 +134,7 @@ namespace ChocolArm64.Translation
|
|||
|
||||
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
|
||||
|
||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
|
||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
|
||||
|
||||
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
|
||||
}
|
||||
|
@ -172,7 +155,7 @@ namespace ChocolArm64.Translation
|
|||
|
||||
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
|
||||
|
||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
|
||||
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
|
||||
|
||||
int ilOpCount = 0;
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
using ChocolArm64.State;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Threading;
|
||||
|
||||
|
@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
|
|||
{
|
||||
class TranslatorQueue
|
||||
{
|
||||
//This is the maximum number of functions to be translated that the queue can hold.
|
||||
//The value may need some tuning to find the sweet spot.
|
||||
private const int MaxQueueSize = 1024;
|
||||
|
||||
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
|
||||
|
||||
private ManualResetEvent _queueDataReceivedEvent;
|
||||
|
@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
|
|||
_queueDataReceivedEvent = new ManualResetEvent(false);
|
||||
}
|
||||
|
||||
public void Enqueue(TranslatorQueueItem item)
|
||||
public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
|
||||
{
|
||||
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];
|
||||
TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
|
||||
|
||||
if (queue.Count >= MaxQueueSize)
|
||||
{
|
||||
queue.TryPop(out _);
|
||||
}
|
||||
ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
|
||||
|
||||
queue.Push(item);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue