diff --git a/ChocolArm64/ChocolArm64.csproj b/ChocolArm64/ChocolArm64.csproj
index 5178e8eb40..ea98003f9b 100644
--- a/ChocolArm64/ChocolArm64.csproj
+++ b/ChocolArm64/ChocolArm64.csproj
@@ -27,6 +27,7 @@
+
diff --git a/ChocolArm64/CpuThread.cs b/ChocolArm64/CpuThread.cs
index 6cd34f8127..ad1fd6f3c1 100644
--- a/ChocolArm64/CpuThread.cs
+++ b/ChocolArm64/CpuThread.cs
@@ -32,8 +32,6 @@ namespace ChocolArm64
{
translator.ExecuteSubroutine(this, entrypoint);
- memory.RemoveMonitor(ThreadState.Core);
-
WorkFinished?.Invoke(this, EventArgs.Empty);
});
}
diff --git a/ChocolArm64/Decoders/OpCodeSimdCvt64.cs b/ChocolArm64/Decoders/OpCodeSimdCvt64.cs
index eacd594099..3181a85a34 100644
--- a/ChocolArm64/Decoders/OpCodeSimdCvt64.cs
+++ b/ChocolArm64/Decoders/OpCodeSimdCvt64.cs
@@ -8,18 +8,9 @@ namespace ChocolArm64.Decoders
public OpCodeSimdCvt64(Inst inst, long position, int opCode) : base(inst, position, opCode)
{
- //TODO:
- //Und of Fixed Point variants.
int scale = (opCode >> 10) & 0x3f;
int sf = (opCode >> 31) & 0x1;
- /*if (Type != SF && !(Type == 2 && SF == 1))
- {
- Emitter = AInstEmit.Und;
-
- return;
- }*/
-
FBits = 64 - scale;
RegisterSize = sf != 0
diff --git a/ChocolArm64/Events/InvalidAccessEventArgs.cs b/ChocolArm64/Events/InvalidAccessEventArgs.cs
deleted file mode 100644
index 9c349755f0..0000000000
--- a/ChocolArm64/Events/InvalidAccessEventArgs.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-using System;
-
-namespace ChocolArm64.Events
-{
- public class MemoryAccessEventArgs : EventArgs
- {
- public long Position { get; private set; }
-
- public MemoryAccessEventArgs(long position)
- {
- Position = position;
- }
- }
-}
\ No newline at end of file
diff --git a/ChocolArm64/Exceptions/VmmPageFaultException.cs b/ChocolArm64/Exceptions/VmmPageFaultException.cs
deleted file mode 100644
index f33aafc013..0000000000
--- a/ChocolArm64/Exceptions/VmmPageFaultException.cs
+++ /dev/null
@@ -1,13 +0,0 @@
-using System;
-
-namespace ChocolArm64.Exceptions
-{
- public class VmmPageFaultException : Exception
- {
- private const string ExMsg = "Tried to access unmapped address 0x{0:x16}!";
-
- public VmmPageFaultException() { }
-
- public VmmPageFaultException(long position) : base(string.Format(ExMsg, position)) { }
- }
-}
\ No newline at end of file
diff --git a/ChocolArm64/Instructions/InstEmitAlu.cs b/ChocolArm64/Instructions/InstEmitAlu.cs
index d5d9cd6541..bd49124e49 100644
--- a/ChocolArm64/Instructions/InstEmitAlu.cs
+++ b/ChocolArm64/Instructions/InstEmitAlu.cs
@@ -51,6 +51,8 @@ namespace ChocolArm64.Instructions
public static void Adds(ILEmitterCtx context)
{
+ context.TryOptMarkCondWithoutCmp();
+
EmitAluLoadOpers(context);
context.Emit(OpCodes.Add);
diff --git a/ChocolArm64/Instructions/InstEmitBfm.cs b/ChocolArm64/Instructions/InstEmitBfm.cs
index d25af8be8a..4a03959940 100644
--- a/ChocolArm64/Instructions/InstEmitBfm.cs
+++ b/ChocolArm64/Instructions/InstEmitBfm.cs
@@ -11,21 +11,56 @@ namespace ChocolArm64.Instructions
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
- EmitBfmLoadRn(context);
+ if (op.Pos < op.Shift)
+ {
+ //BFI.
+ context.EmitLdintzr(op.Rn);
- context.EmitLdintzr(op.Rd);
- context.EmitLdc_I(~op.WMask & op.TMask);
+ int shift = op.GetBitsCount() - op.Shift;
- context.Emit(OpCodes.And);
- context.Emit(OpCodes.Or);
+ int width = op.Pos + 1;
- context.EmitLdintzr(op.Rd);
- context.EmitLdc_I(~op.TMask);
+ long mask = (long)(ulong.MaxValue >> (64 - width));
- context.Emit(OpCodes.And);
- context.Emit(OpCodes.Or);
+ context.EmitLdc_I(mask);
- context.EmitStintzr(op.Rd);
+ context.Emit(OpCodes.And);
+
+ context.EmitLsl(shift);
+
+ context.EmitLdintzr(op.Rd);
+
+ context.EmitLdc_I(~(mask << shift));
+
+ context.Emit(OpCodes.And);
+ context.Emit(OpCodes.Or);
+
+ context.EmitStintzr(op.Rd);
+ }
+ else
+ {
+ //BFXIL.
+ context.EmitLdintzr(op.Rn);
+
+ context.EmitLsr(op.Shift);
+
+ int width = op.Pos - op.Shift + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ context.EmitLdc_I(mask);
+
+ context.Emit(OpCodes.And);
+
+ context.EmitLdintzr(op.Rd);
+
+ context.EmitLdc_I(~mask);
+
+ context.Emit(OpCodes.And);
+ context.Emit(OpCodes.Or);
+
+ context.EmitStintzr(op.Rd);
+ }
}
public static void Sbfm(ILEmitterCtx context)
diff --git a/ChocolArm64/Instructions/InstEmitFlow.cs b/ChocolArm64/Instructions/InstEmitFlow.cs
index a842dca9d1..5eae89cc09 100644
--- a/ChocolArm64/Instructions/InstEmitFlow.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow.cs
@@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I(op.Position + 4);
context.EmitStint(RegisterAlias.Lr);
- context.EmitStoreState();
EmitCall(context, op.Imm);
}
@@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions
{
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
+ context.HasIndirectJump = true;
+
context.EmitStoreState();
context.EmitLdintzr(op.Rn);
diff --git a/ChocolArm64/Instructions/InstEmitFlow32.cs b/ChocolArm64/Instructions/InstEmitFlow32.cs
index 61f1d34c53..dea490c775 100644
--- a/ChocolArm64/Instructions/InstEmitFlow32.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow32.cs
@@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions
}
context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr));
- context.EmitStoreState();
//If x is true, then this is a branch with link and exchange.
//In this case we need to swap the mode between Arm <-> Thumb.
diff --git a/ChocolArm64/Instructions/InstEmitFlowHelper.cs b/ChocolArm64/Instructions/InstEmitFlowHelper.cs
index e93ef42679..a6091a5711 100644
--- a/ChocolArm64/Instructions/InstEmitFlowHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitFlowHelper.cs
@@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
+ context.EmitStoreState();
+
context.TranslateAhead(imm);
context.EmitLdc_I8(imm);
@@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions
if (!context.TryOptEmitSubroutineCall())
{
+ context.HasSlowCall = true;
+
+ context.EmitStoreState();
+
context.TranslateAhead(imm);
context.EmitLdarg(TranslatedSub.StateArgIdx);
@@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm);
+ context.EmitLdc_I4((int)CallType.Call);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
@@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
- context.Emit(OpCodes.Dup);
-
- context.EmitSttmp();
- context.EmitLdarg(TranslatedSub.StateArgIdx);
-
- context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
- BindingFlags.Instance |
- BindingFlags.NonPublic));
-
- context.EmitLdarg(TranslatedSub.StateArgIdx);
- context.EmitLdtmp();
-
- context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
-
context.Emit(OpCodes.Ret);
}
else
@@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
+ context.EmitLdc_I4(isJump
+ ? (int)CallType.VirtualJump
+ : (int)CallType.VirtualCall);
- context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));
+ context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
diff --git a/ChocolArm64/Instructions/InstEmitMemory.cs b/ChocolArm64/Instructions/InstEmitMemory.cs
index 96f782df64..ea779c8da4 100644
--- a/ChocolArm64/Instructions/InstEmitMemory.cs
+++ b/ChocolArm64/Instructions/InstEmitMemory.cs
@@ -31,8 +31,6 @@ namespace ChocolArm64.Instructions
{
OpCodeMem64 op = (OpCodeMem64)context.CurrOp;
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-
EmitLoadAddress(context);
if (signed && op.Extend64)
@@ -69,7 +67,6 @@ namespace ChocolArm64.Instructions
return;
}
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdc_I8(op.Imm);
if (op.Signed)
@@ -116,13 +113,10 @@ namespace ChocolArm64.Instructions
}
}
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-
EmitLoadAddress(context);
EmitReadAndStore(op.Rt);
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I8(1 << op.Size);
@@ -137,8 +131,6 @@ namespace ChocolArm64.Instructions
{
OpCodeMem64 op = (OpCodeMem64)context.CurrOp;
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-
EmitLoadAddress(context);
if (op is IOpCodeSimd64)
@@ -159,8 +151,6 @@ namespace ChocolArm64.Instructions
{
OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp;
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-
EmitLoadAddress(context);
if (op is IOpCodeSimd64)
@@ -174,7 +164,6 @@ namespace ChocolArm64.Instructions
EmitWriteCall(context, op.Size);
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitLdc_I8(1 << op.Size);
diff --git a/ChocolArm64/Instructions/InstEmitMemory32.cs b/ChocolArm64/Instructions/InstEmitMemory32.cs
index 4d6a57a472..1e1419e65e 100644
--- a/ChocolArm64/Instructions/InstEmitMemory32.cs
+++ b/ChocolArm64/Instructions/InstEmitMemory32.cs
@@ -64,9 +64,7 @@ namespace ChocolArm64.Instructions
{
if ((mask & 1) != 0)
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
-
context.EmitLdc_I4(offset);
context.Emit(OpCodes.Add);
@@ -129,9 +127,7 @@ namespace ChocolArm64.Instructions
{
if ((mask & 1) != 0)
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
-
context.EmitLdc_I4(offset);
context.Emit(OpCodes.Add);
@@ -198,8 +194,6 @@ namespace ChocolArm64.Instructions
context.EmitSttmp();
}
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-
if (op.Index)
{
context.EmitLdtmp();
diff --git a/ChocolArm64/Instructions/InstEmitMemoryEx.cs b/ChocolArm64/Instructions/InstEmitMemoryEx.cs
index 42daca63b7..920c695fff 100644
--- a/ChocolArm64/Instructions/InstEmitMemoryEx.cs
+++ b/ChocolArm64/Instructions/InstEmitMemoryEx.cs
@@ -23,7 +23,9 @@ namespace ChocolArm64.Instructions
+ //Emits a call to CpuThreadState.ClearExclusiveAddress, with the thread state argument loaded first.
public static void Clrex(ILEmitterCtx context)
{
- EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive));
+ context.EmitLdarg(TranslatedSub.StateArgIdx);
+
+ context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.ClearExclusiveAddress));
}
+ //DMB is emitted through the shared EmitBarrier helper.
public static void Dmb(ILEmitterCtx context) => EmitBarrier(context);
@@ -37,12 +39,12 @@ namespace ChocolArm64.Instructions
+ //Emits a single register load by forwarding to EmitLoad with pair set to false.
private static void EmitLdr(ILEmitterCtx context, AccessType accType)
{
- EmitLoad(context, accType, false);
+ EmitLoad(context, accType, pair: false);
}
+ //Emits a pairwise (two registers) load by forwarding to EmitLoad with pair set to true.
private static void EmitLdp(ILEmitterCtx context, AccessType accType)
{
- EmitLoad(context, accType, true);
+ EmitLoad(context, accType, pair: true);
}
private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair)
@@ -57,32 +59,121 @@ namespace ChocolArm64.Instructions
EmitBarrier(context);
}
- if (exclusive)
- {
- EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), op.Rn);
- }
-
context.EmitLdint(op.Rn);
context.EmitSttmp();
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
- context.EmitLdtmp();
+ if (exclusive)
+ {
+ context.EmitLdarg(TranslatedSub.StateArgIdx);
+ context.EmitLdtmp();
- EmitReadZxCall(context, op.Size);
+ context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.SetExclusiveAddress));
+ }
- context.EmitStintzr(op.Rt);
+ void WriteExclusiveValue(string propName)
+ {
+ context.Emit(OpCodes.Dup);
+
+ if (op.Size < 3)
+ {
+ context.Emit(OpCodes.Conv_U8);
+ }
+
+ context.EmitSttmp2();
+ context.EmitLdarg(TranslatedSub.StateArgIdx);
+ context.EmitLdtmp2();
+
+ context.EmitCallPrivatePropSet(typeof(CpuThreadState), propName);
+ }
if (pair)
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
- context.EmitLdtmp();
- context.EmitLdc_I8(1 << op.Size);
+ //Exclusive loads should be atomic. For pairwise loads, we need to
+ //read all the data at once. For a 32-bits pairwise load, we do a
+ //simple 64-bits load, for a 128-bits load, we need to call a special
+ //method to read 128-bits atomically.
+ if (op.Size == 2)
+ {
+ context.EmitLdtmp();
- context.Emit(OpCodes.Add);
+ EmitReadZxCall(context, 3);
+
+ context.Emit(OpCodes.Dup);
+
+ //Mask low half.
+ context.Emit(OpCodes.Conv_U4);
+
+ if (exclusive)
+ {
+ WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
+ }
+
+ context.EmitStintzr(op.Rt);
+
+ //Shift high half.
+ context.EmitLsr(32);
+ context.Emit(OpCodes.Conv_U4);
+
+ if (exclusive)
+ {
+ WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
+ }
+
+ context.EmitStintzr(op.Rt2);
+ }
+ else if (op.Size == 3)
+ {
+ context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+ context.EmitLdtmp();
+
+ context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicReadInt128));
+
+ context.Emit(OpCodes.Dup);
+
+ //Load low part of the vector.
+ context.EmitLdc_I4(0);
+ context.EmitLdc_I4(3);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
+
+ if (exclusive)
+ {
+ WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
+ }
+
+ context.EmitStintzr(op.Rt);
+
+ //Load high part of the vector.
+ context.EmitLdc_I4(1);
+ context.EmitLdc_I4(3);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
+
+ if (exclusive)
+ {
+ WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
+ }
+
+ context.EmitStintzr(op.Rt2);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+ }
+ }
+ else
+ {
+ //8, 16, 32 or 64-bits (non-pairwise) load.
+ context.EmitLdtmp();
EmitReadZxCall(context, op.Size);
- context.EmitStintzr(op.Rt2);
+ if (exclusive)
+ {
+ WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
+ }
+
+ context.EmitStintzr(op.Rt);
}
}
@@ -99,12 +190,12 @@ namespace ChocolArm64.Instructions
+ //Emits a single register store by forwarding to EmitStore with pair set to false.
private static void EmitStr(ILEmitterCtx context, AccessType accType)
{
- EmitStore(context, accType, false);
+ EmitStore(context, accType, pair: false);
}
+ //Emits a pairwise (two registers) store by forwarding to EmitStore with pair set to true.
private static void EmitStp(ILEmitterCtx context, AccessType accType)
{
- EmitStore(context, accType, true);
+ EmitStore(context, accType, pair: true);
}
private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair)
@@ -119,66 +210,132 @@ namespace ChocolArm64.Instructions
EmitBarrier(context);
}
- ILLabel lblEx = new ILLabel();
- ILLabel lblEnd = new ILLabel();
-
if (exclusive)
{
- EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), op.Rn);
+ ILLabel lblEx = new ILLabel();
+ ILLabel lblEnd = new ILLabel();
+
+ context.EmitLdarg(TranslatedSub.StateArgIdx);
+ context.EmitLdint(op.Rn);
+
+ context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.CheckExclusiveAddress));
context.Emit(OpCodes.Brtrue_S, lblEx);
- context.EmitLdc_I8(1);
+ //Address check failed, set error right away and do not store anything.
+ context.EmitLdc_I4(1);
context.EmitStintzr(op.Rs);
- context.Emit(OpCodes.Br_S, lblEnd);
- }
+ context.Emit(OpCodes.Br, lblEnd);
- context.MarkLabel(lblEx);
+ //Address check passed.
+ context.MarkLabel(lblEx);
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
- context.EmitLdint(op.Rn);
- context.EmitLdintzr(op.Rt);
-
- EmitWriteCall(context, op.Size);
-
- if (pair)
- {
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
- context.EmitLdc_I8(1 << op.Size);
- context.Emit(OpCodes.Add);
+ context.EmitLdarg(TranslatedSub.StateArgIdx);
- context.EmitLdintzr(op.Rt2);
+ context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueLow));
- EmitWriteCall(context, op.Size);
- }
+ void EmitCast()
+ {
+ //The input should be always int64.
+ switch (op.Size)
+ {
+ case 0: context.Emit(OpCodes.Conv_U1); break;
+ case 1: context.Emit(OpCodes.Conv_U2); break;
+ case 2: context.Emit(OpCodes.Conv_U4); break;
+ }
+ }
+
+ EmitCast();
+
+ if (pair)
+ {
+ context.EmitLdarg(TranslatedSub.StateArgIdx);
+
+ context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueHigh));
+
+ EmitCast();
+
+ context.EmitLdintzr(op.Rt);
+
+ EmitCast();
+
+ context.EmitLdintzr(op.Rt2);
+
+ EmitCast();
+
+ switch (op.Size)
+ {
+ case 2: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchange2xInt32)); break;
+ case 3: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt128)); break;
+
+ default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
+ }
+ }
+ else
+ {
+ context.EmitLdintzr(op.Rt);
+
+ EmitCast();
+
+ switch (op.Size)
+ {
+ case 0: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeByte)); break;
+ case 1: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt16)); break;
+ case 2: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt32)); break;
+ case 3: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt64)); break;
+
+ default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
+ }
+ }
+
+ //The value returned is a bool, true if the values compared
+ //were equal and the new value was written, false otherwise.
+ //We need to invert this result, as on ARM 1 indicates failure,
+ //and 0 success on those instructions.
+ context.EmitLdc_I4(1);
+
+ context.Emit(OpCodes.Xor);
+ context.Emit(OpCodes.Dup);
+ context.Emit(OpCodes.Conv_U8);
- if (exclusive)
- {
- context.EmitLdc_I8(0);
context.EmitStintzr(op.Rs);
- EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore));
+ //Only clear the exclusive monitor if the store was successful (Rs = false).
+ context.Emit(OpCodes.Brtrue_S, lblEnd);
+
+ Clrex(context);
+
+ context.MarkLabel(lblEnd);
}
-
- context.MarkLabel(lblEnd);
- }
-
- private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1)
- {
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
- context.EmitLdarg(TranslatedSub.StateArgIdx);
-
- context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core));
-
- if (rn != -1)
+ else
{
- context.EmitLdint(rn);
- }
+ void EmitWriteCall(int rt, long offset)
+ {
+ context.EmitLdint(op.Rn);
- context.EmitCall(typeof(MemoryManager), name);
+ if (offset != 0)
+ {
+ context.EmitLdc_I8(offset);
+
+ context.Emit(OpCodes.Add);
+ }
+
+ context.EmitLdintzr(rt);
+
+ InstEmitMemoryHelper.EmitWriteCall(context, op.Size);
+ }
+
+ EmitWriteCall(op.Rt, 0);
+
+ if (pair)
+ {
+ EmitWriteCall(op.Rt2, 1 << op.Size);
+ }
+ }
}
private static void EmitBarrier(ILEmitterCtx context)
diff --git a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
index f953564c46..c225cdd8cc 100644
--- a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
@@ -1,13 +1,20 @@
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
+using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
static class InstEmitMemoryHelper
{
+ //Indices of the temporaries shared by the memory access emitters below.
+ //They are allocated once and never reassigned, so they are readonly.
+
+ //Holds the guest address of the access being emitted.
+ private static readonly int _tempIntAddress = ILEmitterCtx.GetIntTempIndex();
+
+ //Holds the integer value to be written by integer stores.
+ private static readonly int _tempIntValue = ILEmitterCtx.GetIntTempIndex();
+
+ //Holds the page table entry while its flag bits are being tested.
+ private static readonly int _tempIntPtAddr = ILEmitterCtx.GetIntTempIndex();
+
+ //Holds the vector value to be written by vector stores.
+ private static readonly int _tempVecValue = ILEmitterCtx.GetVecTempIndex();
+
private enum Extension
{
Zx,
@@ -32,9 +39,10 @@ namespace ChocolArm64.Instructions
private static void EmitReadCall(ILEmitterCtx context, Extension ext, int size)
{
- bool isSimd = GetIsSimd(context);
+ //Save the address into a temp.
+ context.EmitStint(_tempIntAddress);
- string name = null;
+ bool isSimd = IsSimd(context);
if (size < 0 || size > (isSimd ? 4 : 3))
{
@@ -43,28 +51,27 @@ namespace ChocolArm64.Instructions
if (isSimd)
{
- switch (size)
+ if (context.Tier == TranslationTier.Tier0 || !Sse2.IsSupported || size < 2)
{
- case 0: name = nameof(MemoryManager.ReadVector8); break;
- case 1: name = nameof(MemoryManager.ReadVector16); break;
- case 2: name = nameof(MemoryManager.ReadVector32); break;
- case 3: name = nameof(MemoryManager.ReadVector64); break;
- case 4: name = nameof(MemoryManager.ReadVector128); break;
+ EmitReadVectorFallback(context, size);
+ }
+ else
+ {
+ EmitReadVector(context, size);
}
}
else
{
- switch (size)
+ if (context.Tier == TranslationTier.Tier0)
{
- case 0: name = nameof(MemoryManager.ReadByte); break;
- case 1: name = nameof(MemoryManager.ReadUInt16); break;
- case 2: name = nameof(MemoryManager.ReadUInt32); break;
- case 3: name = nameof(MemoryManager.ReadUInt64); break;
+ EmitReadIntFallback(context, size);
+ }
+ else
+ {
+ EmitReadInt(context, size);
}
}
- context.EmitCall(typeof(MemoryManager), name);
-
if (!isSimd)
{
if (ext == Extension.Sx32 ||
@@ -89,50 +96,379 @@ namespace ChocolArm64.Instructions
+ //Emits a memory store of (1 << size) bytes for the current opcode.
+ //The value is saved into a temp first and the address second, so the value is
+ //expected on top of the evaluation stack with the address right below it.
+ //Throws ArgumentOutOfRangeException when size is negative or above 4 (SIMD) or 3 (integer).
public static void EmitWriteCall(ILEmitterCtx context, int size)
{
- bool isSimd = GetIsSimd(context);
+ bool isSimd = IsSimd(context);
- string name = null;
+ //Save the value into a temp.
+ if (isSimd)
+ {
+ context.EmitStvec(_tempVecValue);
+ }
+ else
+ {
+ context.EmitStint(_tempIntValue);
+ }
+
+ //Save the address into a temp.
+ context.EmitStint(_tempIntAddress);
if (size < 0 || size > (isSimd ? 4 : 3))
{
throw new ArgumentOutOfRangeException(nameof(size));
}
- if (size < 3 && !isSimd)
- {
- context.Emit(OpCodes.Conv_I4);
- }
-
if (isSimd)
{
- switch (size)
+ //The inline vector store is only used outside Tier0, with SSE2 available,
+ //and for sizes of 4 bytes or more. Everything else calls into MemoryManager.
+ if (context.Tier == TranslationTier.Tier0 || !Sse2.IsSupported || size < 2)
{
- case 0: name = nameof(MemoryManager.WriteVector8); break;
- case 1: name = nameof(MemoryManager.WriteVector16); break;
- case 2: name = nameof(MemoryManager.WriteVector32); break;
- case 3: name = nameof(MemoryManager.WriteVector64); break;
- case 4: name = nameof(MemoryManager.WriteVector128); break;
+ EmitWriteVectorFallback(context, size);
+ }
+ else
+ {
+ EmitWriteVector(context, size);
}
}
else
{
- switch (size)
+ //Tier0 code always calls into MemoryManager, other tiers use the inline store.
+ if (context.Tier == TranslationTier.Tier0)
{
- case 0: name = nameof(MemoryManager.WriteByte); break;
- case 1: name = nameof(MemoryManager.WriteUInt16); break;
- case 2: name = nameof(MemoryManager.WriteUInt32); break;
- case 3: name = nameof(MemoryManager.WriteUInt64); break;
+ EmitWriteIntFallback(context, size);
+ }
+ else
+ {
+ EmitWriteInt(context, size);
}
}
-
- context.EmitCall(typeof(MemoryManager), name);
}
- private static bool GetIsSimd(ILEmitterCtx context)
+ //Returns true when the current opcode implements IOpCodeSimd64 and is neither
+ //an OpCodeSimdMemMs64 nor an OpCodeSimdMemSs64.
+ private static bool IsSimd(ILEmitterCtx context)
{
return context.CurrOp is IOpCodeSimd64 &&
!(context.CurrOp is OpCodeSimdMemMs64 ||
context.CurrOp is OpCodeSimdMemSs64);
}
+
+ //Emits an integer load of (1 << size) bytes from the address saved in _tempIntAddress.
+ //The fast path reads directly through the page table. Addresses that fail the
+ //address check, or whose page table lookup branches back, use the MemoryManager call.
+ private static void EmitReadInt(ILEmitterCtx context, int size)
+ {
+ //Pushes the address masked with the "invalid bits" mask, zero selects the fast path.
+ EmitAddressCheck(context, size);
+
+ ILLabel lblFastPath = new ILLabel();
+ ILLabel lblSlowPath = new ILLabel();
+ ILLabel lblEnd = new ILLabel();
+
+ context.Emit(OpCodes.Brfalse_S, lblFastPath);
+
+ //Slow path, also the target EmitPtPointerLoad may branch to.
+ context.MarkLabel(lblSlowPath);
+
+ EmitReadIntFallback(context, size);
+
+ context.Emit(OpCodes.Br, lblEnd);
+
+ context.MarkLabel(lblFastPath);
+
+ //Pushes the host pointer for the address.
+ EmitPtPointerLoad(context, lblSlowPath);
+
+ //Zero extending indirect load matching the access size.
+ switch (size)
+ {
+ case 0: context.Emit(OpCodes.Ldind_U1); break;
+ case 1: context.Emit(OpCodes.Ldind_U2); break;
+ case 2: context.Emit(OpCodes.Ldind_U4); break;
+ case 3: context.Emit(OpCodes.Ldind_I8); break;
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+
+ //Emits a vector load of (1 << size) bytes from the address saved in _tempIntAddress.
+ //Only sizes 2 (4 bytes), 3 (8 bytes) and 4 (16 bytes) are supported here, any other
+ //size throws InvalidOperationException while the code is being emitted.
+ //The fast path loads directly through the page table with SSE/SSE2 loads. Addresses that
+ //fail the address check, or whose page table lookup branches back, use the MemoryManager call.
+ private static void EmitReadVector(ILEmitterCtx context, int size)
+ {
+ //Pushes the address masked with the "invalid bits" mask, zero selects the fast path.
+ //The mask includes the low (1 << size) - 1 bits, so misaligned addresses take the
+ //slow path and the aligned 16 bytes load below is never reached with them.
+ EmitAddressCheck(context, size);
+
+ ILLabel lblFastPath = new ILLabel();
+ ILLabel lblSlowPath = new ILLabel();
+ ILLabel lblEnd = new ILLabel();
+
+ context.Emit(OpCodes.Brfalse_S, lblFastPath);
+
+ //Slow path, also the target EmitPtPointerLoad may branch to.
+ context.MarkLabel(lblSlowPath);
+
+ EmitReadVectorFallback(context, size);
+
+ context.Emit(OpCodes.Br, lblEnd);
+
+ context.MarkLabel(lblFastPath);
+
+ //Pushes the host pointer for the address.
+ EmitPtPointerLoad(context, lblSlowPath);
+
+ switch (size)
+ {
+ case 2: context.EmitCall(typeof(Sse), nameof(Sse.LoadScalarVector128)); break;
+
+ case 3:
+ {
+ //Sse2.LoadScalarVector128 is overloaded, pick the double* overload explicitly.
+ Type[] types = new Type[] { typeof(double*) };
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.LoadScalarVector128), types));
+
+ break;
+ }
+
+ case 4: context.EmitCall(typeof(Sse), nameof(Sse.LoadAlignedVector128)); break;
+
+ //Without the default label this throw was unreachable, and an unsupported
+ //size would silently emit no load at all.
+ default: throw new InvalidOperationException($"Invalid vector load size of {1 << size} bytes.");
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+
+ //Emits an integer store of (1 << size) bytes, writing the value saved in _tempIntValue
+ //to the address saved in _tempIntAddress.
+ //The fast path writes directly through the page table. Addresses that fail the
+ //address check, or whose page table lookup branches back, use the MemoryManager call.
+ private static void EmitWriteInt(ILEmitterCtx context, int size)
+ {
+ //Pushes the address masked with the "invalid bits" mask, zero selects the fast path.
+ EmitAddressCheck(context, size);
+
+ ILLabel lblFastPath = new ILLabel();
+ ILLabel lblSlowPath = new ILLabel();
+ ILLabel lblEnd = new ILLabel();
+
+ context.Emit(OpCodes.Brfalse_S, lblFastPath);
+
+ //Slow path, also the target EmitPtPointerLoad may branch to.
+ context.MarkLabel(lblSlowPath);
+
+ EmitWriteIntFallback(context, size);
+
+ context.Emit(OpCodes.Br, lblEnd);
+
+ context.MarkLabel(lblFastPath);
+
+ //Pushes the host pointer first, then the value to be stored.
+ EmitPtPointerLoad(context, lblSlowPath);
+
+ context.EmitLdint(_tempIntValue);
+
+ //Values smaller than 64 bits are converted to 32 bits before the sized store.
+ if (size < 3)
+ {
+ context.Emit(OpCodes.Conv_U4);
+ }
+
+ switch (size)
+ {
+ case 0: context.Emit(OpCodes.Stind_I1); break;
+ case 1: context.Emit(OpCodes.Stind_I2); break;
+ case 2: context.Emit(OpCodes.Stind_I4); break;
+ case 3: context.Emit(OpCodes.Stind_I8); break;
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+
+ //Emits a vector store of (1 << size) bytes, writing the value saved in _tempVecValue
+ //to the address saved in _tempIntAddress.
+ //Only sizes 2, 3 and 4 are supported, any other size throws InvalidOperationException.
+ //The fast path writes directly through the page table with SSE/SSE2 stores. Addresses that
+ //fail the address check, or whose page table lookup branches back, use the MemoryManager call.
+ private static void EmitWriteVector(ILEmitterCtx context, int size)
+ {
+ //Pushes the address masked with the "invalid bits" mask, zero selects the fast path.
+ EmitAddressCheck(context, size);
+
+ ILLabel lblFastPath = new ILLabel();
+ ILLabel lblSlowPath = new ILLabel();
+ ILLabel lblEnd = new ILLabel();
+
+ context.Emit(OpCodes.Brfalse_S, lblFastPath);
+
+ //Slow path, also the target EmitPtPointerLoad may branch to.
+ context.MarkLabel(lblSlowPath);
+
+ EmitWriteVectorFallback(context, size);
+
+ context.Emit(OpCodes.Br, lblEnd);
+
+ context.MarkLabel(lblFastPath);
+
+ //Pushes the host pointer first, then the vector to be stored.
+ EmitPtPointerLoad(context, lblSlowPath);
+
+ context.EmitLdvec(_tempVecValue);
+
+ switch (size)
+ {
+ case 2: context.EmitCall(typeof(Sse), nameof(Sse.StoreScalar)); break;
+ case 3: context.EmitCall(typeof(Sse2), nameof(Sse2.StoreScalar)); break;
+ case 4: context.EmitCall(typeof(Sse), nameof(Sse.StoreAligned)); break;
+
+ default: throw new InvalidOperationException($"Invalid vector store size of {1 << size} bytes.");
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+
+ //Emits code that pushes the saved address ANDed with a mask of bits that must be zero.
+ //The mask covers every bit at or above the address space size, plus the low
+ //(1 << size) - 1 bits, so the result is zero only for in-range, size aligned addresses.
+ private static void EmitAddressCheck(ILEmitterCtx context, int size)
+ {
+ long outOfRangeBits = ~(context.Memory.AddressSpaceSize - 1);
+ long misalignedBits = (1u << size) - 1;
+
+ context.EmitLdint(_tempIntAddress);
+ context.EmitLdc_I(outOfRangeBits | misalignedBits);
+ context.Emit(OpCodes.And);
+ }
+
+ //Emits a page table walk for the address saved in _tempIntAddress and leaves
+ //the resulting host pointer (page pointer plus offset inside the page) on the stack.
+ //When the memory manager has no write watch support, entries with any of the
+ //PteFlagsMask bits set make the emitted code branch to lblFallbackPath instead.
+ private static void EmitPtPointerLoad(ILEmitterCtx context, ILLabel lblFallbackPath)
+ {
+ //The page table base address is embedded in the code as a constant.
+ context.EmitLdc_I8(context.Memory.PageTable.ToInt64());
+
+ context.Emit(OpCodes.Conv_I);
+
+ int bit = MemoryManager.PageBits;
+
+ //One iteration per page table level, each one loads the pointer stored
+ //at the index taken from the next PtLevelBits bits of the address.
+ do
+ {
+ context.EmitLdint(_tempIntAddress);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ context.Emit(OpCodes.Conv_U8);
+ }
+
+ context.EmitLsr(bit);
+
+ bit += context.Memory.PtLevelBits;
+
+ //The index of the last level is not masked.
+ //NOTE(review): presumably safe because EmitAddressCheck already rejected
+ //addresses with bits set above the address space - confirm.
+ if (bit < context.Memory.AddressSpaceBits)
+ {
+ context.EmitLdc_I8(context.Memory.PtLevelMask);
+
+ context.Emit(OpCodes.And);
+ }
+
+ //Scale the index by the pointer size, add it to the table pointer and load the entry.
+ context.EmitLdc_I8(IntPtr.Size);
+
+ context.Emit(OpCodes.Mul);
+ context.Emit(OpCodes.Conv_I);
+ context.Emit(OpCodes.Add);
+ context.Emit(OpCodes.Ldind_I);
+ }
+ while (bit < context.Memory.AddressSpaceBits);
+
+ if (!context.Memory.HasWriteWatchSupport)
+ {
+ //The entry is kept in a temp so that the stack holds nothing from this
+ //method when the branch to the fallback path is taken.
+ context.Emit(OpCodes.Conv_U8);
+
+ context.EmitStint(_tempIntPtAddr);
+ context.EmitLdint(_tempIntPtAddr);
+
+ context.EmitLdc_I8(MemoryManager.PteFlagsMask);
+
+ context.Emit(OpCodes.And);
+
+ context.Emit(OpCodes.Brtrue, lblFallbackPath);
+
+ context.EmitLdint(_tempIntPtAddr);
+
+ context.Emit(OpCodes.Conv_I);
+ }
+
+ //Add the offset inside the page to the page pointer.
+ context.EmitLdint(_tempIntAddress);
+
+ context.EmitLdc_I(MemoryManager.PageMask);
+
+ context.Emit(OpCodes.And);
+ context.Emit(OpCodes.Conv_I);
+ context.Emit(OpCodes.Add);
+ }
+
+ //Emits a call to the MemoryManager integer read method matching the access size,
+ //passing the address saved in _tempIntAddress.
+ private static void EmitReadIntFallback(ILEmitterCtx context, int size)
+ {
+ //Stays null for sizes without a matching method.
+ string readerName =
+ size == 0 ? nameof(MemoryManager.ReadByte) :
+ size == 1 ? nameof(MemoryManager.ReadUInt16) :
+ size == 2 ? nameof(MemoryManager.ReadUInt32) :
+ size == 3 ? nameof(MemoryManager.ReadUInt64) : null;
+
+ //Arguments: the memory manager, then the address.
+ context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+ context.EmitLdint(_tempIntAddress);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ //Convert the address to unsigned 64 bits when the opcode uses 32 bits registers.
+ context.Emit(OpCodes.Conv_U8);
+ }
+
+ context.EmitCall(typeof(MemoryManager), readerName);
+ }
+
+ //Emits a call to the MemoryManager vector read method matching the access size,
+ //passing the address saved in _tempIntAddress.
+ private static void EmitReadVectorFallback(ILEmitterCtx context, int size)
+ {
+ //Stays null for sizes without a matching method.
+ string readerName =
+ size == 0 ? nameof(MemoryManager.ReadVector8) :
+ size == 1 ? nameof(MemoryManager.ReadVector16) :
+ size == 2 ? nameof(MemoryManager.ReadVector32) :
+ size == 3 ? nameof(MemoryManager.ReadVector64) :
+ size == 4 ? nameof(MemoryManager.ReadVector128) : null;
+
+ //Arguments: the memory manager, then the address.
+ context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+ context.EmitLdint(_tempIntAddress);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ //Convert the address to unsigned 64 bits when the opcode uses 32 bits registers.
+ context.Emit(OpCodes.Conv_U8);
+ }
+
+ context.EmitCall(typeof(MemoryManager), readerName);
+ }
+
+ //Emits a call to the MemoryManager integer write method matching the access size,
+ //passing the address saved in _tempIntAddress and the value saved in _tempIntValue.
+ private static void EmitWriteIntFallback(ILEmitterCtx context, int size)
+ {
+ //Stays null for sizes without a matching method.
+ string writerName =
+ size == 0 ? nameof(MemoryManager.WriteByte) :
+ size == 1 ? nameof(MemoryManager.WriteUInt16) :
+ size == 2 ? nameof(MemoryManager.WriteUInt32) :
+ size == 3 ? nameof(MemoryManager.WriteUInt64) : null;
+
+ //Arguments: the memory manager, the address, then the value.
+ context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+ context.EmitLdint(_tempIntAddress);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ //Convert the address to unsigned 64 bits when the opcode uses 32 bits registers.
+ context.Emit(OpCodes.Conv_U8);
+ }
+
+ context.EmitLdint(_tempIntValue);
+
+ if (size < 3)
+ {
+ //Values smaller than 64 bits are converted to 32 bits before the call.
+ context.Emit(OpCodes.Conv_U4);
+ }
+
+ context.EmitCall(typeof(MemoryManager), writerName);
+ }
+
+ //Emits a call to the MemoryManager vector write method matching the access size,
+ //passing the address saved in _tempIntAddress and the vector saved in _tempVecValue.
+ private static void EmitWriteVectorFallback(ILEmitterCtx context, int size)
+ {
+ //Stays null for sizes without a matching method.
+ string writerName =
+ size == 0 ? nameof(MemoryManager.WriteVector8) :
+ size == 1 ? nameof(MemoryManager.WriteVector16) :
+ size == 2 ? nameof(MemoryManager.WriteVector32) :
+ size == 3 ? nameof(MemoryManager.WriteVector64) :
+ size == 4 ? nameof(MemoryManager.WriteVector128) : null;
+
+ //Arguments: the memory manager, the address, then the vector.
+ context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+ context.EmitLdint(_tempIntAddress);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ //Convert the address to unsigned 64 bits when the opcode uses 32 bits registers.
+ context.Emit(OpCodes.Conv_U8);
+ }
+
+ context.EmitLdvec(_tempVecValue);
+
+ context.EmitCall(typeof(MemoryManager), writerName);
+ }
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
index acb9f7f093..d2d87beffe 100644
--- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
@@ -1,4 +1,5 @@
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
using ChocolArm64.Decoders;
using ChocolArm64.State;
@@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
- Type[] typesSsv = new Type[] { typeof(float) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSsv = new Type[] { typeof(float) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
@@ -193,9 +194,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
-
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@@ -203,20 +203,19 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
- Type[] typesSsv = new Type[] { typeof(double) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSsv = new Type[] { typeof(double) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
-
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -242,8 +241,8 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
- Type[] typesSav = new Type[] { typeof(float) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(float) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
@@ -251,9 +250,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
-
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@@ -264,20 +262,19 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
- Type[] typesSav = new Type[] { typeof(double) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(double) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
-
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -299,15 +296,15 @@ namespace ChocolArm64.Instructions
if (op.Size == 0)
{
- Type[] typesSsv = new Type[] { typeof(float) };
- Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSsv = new Type[] { typeof(float) };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@@ -315,17 +312,17 @@ namespace ChocolArm64.Instructions
}
else /* if (op.Size == 1) */
{
- Type[] typesSsv = new Type[] { typeof(double) };
- Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSsv = new Type[] { typeof(double) };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -349,15 +346,15 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
- Type[] typesSav = new Type[] { typeof(float) };
- Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(float) };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@@ -368,17 +365,17 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
- Type[] typesSav = new Type[] { typeof(double) };
- Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(double) };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -433,7 +430,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@@ -445,12 +442,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesAddH = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rn);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -536,14 +533,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Ra);
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Ra);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), typesMulAdd));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -718,14 +715,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rd);
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -751,18 +748,16 @@ namespace ChocolArm64.Instructions
Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdvec(op.Rd);
-
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
-
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
context.EmitStvec(op.Rd);
@@ -776,21 +771,19 @@ namespace ChocolArm64.Instructions
Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) };
Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rd);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rn);
-
- EmitLdvecWithCastToDouble(context, op.Rm);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
-
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -841,14 +834,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rd);
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -874,17 +867,15 @@ namespace ChocolArm64.Instructions
Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdvec(op.Rd);
-
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
-
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
context.EmitStvec(op.Rd);
@@ -899,21 +890,19 @@ namespace ChocolArm64.Instructions
Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) };
Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rd);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rn);
-
- EmitLdvecWithCastToDouble(context, op.Rm);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -950,14 +939,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Ra);
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Ra);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -1022,7 +1011,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
@@ -1041,17 +1030,17 @@ namespace ChocolArm64.Instructions
Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) };
Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -1125,11 +1114,11 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -1175,11 +1164,11 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -1242,8 +1231,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse
- && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalScalar));
}
@@ -1262,8 +1250,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse
- && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Reciprocal));
}
@@ -1310,13 +1297,13 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(2d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -1367,13 +1354,13 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(2d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -1579,8 +1566,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse
- && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrtScalar));
}
@@ -1599,8 +1585,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse
- && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrt));
}
@@ -1654,14 +1639,14 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(3d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
@@ -1719,14 +1704,14 @@ namespace ChocolArm64.Instructions
context.EmitLdc_R8(3d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitLdvecWithCastToDouble(context, op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -1800,11 +1785,18 @@ namespace ChocolArm64.Instructions
public static void Mla_V(ILEmitterCtx context)
{
- EmitVectorTernaryOpZx(context, () =>
+ if (Optimizations.UseSse41) // Take the intrinsic-based path when the Sse41 optimization flag is set.
{
- context.Emit(OpCodes.Mul);
- context.Emit(OpCodes.Add);
- });
+ EmitSse41Mul_AddSub(context, nameof(Sse2.Add)); // Passes "Add" as the accumulate operation name to the shared helper.
+ }
+ else // Otherwise keep the previous per-element IL: Mul followed by Add.
+ {
+ EmitVectorTernaryOpZx(context, () =>
+ {
+ context.Emit(OpCodes.Mul);
+ context.Emit(OpCodes.Add);
+ });
+ }
}
public static void Mla_Ve(ILEmitterCtx context)
@@ -1818,11 +1810,18 @@ namespace ChocolArm64.Instructions
public static void Mls_V(ILEmitterCtx context)
{
- EmitVectorTernaryOpZx(context, () =>
+ if (Optimizations.UseSse41) // Take the intrinsic-based path when the Sse41 optimization flag is set.
{
- context.Emit(OpCodes.Mul);
- context.Emit(OpCodes.Sub);
- });
+ EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract)); // Passes "Subtract" as the accumulate operation name to the shared helper.
+ }
+ else // Otherwise keep the previous per-element IL: Mul followed by Sub.
+ {
+ EmitVectorTernaryOpZx(context, () =>
+ {
+ context.Emit(OpCodes.Mul);
+ context.Emit(OpCodes.Sub);
+ });
+ }
}
public static void Mls_Ve(ILEmitterCtx context)
@@ -1836,7 +1835,14 @@ namespace ChocolArm64.Instructions
public static void Mul_V(ILEmitterCtx context)
{
- EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
+ if (Optimizations.UseSse41) // Take the intrinsic-based path when the Sse41 optimization flag is set.
+ {
+ EmitSse41Mul_AddSub(context); // No operation name supplied here, unlike Mla_V/Mls_V; presumably multiply-only - confirm against the helper.
+ }
+ else // Otherwise keep the previous per-element IL multiply.
+ {
+ EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
+ }
}
public static void Mul_Ve(ILEmitterCtx context)
@@ -1857,18 +1863,12 @@ namespace ChocolArm64.Instructions
Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
- string[] namesSzv = new string[] { nameof(VectorHelper.VectorSByteZero),
- nameof(VectorHelper.VectorInt16Zero),
- nameof(VectorHelper.VectorInt32Zero),
- nameof(VectorHelper.VectorInt64Zero) };
-
- VectorHelper.EmitCall(context, namesSzv[op.Size]);
-
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -1915,20 +1915,125 @@ namespace ChocolArm64.Instructions
public static void Sabd_V(ILEmitterCtx context)
{
- EmitVectorBinaryOpSx(context, () =>
+ if (Optimizations.UseSse2) // SSE2 path: pick (Rn - Rm) or (Rm - Rn) per lane using a compare mask.
{
- context.Emit(OpCodes.Sub);
- EmitAbs(context);
- });
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+ Type[] typesAndOr = new Type[] { typeof(Vector128), typeof(Vector128) };
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub)); // mask = Rn > Rm
+
+ context.EmitStvectmp(); // Cmp mask (Rn > Rm), kept for the AndNot below
+ context.EmitLdvectmp(); // Cmp mask, first operand of the And below
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); // Rn - Rm
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr)); // mask & (Rn - Rm)
+
+ context.EmitLdvectmp(); // Cmp mask, first operand of the AndNot below
+
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); // Rm - Rn (operands swapped)
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); // ~mask & (Rm - Rn)
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr)); // Merge the two masked differences.
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64) // 64-bit form only.
+ {
+ EmitVectorZeroUpper(context, op.Rd); // Presumably clears the upper half of Rd - confirm against the helper.
+ }
+ }
+ else // Fallback: per-element Sub followed by EmitAbs, as before this change.
+ {
+ EmitVectorBinaryOpSx(context, () =>
+ {
+ context.Emit(OpCodes.Sub);
+ EmitAbs(context);
+ });
+ }
}
public static void Sabdl_V(ILEmitterCtx context)
{
- EmitVectorWidenRnRmBinaryOpSx(context, () =>
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 2) // SSE4.1 path covers Size 0 and 1 only (widened to Int16 / Int32 below).
{
- context.Emit(OpCodes.Sub);
- EmitAbs(context);
- });
+ Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
+ VectorIntTypesPerSizeLog2[op.Size + 1] };
+ Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
+ Type[] typesAndOr = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
+
+ string nameCvt = op.Size == 0
+ ? nameof(Sse41.ConvertToVector128Int16)
+ : nameof(Sse41.ConvertToVector128Int32);
+
+ context.EmitLdvec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128) // 128-bit form: shift right by 8 bytes so the upper half gets widened.
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); // Widen the Rn elements.
+
+ context.EmitLdvec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128) // Same upper-half selection for Rm.
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); // Widen the Rm elements.
+
+ context.EmitStvectmp2(); // Long Rm (top of stack, so it is stored first)
+ context.EmitStvectmp(); // Long Rn
+
+ context.EmitLdvectmp(); // Long Rn
+ context.EmitLdvectmp2(); // Long Rm
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub)); // mask = Long Rn > Long Rm
+
+ context.EmitStvectmp3(); // Cmp mask, kept for the AndNot below
+ context.EmitLdvectmp3(); // Cmp mask, first operand of the And below
+
+ context.EmitLdvectmp(); // Long Rn
+ context.EmitLdvectmp2(); // Long Rm
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); // Long Rn - Long Rm
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr)); // mask & (Rn - Rm)
+
+ context.EmitLdvectmp3(); // Cmp mask, first operand of the AndNot below
+
+ context.EmitLdvectmp2(); // Long Rm
+ context.EmitLdvectmp(); // Long Rn
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); // Long Rm - Long Rn (operands swapped)
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); // ~mask & (Rm - Rn)
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr)); // Merge the two masked differences.
+
+ context.EmitStvec(op.Rd);
+ }
+ else // Fallback: widening per-element Sub followed by EmitAbs, as before this change.
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, () =>
+ {
+ context.Emit(OpCodes.Sub);
+ EmitAbs(context);
+ });
+ }
}
public static void Sadalp_V(ILEmitterCtx context)
@@ -1951,25 +2056,29 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -1997,20 +2106,20 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
-
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2027,29 +2136,22 @@ namespace ChocolArm64.Instructions
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
- context.EmitLdvectmp();
- context.EmitLdvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
- context.EmitLdc_I4(1);
+ context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2083,23 +2185,21 @@ namespace ChocolArm64.Instructions
context.EmitStvectmp();
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));
context.Emit(OpCodes.Dup);
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
context.EmitLdvectmp();
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));
-
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
-
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAddSub));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2128,12 +2228,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2169,12 +2269,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2217,21 +2317,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1);
-
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2239,7 +2342,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2277,21 +2380,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1);
-
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2299,7 +2405,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2423,23 +2529,22 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(op.Size == 0 ? sbyte.MinValue : short.MinValue);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- context.Emit(OpCodes.Dup);
context.EmitStvectmp();
+ context.EmitLdvectmp();
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd));
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd));
-
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2476,25 +2581,29 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2517,20 +2626,20 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
-
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2594,20 +2703,152 @@ namespace ChocolArm64.Instructions
public static void Uabd_V(ILEmitterCtx context)
{
- EmitVectorBinaryOpZx(context, () =>
+ // Fast path: when Optimizations.UseSse41 is set the result is produced with SSE intrinsic calls;
+ // otherwise the generic EmitVectorBinaryOpZx emitter (Sub followed by EmitAbs) is used.
+ if (Optimizations.UseSse41)
{
- context.Emit(OpCodes.Sub);
- EmitAbs(context);
- });
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
+ Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
+ Type[] typesAndOr = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
+
+ // Max is looked up on Sse2 for op.Size == 0 and on Sse41 for the other sizes.
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
+
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
+
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub));
+
+ // AndNot against an all-ones vector inverts the compare result (AndNot(a, b) is ~a & b),
+ // so the mask marks the elements where Max(Rm, Rn) != Rm.
+ context.EmitLdc_I8(-1L);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+ context.EmitStvectmp(); // Cmp mask
+ context.EmitLdvectmp(); // Cmp mask
+
+ // First half of the select: mask And (Rn - Rm).
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
+
+ context.EmitLdvectmp(); // Cmp mask
+
+ // Second half of the select: AndNot(mask, Rm - Rn); the final Or merges both halves.
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, () =>
+ {
+ context.Emit(OpCodes.Sub);
+ EmitAbs(context);
+ });
+ }
}
public static void Uabdl_V(ILEmitterCtx context)
{
- EmitVectorWidenRnRmBinaryOpZx(context, () =>
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ // The SSE path is taken only when Optimizations.UseSse41 is set and op.Size is 0 or 1;
+ // every other case goes through EmitVectorWidenRnRmBinaryOpZx (Sub followed by EmitAbs).
+ if (Optimizations.UseSse41 && op.Size < 2)
{
- context.Emit(OpCodes.Sub);
- EmitAbs(context);
- });
+ // Max/CompareEqual/Subtract are resolved for element size op.Size + 1, i.e. the size after conversion.
+ Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1],
+ VectorUIntTypesPerSizeLog2[op.Size + 1] };
+ Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1],
+ VectorIntTypesPerSizeLog2 [op.Size + 1] };
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
+ Type[] typesAndOr = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
+ Type[] typesSav = new Type[] { typeof(long) };
+
+ // Conversion intrinsic applied to each source operand before the comparison and subtraction.
+ string nameCvt = op.Size == 0
+ ? nameof(Sse41.ConvertToVector128Int16)
+ : nameof(Sse41.ConvertToVector128Int32);
+
+ context.EmitLdvec(op.Rn);
+
+ // For the Simd128 form the vector is first shifted right by 8 bytes,
+ // presumably so that the upper half of the register is what the conversion consumes.
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
+
+ context.EmitLdvec(op.Rm);
+
+ // Same optional 8-byte shift for the second operand.
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
+
+ // Both converted operands are parked in temporaries because they are reloaded several times below.
+ context.EmitStvectmp2(); // Long Rm
+ context.EmitStvectmp(); // Long Rn
+
+ context.EmitLdvectmp2(); // Long Rm
+ context.EmitLdvectmp(); // Long Rn
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.Max), typesMax));
+
+ context.EmitLdvectmp2(); // Long Rm
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub));
+
+ // AndNot against an all-ones vector inverts the compare result (AndNot(a, b) is ~a & b),
+ // so the mask marks the elements where Max(Rm, Rn) != Rm.
+ context.EmitLdc_I8(-1L);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+ context.EmitStvectmp3(); // Cmp mask
+ context.EmitLdvectmp3(); // Cmp mask
+
+ // First half of the select: mask And (Rn - Rm).
+ context.EmitLdvectmp(); // Long Rn
+ context.EmitLdvectmp2(); // Long Rm
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
+
+ context.EmitLdvectmp3(); // Cmp mask
+
+ // Second half of the select: AndNot(mask, Rm - Rn); the final Or merges both halves.
+ context.EmitLdvectmp2(); // Long Rm
+ context.EmitLdvectmp(); // Long Rn
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
+
+ context.EmitStvec(op.Rd);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, () =>
+ {
+ context.Emit(OpCodes.Sub);
+ EmitAbs(context);
+ });
+ }
}
public static void Uadalp_V(ILEmitterCtx context)
@@ -2630,25 +2871,29 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2695,20 +2940,20 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
-
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2725,29 +2970,22 @@ namespace ChocolArm64.Instructions
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
- context.EmitLdvectmp();
- context.EmitLdvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
- context.EmitLdc_I4(1);
+ context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2774,16 +3012,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
- context.Emit(OpCodes.Dup);
-
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
-
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAvgSub));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2812,12 +3048,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2853,12 +3089,12 @@ namespace ChocolArm64.Instructions
Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -2901,21 +3137,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1);
-
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2923,7 +3162,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -2961,21 +3200,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1);
-
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2983,7 +3225,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -3052,12 +3294,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -3104,25 +3346,29 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -3145,20 +3391,20 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
-
- EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -3295,5 +3541,77 @@ namespace ChocolArm64.Instructions
EmitVectorZeroUpper(context, op.Rd);
}
}
+
+ // Emits an element-wise multiply of Rn by Rm through SSE intrinsic calls and stores the result in Rd.
+ // When nameAddSub is non-null, Rd is loaded first and the Sse2 method with that name is then
+ // applied to (Rd, product) before the store; when it is null the product is stored directly.
+ private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null)
+ {
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ // Rd has to be pushed before the product so it ends up as the first argument of nameAddSub.
+ if (nameAddSub != null)
+ {
+ context.EmitLdvec(op.Rd);
+ }
+
+ if (op.Size == 0)
+ {
+ // No single multiply call is used for this size; the result is assembled from two
+ // MultiplyLow products that are merged with BlendVariable.
+ // NOTE(review): the element types of these Vector128 arguments are not visible here — confirm lane widths.
+ Type[] typesBle = new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) };
+ Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesShs = new Type[] { typeof(Vector128), typeof(byte) };
+ Type[] typesSav = new Type[] { typeof(int) };
+
+ // First product: both operands shifted right by 8, multiplied, then shifted back left by 8.
+ context.EmitLdvec(op.Rn);
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
+
+ context.EmitLdvec(op.Rm);
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
+
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
+
+ // Second product: the unshifted operands.
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
+
+ // Blend mask: 0x00FF00FF replicated across the vector selects between the two products.
+ context.EmitLdc_I4(0x00FF00FF);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), typesBle));
+ }
+ else if (op.Size == 1)
+ {
+ // This size maps directly onto Sse2.MultiplyLow.
+ Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) };
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
+ }
+ else /* if (op.Size == 2) */
+ {
+ // This size uses the Sse41 overload of MultiplyLow.
+ Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) };
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), typesMul));
+ }
+
+ // Optional accumulate step: calls Sse2.<nameAddSub> on (Rd, product).
+ if (nameAddSub != null)
+ {
+ Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, typesAddSub));
+ }
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
}
}
diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs
index fdf3951e64..d54edb7eda 100644
--- a/ChocolArm64/Instructions/InstEmitSimdCmp.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs
@@ -20,19 +20,32 @@ namespace ChocolArm64.Instructions
public static void Cmeq_V(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 op)
+ if (Optimizations.UseSse41)
{
- if (op.Size < 3 && Optimizations.UseSse2)
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+
+ Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse41);
+
+ context.EmitLdvec(op.Rn);
+
+ if (op is OpCodeSimdReg64 binOp)
{
- EmitSse2Op(context, nameof(Sse2.CompareEqual));
- }
- else if (op.Size == 3 && Optimizations.UseSse41)
- {
- EmitSse41Op(context, nameof(Sse41.CompareEqual));
+ context.EmitLdvec(binOp.Rm);
}
else
{
- EmitCmpOp(context, OpCodes.Beq_S, scalar: false);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ }
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareEqual), typesCmp));
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
}
}
else
@@ -48,7 +61,45 @@ namespace ChocolArm64.Instructions
public static void Cmge_V(ILEmitterCtx context)
{
- EmitCmpOp(context, OpCodes.Bge_S, scalar: false);
+ // SSE path (Optimizations.UseSse42): no greater-or-equal call is used; the result is the bitwise
+ // inverse of CompareGreaterThan(second operand, Rn). Otherwise falls back to EmitCmpOp with Bge_S.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
+
+ // CompareGreaterThan is looked up on Sse42 only for op.Size == 3, on Sse2 otherwise.
+ Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse42);
+
+ // The second operand is pushed first: Rm for the register form, or the vector returned by
+ // VectorHelper.VectorSingleZero when the op is not an OpCodeSimdReg64.
+ if (op is OpCodeSimdReg64 binOp)
+ {
+ context.EmitLdvec(binOp.Rm);
+ }
+ else
+ {
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ }
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp));
+
+ // AndNot against an all-ones vector inverts the compare result (AndNot(a, b) is ~a & b).
+ context.EmitLdc_I8(-1L);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitCmpOp(context, OpCodes.Bge_S, scalar: false);
+ }
}
public static void Cmgt_S(ILEmitterCtx context)
@@ -58,19 +109,32 @@ namespace ChocolArm64.Instructions
public static void Cmgt_V(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 op)
+ if (Optimizations.UseSse42)
{
- if (op.Size < 3 && Optimizations.UseSse2)
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+
+ Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse42);
+
+ context.EmitLdvec(op.Rn);
+
+ if (op is OpCodeSimdReg64 binOp)
{
- EmitSse2Op(context, nameof(Sse2.CompareGreaterThan));
- }
- else if (op.Size == 3 && Optimizations.UseSse42)
- {
- EmitSse42Op(context, nameof(Sse42.CompareGreaterThan));
+ context.EmitLdvec(binOp.Rm);
}
else
{
- EmitCmpOp(context, OpCodes.Bgt_S, scalar: false);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ }
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp));
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
}
}
else
@@ -86,7 +150,42 @@ namespace ChocolArm64.Instructions
public static void Cmhi_V(ILEmitterCtx context)
{
- EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false);
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ // SSE path (Optimizations.UseSse41 and op.Size < 3): the result is the bitwise inverse of
+ // (Max(Rm, Rn) == Rm), with Max resolved on the unsigned vector types.
+ // Other cases fall back to EmitCmpOp with Bgt_Un_S.
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
+
+ // Max is looked up on Sse2 for op.Size == 0 and on Sse41 for the other sizes.
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
+
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
+
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp));
+
+ // AndNot against an all-ones vector inverts the compare result (AndNot(a, b) is ~a & b).
+ context.EmitLdc_I8(-1L);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false);
+ }
}
public static void Cmhs_S(ILEmitterCtx context)
@@ -96,7 +195,35 @@ namespace ChocolArm64.Instructions
public static void Cmhs_V(ILEmitterCtx context)
{
- EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false);
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ // SSE path (Optimizations.UseSse41 and op.Size < 3): the result is (Max(Rn, Rm) == Rn),
+ // with Max resolved on the unsigned vector types. Other cases fall back to EmitCmpOp with Bge_Un_S.
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
+
+ // Max is looked up on Sse2 for op.Size == 0 and on Sse41 for the other sizes.
+ Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
+
+ context.EmitLdvec(op.Rn);
+
+ // No inversion is needed here: equality with Rn is already the wanted mask.
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp));
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false);
+ }
}
public static void Cmle_S(ILEmitterCtx context)
@@ -106,7 +233,37 @@ namespace ChocolArm64.Instructions
public static void Cmle_V(ILEmitterCtx context)
{
- EmitCmpOp(context, OpCodes.Ble_S, scalar: false);
+ // SSE path (Optimizations.UseSse42): the result is the bitwise inverse of
+ // CompareGreaterThan(Rn, VectorSingleZero). Otherwise falls back to EmitCmpOp with Ble_S.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
+
+ // CompareGreaterThan is looked up on Sse42 only for op.Size == 3, on Sse2 otherwise.
+ Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse42);
+
+ context.EmitLdvec(op.Rn);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp));
+
+ // AndNot against an all-ones vector inverts the compare result (AndNot(a, b) is ~a & b).
+ context.EmitLdc_I8(-1L);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitCmpOp(context, OpCodes.Ble_S, scalar: false);
+ }
}
public static void Cmlt_S(ILEmitterCtx context)
@@ -116,7 +273,30 @@ namespace ChocolArm64.Instructions
public static void Cmlt_V(ILEmitterCtx context)
{
- EmitCmpOp(context, OpCodes.Blt_S, scalar: false);
+ // SSE path (Optimizations.UseSse42): the result is CompareGreaterThan(VectorSingleZero, Rn),
+ // i.e. the operands are swapped instead of inverting a mask. Otherwise falls back to EmitCmpOp with Blt_S.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+
+ // CompareGreaterThan is looked up on Sse42 only for op.Size == 3, on Sse2 otherwise.
+ Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse42);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp));
+
+ context.EmitStvec(op.Rd);
+
+ // EmitVectorZeroUpper is only applied to Rd for the Simd64 register size.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitCmpOp(context, OpCodes.Blt_S, scalar: false);
+ }
}
public static void Cmtst_S(ILEmitterCtx context)
@@ -318,9 +498,6 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
if (cmpWithZero)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
@@ -330,8 +507,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rm);
}
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
+ context.EmitStvectmp();
+ context.EmitLdvectmp();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp));
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
@@ -340,18 +517,18 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Brtrue_S, lblNaN);
- context.EmitLdc_I4(0);
+ context.Emit(OpCodes.Ldc_I4_0);
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
- context.EmitLdvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp));
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
- context.EmitLdvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp));
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
- context.EmitLdvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp));
context.EmitStflg((int)PState.NBit);
@@ -363,10 +540,10 @@ namespace ChocolArm64.Instructions
context.MarkLabel(lblNaN);
- context.EmitLdc_I4(1);
- context.Emit(OpCodes.Dup);
- context.EmitLdc_I4(0);
- context.Emit(OpCodes.Dup);
+ context.Emit(OpCodes.Ldc_I4_1);
+ context.Emit(OpCodes.Ldc_I4_1);
+ context.Emit(OpCodes.Ldc_I4_0);
+ context.Emit(OpCodes.Ldc_I4_0);
context.EmitStflg((int)PState.NBit);
context.EmitStflg((int)PState.ZBit);
@@ -382,42 +559,39 @@ namespace ChocolArm64.Instructions
ILLabel lblNaN = new ILLabel();
ILLabel lblEnd = new ILLabel();
- EmitLdvecWithCastToDouble(context, op.Rn);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
+ context.EmitLdvec(op.Rn);
if (cmpWithZero)
{
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
}
else
{
- EmitLdvecWithCastToDouble(context, op.Rm);
+ context.EmitLdvec(op.Rm);
}
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
+ context.EmitStvectmp();
+ context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp));
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp));
context.Emit(OpCodes.Brtrue_S, lblNaN);
- context.EmitLdc_I4(0);
+ context.Emit(OpCodes.Ldc_I4_0);
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
- context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp));
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
- context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp));
+ context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
- context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp));
context.EmitStflg((int)PState.NBit);
@@ -429,10 +603,10 @@ namespace ChocolArm64.Instructions
context.MarkLabel(lblNaN);
- context.EmitLdc_I4(1);
- context.Emit(OpCodes.Dup);
- context.EmitLdc_I4(0);
- context.Emit(OpCodes.Dup);
+ context.Emit(OpCodes.Ldc_I4_1);
+ context.Emit(OpCodes.Ldc_I4_1);
+ context.Emit(OpCodes.Ldc_I4_0);
+ context.Emit(OpCodes.Ldc_I4_0);
context.EmitStflg((int)PState.NBit);
context.EmitStflg((int)PState.ZBit);
@@ -656,26 +830,26 @@ namespace ChocolArm64.Instructions
if (!isLeOrLt)
{
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
}
if (op is OpCodeSimdReg64 binOp)
{
- EmitLdvecWithCastToDouble(context, binOp.Rm);
+ context.EmitLdvec(binOp.Rm);
}
else
{
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
}
if (isLeOrLt)
{
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
}
context.EmitCall(typeof(Sse2).GetMethod(name, types));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
if (scalar)
{
diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs
index 2eac3194d6..c5f16f86cb 100644
--- a/ChocolArm64/Instructions/InstEmitSimdCvt.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs
@@ -21,28 +21,26 @@ namespace ChocolArm64.Instructions
if (op.Size == 1 && op.Opc == 0)
{
//Double -> Single.
+ Type[] typesCvt = new Type[] { typeof(Vector128), typeof(Vector128) };
+
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithCastToDouble(context, op.Rn);
-
- Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
-
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), types));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), typesCvt));
context.EmitStvec(op.Rd);
}
else if (op.Size == 0 && op.Opc == 1)
{
//Single -> Double.
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
+ Type[] typesCvt = new Type[] { typeof(Vector128), typeof(Vector128) };
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitLdvec(op.Rn);
- Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), typesCvt));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types));
-
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -80,18 +78,18 @@ namespace ChocolArm64.Instructions
{
Type[] typesCvt = new Type[] { typeof(Vector128) };
- string nameMov = op.RegisterSize == RegisterSize.Simd128
- ? nameof(Sse.MoveHighToLow)
- : nameof(Sse.MoveLowToHigh);
-
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
- context.EmitCall(typeof(Sse).GetMethod(nameMov));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow)));
+ }
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -154,7 +152,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
- EmitLdvecWithCastToDouble(context, op.Rn);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
context.Emit(OpCodes.Dup);
@@ -209,22 +207,50 @@ namespace ChocolArm64.Instructions
public static void Fcvtns_S(ILEmitterCtx context)
{
- EmitFcvtn(context, signed: true, scalar: true);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (signed, RoundMode.ToNearest, scalar) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Signed(context, RoundMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvtn(context, signed: true, scalar: true); // Generic path: the same call this method made before the change.
+ }
}
public static void Fcvtns_V(ILEmitterCtx context)
{
- EmitFcvtn(context, signed: true, scalar: false);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (signed, RoundMode.ToNearest, vector) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Signed(context, RoundMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvtn(context, signed: true, scalar: false); // Generic path: the same call this method made before the change.
+ }
}
public static void Fcvtnu_S(ILEmitterCtx context)
{
- EmitFcvtn(context, signed: false, scalar: true);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (unsigned, RoundMode.ToNearest, scalar) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Unsigned(context, RoundMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvtn(context, signed: false, scalar: true); // Generic path: the same call this method made before the change.
+ }
}
public static void Fcvtnu_V(ILEmitterCtx context)
{
- EmitFcvtn(context, signed: false, scalar: false);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (unsigned, RoundMode.ToNearest, vector) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Unsigned(context, RoundMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvtn(context, signed: false, scalar: false); // Generic path: the same call this method made before the change.
+ }
}
public static void Fcvtps_Gp(ILEmitterCtx context)
@@ -244,17 +270,43 @@ namespace ChocolArm64.Instructions
public static void Fcvtzs_Gp_Fixed(ILEmitterCtx context)
{
- EmitFcvtzs_Gp_Fix(context);
+ EmitFcvtzs_Gp_Fixed(context); // Same delegation as before; only the helper's name changed (Fix -> Fixed).
}
public static void Fcvtzs_S(ILEmitterCtx context)
{
- EmitScalarFcvtzs(context);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (signed, RoundMode.TowardsZero, scalar) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Signed(context, RoundMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: true); // Generic path: EmitFcvtz takes over from the removed EmitScalarFcvtzs call.
+ }
}
public static void Fcvtzs_V(ILEmitterCtx context)
{
- EmitVectorFcvtzs(context);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (signed, RoundMode.TowardsZero, vector) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Signed(context, RoundMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false); // Generic path: EmitFcvtz takes over from the removed EmitVectorFcvtzs call.
+ }
+ }
+ // Same dispatch as Fcvtzs_V; both emitters test the current op for OpCodeSimdShImm64 to obtain the fixed-point fraction bits.
+ public static void Fcvtzs_V_Fixed(ILEmitterCtx context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvt_Signed(context, RoundMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
}
public static void Fcvtzu_Gp(ILEmitterCtx context)
@@ -264,17 +316,43 @@ namespace ChocolArm64.Instructions
public static void Fcvtzu_Gp_Fixed(ILEmitterCtx context)
{
- EmitFcvtzu_Gp_Fix(context);
+ EmitFcvtzu_Gp_Fixed(context); // Same delegation as before; only the helper's name changed (Fix -> Fixed).
}
public static void Fcvtzu_S(ILEmitterCtx context)
{
- EmitScalarFcvtzu(context);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (unsigned, RoundMode.TowardsZero, scalar) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Unsigned(context, RoundMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: true); // Generic path: EmitFcvtz takes over from the removed EmitScalarFcvtzu call.
+ }
}
public static void Fcvtzu_V(ILEmitterCtx context)
{
- EmitVectorFcvtzu(context);
+ if (Optimizations.UseSse41) // Prefer the SSE4.1 emitter (unsigned, RoundMode.TowardsZero, vector) when Optimizations.UseSse41 is set.
+ {
+ EmitSse41Fcvt_Unsigned(context, RoundMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false); // Generic path: EmitFcvtz takes over from the removed EmitVectorFcvtzu call.
+ }
+ }
+ // Same dispatch as Fcvtzu_V; both emitters test the current op for OpCodeSimdShImm64 to obtain the fixed-point fraction bits.
+ public static void Fcvtzu_V_Fixed(ILEmitterCtx context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvt_Unsigned(context, RoundMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
}
public static void Scvtf_Gp(ILEmitterCtx context)
@@ -285,7 +363,7 @@ namespace ChocolArm64.Instructions
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
{
- context.Emit(OpCodes.Conv_U4);
+ context.Emit(OpCodes.Conv_I4);
}
EmitFloatCast(context, op.Size);
@@ -293,15 +371,42 @@ namespace ChocolArm64.Instructions
EmitScalarSetF(context, op.Rd, op.Size);
}
+ public static void Scvtf_Gp_Fixed(ILEmitterCtx context)
+ {
+ OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
+ // Emitted sequence: load integer register Rn, narrow for 32-bit operands, cast to float, apply the FBits scale, store to Rd.
+ context.EmitLdintzr(op.Rn);
+ // Conv_I4 is the signed narrowing; the unsigned counterpart Ucvtf_Gp_Fixed uses Conv_U4 at this point.
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ context.Emit(OpCodes.Conv_I4);
+ }
+
+ EmitFloatCast(context, op.Size);
+ // op.FBits is set by the OpCodeSimdCvt64 decoder as 64 - scale; the multiply helper is defined elsewhere in this file.
+ EmitI2fFBitsMul(context, op.Size, op.FBits);
+
+ EmitScalarSetF(context, op.Rd, op.Size);
+ }
+
public static void Scvtf_S(ILEmitterCtx context)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
- EmitVectorExtractSx(context, op.Rn, 0, op.Size + 2);
+ int sizeF = op.Size & 1; // Only the low bit of Size is used; EmitFloatCast maps 0 to Conv_R4 and 1 to Conv_R8.
- EmitFloatCast(context, op.Size);
+ if (Optimizations.UseSse2 && sizeF == 0) // The SSE2 fast path is taken only for sizeF == 0.
+ {
+ EmitSse2cvtF_Signed(context, scalar: true);
+ }
+ else
+ {
+ EmitVectorExtractSx(context, op.Rn, 0, sizeF + 2); // Fallback: extract element 0 of Rn, cast it, store it as the scalar result.
- EmitScalarSetF(context, op.Rd, op.Size);
+ EmitFloatCast(context, sizeF);
+
+ EmitScalarSetF(context, op.Rd, sizeF);
+ }
}
public static void Scvtf_V(ILEmitterCtx context)
@@ -312,18 +417,24 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse2 && sizeF == 0)
{
- Type[] typesCvt = new Type[] { typeof(Vector128) };
+ EmitSse2cvtF_Signed(context, scalar: false);
+ }
+ else
+ {
+ EmitVectorCvtf(context, signed: true);
+ }
+ }
- EmitLdvecWithSignedCast(context, op.Rn, 2);
+ public static void Scvtf_V_Fixed(ILEmitterCtx context)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+ // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+ int sizeF = op.Size & 1;
- context.EmitStvec(op.Rd);
-
- if (op.RegisterSize == RegisterSize.Simd64)
- {
- EmitVectorZeroUpper(context, op.Rd);
- }
+ if (Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitSse2cvtF_Signed(context, scalar: false);
}
else
{
@@ -349,47 +460,78 @@ namespace ChocolArm64.Instructions
EmitScalarSetF(context, op.Rd, op.Size);
}
- public static void Ucvtf_S(ILEmitterCtx context)
+ public static void Ucvtf_Gp_Fixed(ILEmitterCtx context)
{
- OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+ OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
- EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
+ context.EmitLdintzr(op.Rn);
+ // For 32-bit register operands, narrow the loaded value with Conv_U4 before the unsigned-to-float conversion below.
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ context.Emit(OpCodes.Conv_U4);
+ }
context.Emit(OpCodes.Conv_R_Un);
EmitFloatCast(context, op.Size);
+ EmitI2fFBitsMul(context, op.Size, op.FBits); // op.FBits is set by the OpCodeSimdCvt64 decoder as 64 - scale.
+
EmitScalarSetF(context, op.Rd, op.Size);
}
+ public static void Ucvtf_S(ILEmitterCtx context)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+ // Only the low bit of Size is used; EmitFloatCast maps 0 to Conv_R4 and 1 to Conv_R8.
+ int sizeF = op.Size & 1;
+ // The SSE2 fast path is taken only for sizeF == 0; otherwise element 0 is extracted, converted and stored as a scalar.
+ if (Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitSse2cvtF_Unsigned(context, scalar: true);
+ }
+ else
+ {
+ EmitVectorExtractZx(context, op.Rn, 0, sizeF + 2);
+ // Conv_R_Un converts the extracted value as unsigned before the final float cast.
+ context.Emit(OpCodes.Conv_R_Un);
+
+ EmitFloatCast(context, sizeF);
+
+ EmitScalarSetF(context, op.Rd, sizeF);
+ }
+ }
+
public static void Ucvtf_V(ILEmitterCtx context)
{
- EmitVectorCvtf(context, signed: false);
- }
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
- private static int GetFBits(ILEmitterCtx context)
- {
- if (context.CurrOp is OpCodeSimdShImm64 op)
- {
- return GetImmShr(op);
- }
+ int sizeF = op.Size & 1; // Only the low bit of Size is used to choose between the two paths below.
- return 0;
- }
-
- private static void EmitFloatCast(ILEmitterCtx context, int size)
- {
- if (size == 0)
+ if (Optimizations.UseSse2 && sizeF == 0) // The SSE2 fast path is taken only for sizeF == 0.
{
- context.Emit(OpCodes.Conv_R4);
- }
- else if (size == 1)
- {
- context.Emit(OpCodes.Conv_R8);
+ EmitSse2cvtF_Unsigned(context, scalar: false);
}
else
{
- throw new ArgumentOutOfRangeException(nameof(size));
+ EmitVectorCvtf(context, signed: false); // Generic path: the same call this method made before the change.
+ }
+ }
+ // Same dispatch as Ucvtf_V; the SSE2 emitter applies the fixed-point scale itself when the current op is an OpCodeSimdShImm64.
+ public static void Ucvtf_V_Fixed(ILEmitterCtx context)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitSse2cvtF_Unsigned(context, scalar: false);
+ }
+ else
+ {
+ EmitVectorCvtf(context, signed: false);
}
}
@@ -403,11 +545,6 @@ namespace ChocolArm64.Instructions
int bytes = op.GetBitsCount() >> 3;
int elems = !scalar ? bytes >> sizeI : 1;
- if (scalar && (sizeF == 0))
- {
- EmitVectorZeroLowerTmp(context);
- }
-
for (int index = 0; index < elems; index++)
{
EmitVectorExtractF(context, op.Rn, index, sizeF);
@@ -429,13 +566,62 @@ namespace ChocolArm64.Instructions
: nameof(VectorHelper.SatF64ToU64));
}
- EmitVectorInsertTmp(context, index, sizeI);
+ if (scalar)
+ {
+ EmitVectorZeroAll(context, op.Rd);
+ }
+
+ EmitVectorInsert(context, op.Rd, index, sizeI);
}
- context.EmitLdvectmp();
- context.EmitStvec(op.Rd);
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
- if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ private static void EmitFcvtz(ILEmitterCtx context, bool signed, bool scalar)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ int fBits = GetFBits(context);
+
+ int bytes = op.GetBitsCount() >> 3;
+ int elems = !scalar ? bytes >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ EmitVectorExtractF(context, op.Rn, index, sizeF);
+
+ EmitF2iFBitsMul(context, sizeF, fBits);
+
+ if (sizeF == 0)
+ {
+ VectorHelper.EmitCall(context, signed
+ ? nameof(VectorHelper.SatF32ToS32)
+ : nameof(VectorHelper.SatF32ToU32));
+
+ context.Emit(OpCodes.Conv_U8);
+ }
+ else /* if (sizeF == 1) */
+ {
+ VectorHelper.EmitCall(context, signed
+ ? nameof(VectorHelper.SatF64ToS64)
+ : nameof(VectorHelper.SatF64ToU64));
+ }
+
+ if (scalar)
+ {
+ EmitVectorZeroAll(context, op.Rd);
+ }
+
+ EmitVectorInsert(context, op.Rd, index, sizeI);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
@@ -476,17 +662,17 @@ namespace ChocolArm64.Instructions
context.EmitStintzr(op.Rd);
}
- private static void EmitFcvtzs_Gp_Fix(ILEmitterCtx context)
+ private static void EmitFcvtzs_Gp_Fixed(ILEmitterCtx context) // Signed wrapper: forwards to the shared helper with signed = true.
{
- EmitFcvtz__Gp_Fix(context, true);
+ EmitFcvtz__Gp_Fixed(context, true);
}
- private static void EmitFcvtzu_Gp_Fix(ILEmitterCtx context)
+ private static void EmitFcvtzu_Gp_Fixed(ILEmitterCtx context) // Unsigned wrapper: forwards to the shared helper with signed = false.
{
- EmitFcvtz__Gp_Fix(context, false);
+ EmitFcvtz__Gp_Fixed(context, false);
}
- private static void EmitFcvtz__Gp_Fix(ILEmitterCtx context, bool signed)
+ private static void EmitFcvtz__Gp_Fixed(ILEmitterCtx context, bool signed)
{
OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;
@@ -530,9 +716,7 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Conv_R_Un);
}
- context.Emit(sizeF == 0
- ? OpCodes.Conv_R4
- : OpCodes.Conv_R8);
+ EmitFloatCast(context, sizeF);
EmitI2fFBitsMul(context, sizeF, fBits);
@@ -545,102 +729,29 @@ namespace ChocolArm64.Instructions
}
}
- private static void EmitScalarFcvtzs(ILEmitterCtx context)
+ private static int GetFBits(ILEmitterCtx context)
{
- EmitScalarFcvtz(context, true);
- }
-
- private static void EmitScalarFcvtzu(ILEmitterCtx context)
- {
- EmitScalarFcvtz(context, false);
- }
-
- private static void EmitScalarFcvtz(ILEmitterCtx context, bool signed)
- {
- OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
-
- int sizeF = op.Size & 1;
- int sizeI = sizeF + 2;
-
- int fBits = GetFBits(context);
-
- EmitVectorExtractF(context, op.Rn, 0, sizeF);
-
- EmitF2iFBitsMul(context, sizeF, fBits);
-
- if (sizeF == 0)
+ if (context.CurrOp is OpCodeSimdShImm64 op)
{
- VectorHelper.EmitCall(context, signed
- ? nameof(VectorHelper.SatF32ToS32)
- : nameof(VectorHelper.SatF32ToU32));
- }
- else /* if (sizeF == 1) */
- {
- VectorHelper.EmitCall(context, signed
- ? nameof(VectorHelper.SatF64ToS64)
- : nameof(VectorHelper.SatF64ToU64));
+ return GetImmShr(op);
}
- if (sizeF == 0)
+ return 0;
+ }
+
+ private static void EmitFloatCast(ILEmitterCtx context, int size)
+ {
+ if (size == 0)
{
- context.Emit(OpCodes.Conv_U8);
+ context.Emit(OpCodes.Conv_R4);
}
-
- EmitScalarSet(context, op.Rd, sizeI);
- }
-
- private static void EmitVectorFcvtzs(ILEmitterCtx context)
- {
- EmitVectorFcvtz(context, true);
- }
-
- private static void EmitVectorFcvtzu(ILEmitterCtx context)
- {
- EmitVectorFcvtz(context, false);
- }
-
- private static void EmitVectorFcvtz(ILEmitterCtx context, bool signed)
- {
- OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
-
- int sizeF = op.Size & 1;
- int sizeI = sizeF + 2;
-
- int fBits = GetFBits(context);
-
- int bytes = op.GetBitsCount() >> 3;
- int elems = bytes >> sizeI;
-
- for (int index = 0; index < elems; index++)
+ else if (size == 1)
{
- EmitVectorExtractF(context, op.Rn, index, sizeF);
-
- EmitF2iFBitsMul(context, sizeF, fBits);
-
- if (sizeF == 0)
- {
- VectorHelper.EmitCall(context, signed
- ? nameof(VectorHelper.SatF32ToS32)
- : nameof(VectorHelper.SatF32ToU32));
- }
- else /* if (sizeF == 1) */
- {
- VectorHelper.EmitCall(context, signed
- ? nameof(VectorHelper.SatF64ToS64)
- : nameof(VectorHelper.SatF64ToU64));
- }
-
- if (sizeF == 0)
- {
- context.Emit(OpCodes.Conv_U8);
- }
-
- EmitVectorInsert(context, op.Rd, index, sizeI);
+ context.Emit(OpCodes.Conv_R8);
}
-
- if (op.RegisterSize == RegisterSize.Simd64)
+ else
{
- EmitVectorZeroUpper(context, op.Rd);
+ throw new ArgumentOutOfRangeException(nameof(size));
}
}
@@ -751,5 +862,467 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Mul);
}
}
+ // Emits Sse/Sse2/Sse41 calls that round the floating-point lanes of op.Rn per roundMode, convert them to signed integers and store them in op.Rd.
+ private static void EmitSse41Fcvt_Signed(ILEmitterCtx context, RoundMode roundMode, bool scalar)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+ int sizeF = op.Size & 1;
+ // sizeF == 0 uses the Sse (single-precision) calls with 32-bit results; sizeF == 1 uses the Sse2 (double-precision) calls with 64-bit results.
+ if (sizeF == 0)
+ {
+ Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesRndCvt = new Type[] { typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(int) };
+ // CompareOrdered(Rn, Rn) AND Rn: presumably zeroes NaN lanes before conversion - TODO confirm against CompareOrdered semantics.
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrdered), types));
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.And), types));
+ // Fixed-point forms only: multiply every lane by 2^fBits, with the constant built directly as a single-precision bit pattern.
+ if (op is OpCodeSimdShImm64 fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ context.EmitLdc_I4(fpScaled);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), types));
+ }
+ // Round with the Sse41 method selected by roundMode, keeping a copy of the rounded vector in the vector temp.
+ context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt));
+
+ context.EmitStvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Int32), typesRndCvt));
+ // XOR the converted integers with the (rounded >= 2^31) mask; NOTE(review): presumably saturates positive overflow to Int32.MaxValue - confirm.
+ context.EmitLdvectmp();
+
+ context.EmitLdc_I4(0x4F000000); // 2.14748365E9f (2147483648)
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqual), types));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), types));
+
+ context.EmitStvec(op.Rd);
+ // Scalar and 64-bit vector forms then run a zeroing helper on Rd (presumably clearing the unused upper part; helpers are defined elsewhere).
+ if (scalar)
+ {
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesRndCvt = new Type[] { typeof(Vector128) };
+ Type[] typesSv = new Type[] { typeof(long), typeof(long) };
+ Type[] typesSav = new Type[] { typeof(long) };
+ // Same self-compare-and-AND step as the single-precision path, using the Sse2 methods.
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrdered), types));
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types));
+ // Fixed-point forms only: multiply every lane by 2^fBits, with the constant built directly as a double-precision bit pattern.
+ if (op is OpCodeSimdShImm64 fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ context.EmitLdc_I8(fpScaled);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), types));
+ }
+ // Round, then stash the rounded vector in the temp so each element can be converted with ConvertToInt64 separately.
+ context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt));
+
+ context.EmitStvectmp();
+ // First SetVector128 argument: the converted high element (moved down via UnpackHigh) for vectors, constant 0 for scalars.
+ if (!scalar)
+ {
+ context.EmitLdvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
+ }
+ else
+ {
+ context.EmitLdc_I8(0L);
+ }
+ // Second SetVector128 argument: the converted low element of the rounded vector.
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSv));
+ // XOR with the (rounded >= 2^63) mask, mirroring the single-precision path above.
+ context.EmitLdvectmp();
+
+ context.EmitLdc_I8(0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqual), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));
+
+ context.EmitStvec(op.Rd);
+
+ if (scalar)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ }
+ // Emits Sse/Sse2/Sse41 calls that round the floating-point lanes of op.Rn per roundMode and store an unsigned integer result in op.Rd.
+ private static void EmitSse41Fcvt_Unsigned(ILEmitterCtx context, RoundMode roundMode, bool scalar)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+ int sizeF = op.Size & 1;
+ // Both branches build the result as the sum of two signed conversions: one of the value, one of (value - 2^31 or 2^63).
+ if (sizeF == 0)
+ {
+ Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesRndCvt = new Type[] { typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(int) };
+ // CompareOrdered(Rn, Rn) AND Rn: presumably zeroes NaN lanes before conversion - TODO confirm against CompareOrdered semantics.
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrdered), types));
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.And), types));
+ // Fixed-point forms only: multiply every lane by 2^fBits, with the constant built directly as a single-precision bit pattern.
+ if (op is OpCodeSimdShImm64 fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ context.EmitLdc_I4(fpScaled);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), types));
+ }
+ // Round with the Sse41 method selected by roundMode.
+ context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt));
+ // rounded AND (rounded > 0): lanes that are not greater than zero become zero.
+ context.Emit(OpCodes.Dup);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThan), types));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.And), types));
+ // First partial result: signed conversion of the clamped value (also kept in the vector temp).
+ context.EmitStvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Int32), typesRndCvt));
+ // Second partial input: clamped value minus 2^31; the 2^31 constant is kept in the second vector temp for the final compare.
+ context.EmitLdvectmp();
+
+ context.EmitLdc_I4(0x4F000000); // 2.14748365E9f (2147483648)
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitStvectmp2();
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), types));
+ // Clamp the difference the same way: lanes that are not greater than zero become zero.
+ context.Emit(OpCodes.Dup);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThan), types));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.And), types));
+ // The vector temp is overwritten here with the clamped difference.
+ context.EmitStvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Int32), typesRndCvt));
+ // Second partial result is XORed with the (difference >= 2^31) mask, then added to the first partial result.
+ context.EmitLdvectmp();
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqual), types));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
+
+ context.EmitStvec(op.Rd);
+ // Scalar and 64-bit vector forms then run a zeroing helper on Rd (presumably clearing the unused upper part; helpers are defined elsewhere).
+ if (scalar)
+ {
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesRndCvt = new Type[] { typeof(Vector128) };
+ Type[] typesSv = new Type[] { typeof(long), typeof(long) };
+ Type[] typesSav = new Type[] { typeof(long) };
+ // Same self-compare-and-AND step as the single-precision path, using the Sse2 methods.
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrdered), types));
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types));
+ // Fixed-point forms only: multiply every lane by 2^fBits, with the constant built directly as a double-precision bit pattern.
+ if (op is OpCodeSimdShImm64 fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ context.EmitLdc_I8(fpScaled);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), types));
+ }
+ // Round with the Sse41 method selected by roundMode.
+ context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt));
+ // rounded AND (rounded > 0): lanes that are not greater than zero become zero.
+ context.Emit(OpCodes.Dup);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types));
+
+ context.EmitStvectmp();
+ // First partial result, high argument of SetVector128: converted high element for vectors, constant 0 for scalars.
+ if (!scalar)
+ {
+ context.EmitLdvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
+ }
+ else
+ {
+ context.EmitLdc_I8(0L);
+ }
+ // First partial result, low argument of SetVector128: converted low element.
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSv));
+ // Second partial input: clamped value minus 2^63; the 2^63 constant is kept in the second vector temp for the final compare.
+ context.EmitLdvectmp();
+
+ context.EmitLdc_I8(0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitStvectmp2();
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), types));
+ // Clamp the difference the same way: lanes that are not greater than zero become zero.
+ context.Emit(OpCodes.Dup);
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types));
+ // The vector temp is overwritten here with the clamped difference.
+ context.EmitStvectmp();
+ // Second partial result, built element by element exactly like the first one.
+ if (!scalar)
+ {
+ context.EmitLdvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
+ }
+ else
+ {
+ context.EmitLdc_I8(0L);
+ }
+
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSv));
+ // Second partial result is XORed with the (difference >= 2^63) mask, then added to the first partial result.
+ context.EmitLdvectmp();
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqual), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
+
+ context.EmitStvec(op.Rd);
+
+ if (scalar)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ }
+ // Emits SSE2 calls that convert the integer lanes of op.Rn to single precision (ConvertToVector128Single) and store the result in op.Rd.
+ private static void EmitSse2cvtF_Signed(ILEmitterCtx context, bool scalar)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesCvt = new Type[] { typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(int) };
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+ // Fixed-point forms only: multiply every lane by 2^-fBits, with the constant built directly as a single-precision bit pattern.
+ if (op is OpCodeSimdShImm64 fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ context.EmitLdc_I4(fpScaled);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
+ }
+
+ context.EmitStvec(op.Rd);
+ // Scalar and 64-bit vector forms then run a zeroing helper on Rd (presumably clearing the unused upper part; helpers are defined elsewhere).
+ if (scalar)
+ {
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ // Emits SSE2 calls that convert the lanes of op.Rn to single precision in two 16-bit halves (high * 65536 + low) and store the sum in op.Rd.
+ private static void EmitSse2cvtF_Unsigned(ILEmitterCtx context, bool scalar)
+ {
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSrlSll = new Type[] { typeof(Vector128), typeof(byte) };
+ Type[] typesCvt = new Type[] { typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(int) };
+ // High half: (Rn >> 16, logical) converted to single, then multiplied by 65536.
+ context.EmitLdvec(op.Rn);
+
+ context.EmitLdc_I4(16);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrlSll));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+
+ context.EmitLdc_I4(0x47800000); // 65536.0f (1 << 16)
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
+ // Low half: ((Rn << 16) >> 16, logical) converted to single, then added to the scaled high half.
+ context.EmitLdvec(op.Rn);
+
+ context.EmitLdc_I4(16);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSrlSll));
+
+ context.EmitLdc_I4(16);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrlSll));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
+ // Fixed-point forms only: multiply every lane by 2^-fBits, with the constant built directly as a single-precision bit pattern.
+ if (op is OpCodeSimdShImm64 fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ context.EmitLdc_I4(fpScaled);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
+ }
+
+ context.EmitStvec(op.Rd);
+ // Scalar and 64-bit vector forms then run a zeroing helper on Rd (presumably clearing the unused upper part; helpers are defined elsewhere).
+ if (scalar)
+ {
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ // Maps a RoundMode to the name of the matching Sse41 rounding method; callers resolve that name with typeof(Sse41).GetMethod.
+ private static string GetSse41NameRnd(RoundMode roundMode)
+ {
+ switch (roundMode)
+ {
+ case RoundMode.ToNearest:
+ return nameof(Sse41.RoundToNearestInteger); // even
+
+ case RoundMode.TowardsMinusInfinity:
+ return nameof(Sse41.RoundToNegativeInfinity);
+
+ case RoundMode.TowardsPlusInfinity:
+ return nameof(Sse41.RoundToPositiveInfinity);
+
+ case RoundMode.TowardsZero:
+ return nameof(Sse41.RoundToZero);
+ // Unknown enum value: report the offending parameter by name, matching EmitFloatCast's ArgumentOutOfRangeException(nameof(size)).
+ default: throw new ArgumentOutOfRangeException(nameof(roundMode));
+ }
+ }
}
}
diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs
index 5a44e1a148..6799a3a388 100644
--- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs
@@ -86,13 +86,13 @@ namespace ChocolArm64.Instructions
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rn);
Type baseType = VectorIntTypesPerSizeLog2[op.Size];
if (op is OpCodeSimdReg64 binOp)
{
- EmitLdvecWithSignedCast(context, binOp.Rm, op.Size);
+ context.EmitLdvec(binOp.Rm);
context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType }));
}
@@ -101,7 +101,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(type.GetMethod(name, new Type[] { baseType }));
}
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -109,80 +109,6 @@ namespace ChocolArm64.Instructions
}
}
- public static void EmitLdvecWithSignedCast(ILEmitterCtx context, int reg, int size)
- {
- context.EmitLdvec(reg);
-
- switch (size)
- {
- case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToSByte)); break;
- case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt16)); break;
- case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt32)); break;
- case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt64)); break;
-
- default: throw new ArgumentOutOfRangeException(nameof(size));
- }
- }
-
- public static void EmitLdvecWithCastToDouble(ILEmitterCtx context, int reg)
- {
- context.EmitLdvec(reg);
-
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble));
- }
-
- public static void EmitStvecWithCastFromDouble(ILEmitterCtx context, int reg)
- {
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle));
-
- context.EmitStvec(reg);
- }
-
- public static void EmitLdvecWithUnsignedCast(ILEmitterCtx context, int reg, int size)
- {
- context.EmitLdvec(reg);
-
- switch (size)
- {
- case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToByte)); break;
- case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt16)); break;
- case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt32)); break;
- case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt64)); break;
-
- default: throw new ArgumentOutOfRangeException(nameof(size));
- }
- }
-
- public static void EmitStvecWithSignedCast(ILEmitterCtx context, int reg, int size)
- {
- switch (size)
- {
- case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSByteToSingle)); break;
- case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16ToSingle)); break;
- case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32ToSingle)); break;
- case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64ToSingle)); break;
-
- default: throw new ArgumentOutOfRangeException(nameof(size));
- }
-
- context.EmitStvec(reg);
- }
-
- public static void EmitStvecWithUnsignedCast(ILEmitterCtx context, int reg, int size)
- {
- switch (size)
- {
- case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorByteToSingle)); break;
- case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt16ToSingle)); break;
- case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt32ToSingle)); break;
- case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt64ToSingle)); break;
-
- default: throw new ArgumentOutOfRangeException(nameof(size));
- }
-
- context.EmitStvec(reg);
- }
-
public static void EmitScalarSseOrSse2OpF(ILEmitterCtx context, string name)
{
EmitSseOrSse2OpF(context, name, true);
@@ -199,17 +125,7 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- void Ldvec(int reg)
- {
- context.EmitLdvec(reg);
-
- if (sizeF == 1)
- {
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble));
- }
- }
-
- Ldvec(op.Rn);
+ context.EmitLdvec(op.Rn);
Type type;
Type baseType;
@@ -227,7 +143,7 @@ namespace ChocolArm64.Instructions
if (op is OpCodeSimdReg64 binOp)
{
- Ldvec(binOp.Rm);
+ context.EmitLdvec(binOp.Rm);
context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType }));
}
@@ -236,11 +152,6 @@ namespace ChocolArm64.Instructions
context.EmitCall(type.GetMethod(name, new Type[] { baseType }));
}
- if (sizeF == 1)
- {
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle));
- }
-
context.EmitStvec(op.Rd);
if (scalar)
@@ -681,12 +592,9 @@ namespace ChocolArm64.Instructions
emit();
- EmitVectorInsertTmp(context, index, op.Size);
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
- context.EmitLdvectmp();
- context.EmitStvec(op.Rd);
-
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
@@ -964,8 +872,8 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), types));
- context.Emit(OpCodes.Dup);
context.EmitStvectmp();
+ context.EmitLdvectmp();
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
@@ -987,20 +895,13 @@ namespace ChocolArm64.Instructions
Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
context.EmitLdvec(op.Rn);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
-
context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
- context.EmitLdvectmp();
- context.EmitLdvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
@@ -1014,26 +915,19 @@ namespace ChocolArm64.Instructions
{
Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithCastToDouble(context, op.Rn);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
- EmitLdvecWithCastToDouble(context, op.Rm);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types));
- context.EmitLdvectmp();
- context.EmitLdvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
context.EmitCall(typeof(Sse2).GetMethod(name, types));
- EmitStvecWithCastFromDouble(context, op.Rd);
+ context.EmitStvec(op.Rd);
}
}
@@ -1074,11 +968,6 @@ namespace ChocolArm64.Instructions
int bytes = op.GetBitsCount() >> 3;
int elems = !scalar ? bytes >> op.Size : 1;
- if (scalar)
- {
- EmitVectorZeroLowerTmp(context);
- }
-
for (int index = 0; index < elems; index++)
{
EmitVectorExtractSx(context, op.Rn, index, op.Size);
@@ -1094,13 +983,15 @@ namespace ChocolArm64.Instructions
EmitUnarySignedSatQAbsOrNeg(context);
}
- EmitVectorInsertTmp(context, index, op.Size);
+ if (scalar)
+ {
+ EmitVectorZeroAll(context, op.Rd);
+ }
+
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
- context.EmitLdvectmp();
- context.EmitStvec(op.Rd);
-
- if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
@@ -1141,11 +1032,6 @@ namespace ChocolArm64.Instructions
int bytes = op.GetBitsCount() >> 3;
int elems = !scalar ? bytes >> op.Size : 1;
- if (scalar)
- {
- EmitVectorZeroLowerTmp(context);
- }
-
if (add || sub)
{
for (int index = 0; index < elems; index++)
@@ -1171,7 +1057,12 @@ namespace ChocolArm64.Instructions
}
}
- EmitVectorInsertTmp(context, index, op.Size);
+ if (scalar)
+ {
+ EmitVectorZeroAll(context, op.Rd);
+ }
+
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
}
else if (accumulate)
@@ -1192,7 +1083,12 @@ namespace ChocolArm64.Instructions
EmitBinarySatQAccumulate(context, signed);
}
- EmitVectorInsertTmp(context, index, op.Size);
+ if (scalar)
+ {
+ EmitVectorZeroAll(context, op.Rd);
+ }
+
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
}
else
@@ -1206,14 +1102,16 @@ namespace ChocolArm64.Instructions
EmitSatQ(context, op.Size, true, signed);
- EmitVectorInsertTmp(context, index, op.Size);
+ if (scalar)
+ {
+ EmitVectorZeroAll(context, op.Rd);
+ }
+
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
}
- context.EmitLdvectmp();
- context.EmitStvec(op.Rd);
-
- if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
@@ -1277,13 +1175,9 @@ namespace ChocolArm64.Instructions
}
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
- public static void EmitSatQ(
- ILEmitterCtx context,
- int sizeDst,
- bool signedSrc,
- bool signedDst)
+ public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst)
{
- if (sizeDst > 2)
+ if ((uint)sizeDst > 2u)
{
throw new ArgumentOutOfRangeException(nameof(sizeDst));
}
@@ -1473,16 +1367,16 @@ namespace ChocolArm64.Instructions
{
if (Optimizations.UseSse)
{
- //TODO: Use Sse2.MoveScalar once it is fixed,
- //as of the time of writing it just crashes the JIT (SDK 2.1.503).
+ // TODO: Use Sse2.MoveScalar once it is fixed (in .NET Core 3.0),
+ // as of the time of writing it just crashes the JIT.
/*Type[] typesMov = new Type[] { typeof(Vector128) };
- EmitLdvecWithUnsignedCast(context, reg, 3);
+ context.EmitLdvec(reg);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov));
- EmitStvecWithUnsignedCast(context, reg, 3);*/
+ context.EmitStvec(reg);*/
context.EmitLdvec(reg);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
diff --git a/ChocolArm64/Instructions/InstEmitSimdLogical.cs b/ChocolArm64/Instructions/InstEmitSimdLogical.cs
index 3473fc5d98..a5a9227410 100644
--- a/ChocolArm64/Instructions/InstEmitSimdLogical.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdLogical.cs
@@ -30,14 +30,14 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
- Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithUnsignedCast(context, op.Rm, 0);
- EmitLdvecWithUnsignedCast(context, op.Rn, 0);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
- EmitStvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -79,20 +79,20 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse2)
{
- Type[] typesXorAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesXorAnd = new Type[] { typeof(Vector128), typeof(Vector128) };
- string nameAndNot = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
+ string nameAnd = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
- EmitLdvecWithUnsignedCast(context, op.Rd, 0);
- EmitLdvecWithUnsignedCast(context, op.Rm, 0);
- EmitLdvecWithUnsignedCast(context, op.Rn, 0);
- EmitLdvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rd);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
- context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
+ context.EmitCall(typeof(Sse2).GetMethod(nameAnd, typesXorAnd));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
- EmitStvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -120,7 +120,6 @@ namespace ChocolArm64.Instructions
}
context.Emit(OpCodes.And);
-
context.Emit(OpCodes.Xor);
EmitVectorInsert(context, op.Rd, index, 3);
@@ -141,20 +140,18 @@ namespace ChocolArm64.Instructions
Type[] typesXorAnd = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithUnsignedCast(context, op.Rm, 0);
- context.Emit(OpCodes.Dup);
-
- EmitLdvecWithUnsignedCast(context, op.Rn, 0);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rm);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
- EmitLdvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitLdvec(op.Rd);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd));
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
- EmitStvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -196,17 +193,17 @@ namespace ChocolArm64.Instructions
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
- Type[] typesSav = new Type[] { typeof(byte) };
- Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
+ Type[] typesAnt = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithUnsignedCast(context, op.Rn, 0);
+ context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(byte.MaxValue);
+ context.EmitLdc_I8(-1L);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
- EmitStvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -225,19 +222,19 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
- Type[] typesSav = new Type[] { typeof(byte) };
- Type[] typesAndNotOr = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
+ Type[] typesAntOr = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithUnsignedCast(context, op.Rn, 0);
- EmitLdvecWithUnsignedCast(context, op.Rm, 0);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(byte.MaxValue);
+ context.EmitLdc_I8(-1L);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndNotOr));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAntOr));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAntOr));
- EmitStvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -305,7 +302,7 @@ namespace ChocolArm64.Instructions
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithSignedCast(context, op.Rn, 0); // value
+ context.EmitLdvec(op.Rn); // value
context.EmitLdc_I8(14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0); // maskE1
context.EmitLdc_I8(06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0); // maskE0
@@ -314,7 +311,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
- EmitStvecWithSignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -336,7 +333,7 @@ namespace ChocolArm64.Instructions
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
+ context.EmitLdvec(op.Rn); // value
if (op.Size == 0)
{
@@ -353,7 +350,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -375,7 +372,7 @@ namespace ChocolArm64.Instructions
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
+ context.EmitLdvec(op.Rn); // value
if (op.Size == 0)
{
@@ -397,7 +394,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
diff --git a/ChocolArm64/Instructions/InstEmitSimdMemory.cs b/ChocolArm64/Instructions/InstEmitSimdMemory.cs
index 9b84eb8681..18ec1d33ea 100644
--- a/ChocolArm64/Instructions/InstEmitSimdMemory.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdMemory.cs
@@ -45,7 +45,6 @@ namespace ChocolArm64.Instructions
if (isLoad)
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(offset);
@@ -62,7 +61,6 @@ namespace ChocolArm64.Instructions
}
else
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(offset);
@@ -90,7 +88,6 @@ namespace ChocolArm64.Instructions
void EmitMemAddress()
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(offset);
diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs
index 2844dfdf4c..131ddec610 100644
--- a/ChocolArm64/Instructions/InstEmitSimdMove.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs
@@ -59,7 +59,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -108,7 +108,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -138,7 +138,7 @@ namespace ChocolArm64.Instructions
Type[] typesShs = new Type[] { typeof(Vector128), typeof(byte) };
Type[] typesOr = new Type[] { typeof(Vector128), typeof(Vector128) };
- EmitLdvecWithUnsignedCast(context, op.Rn, 0);
+ context.EmitLdvec(op.Rn);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -150,7 +150,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(op.Imm4);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs));
- EmitLdvecWithUnsignedCast(context, op.Rm, 0);
+ context.EmitLdvec(op.Rm);
context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs));
@@ -164,7 +164,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
- EmitStvecWithUnsignedCast(context, op.Rd, 0);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -318,12 +318,26 @@ namespace ChocolArm64.Instructions
public static void Movi_V(ILEmitterCtx context)
{
- EmitVectorImmUnaryOp(context, () => { });
+ if (Optimizations.UseSse2)
+ {
+ EmitMoviMvni(context, not: false);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, () => { });
+ }
}
public static void Mvni_V(ILEmitterCtx context)
{
- EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not));
+ if (Optimizations.UseSse2)
+ {
+ EmitMoviMvni(context, not: true);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not));
+ }
}
public static void Smov_S(ILEmitterCtx context)
@@ -341,35 +355,94 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp;
- context.EmitLdvec(op.Rm);
-
- for (int index = 0; index < op.Size; index++)
+ if (Optimizations.UseSsse3)
{
- context.EmitLdvec((op.Rn + index) & 0x1f);
- }
+ Type[] typesCmpSflSub = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesOr = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { typeof(long) };
- switch (op.Size)
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitLdc_I8(0x0F0F0F0F0F0F0F0FL);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitStvectmp2();
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub));
+
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
+
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub));
+
+ for (int index = 1; index < op.Size; index++)
+ {
+ context.EmitLdvec((op.Rn + index) & 0x1F);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitLdc_I8(0x1010101010101010L * index);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSflSub));
+
+ context.EmitStvectmp();
+ context.EmitLdvectmp();
+
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub));
+
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
+
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
+ }
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
{
- case 1: VectorHelper.EmitCall(context,
- nameof(VectorHelper.Tbl1_V64),
- nameof(VectorHelper.Tbl1_V128)); break;
+ context.EmitLdvec(op.Rm);
- case 2: VectorHelper.EmitCall(context,
- nameof(VectorHelper.Tbl2_V64),
- nameof(VectorHelper.Tbl2_V128)); break;
+ for (int index = 0; index < op.Size; index++)
+ {
+ context.EmitLdvec((op.Rn + index) & 0x1F);
+ }
- case 3: VectorHelper.EmitCall(context,
- nameof(VectorHelper.Tbl3_V64),
- nameof(VectorHelper.Tbl3_V128)); break;
+ switch (op.Size)
+ {
+ case 1: VectorHelper.EmitCall(context,
+ nameof(VectorHelper.Tbl1_V64),
+ nameof(VectorHelper.Tbl1_V128)); break;
- case 4: VectorHelper.EmitCall(context,
- nameof(VectorHelper.Tbl4_V64),
- nameof(VectorHelper.Tbl4_V128)); break;
+ case 2: VectorHelper.EmitCall(context,
+ nameof(VectorHelper.Tbl2_V64),
+ nameof(VectorHelper.Tbl2_V128)); break;
- default: throw new InvalidOperationException();
+ case 3: VectorHelper.EmitCall(context,
+ nameof(VectorHelper.Tbl3_V64),
+ nameof(VectorHelper.Tbl3_V128)); break;
+
+ case 4: VectorHelper.EmitCall(context,
+ nameof(VectorHelper.Tbl4_V64),
+ nameof(VectorHelper.Tbl4_V128)); break;
+
+ default: throw new InvalidOperationException();
+ }
+
+ context.EmitStvec(op.Rd);
}
-
- context.EmitStvec(op.Rd);
}
public static void Trn1_V(ILEmitterCtx context)
@@ -418,7 +491,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
- EmitLdvecWithSignedCast(context, op.Rn, 0); // value
+ context.EmitLdvec(op.Rn); // value
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // mask
context.Emit(OpCodes.Dup); // mask
@@ -480,6 +553,38 @@ namespace ChocolArm64.Instructions
}
}
+ private static void EmitMoviMvni(ILEmitterCtx context, bool not)
+ {
+ OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp;
+
+ Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] };
+
+ long imm = op.Imm;
+
+ if (not)
+ {
+ imm = ~imm;
+ }
+
+ if (op.Size < 3)
+ {
+ context.EmitLdc_I4((int)imm);
+ }
+ else
+ {
+ context.EmitLdc_I8(imm);
+ }
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+
private static void EmitVectorTranspose(ILEmitterCtx context, int part)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@@ -492,7 +597,7 @@ namespace ChocolArm64.Instructions
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
+ context.EmitLdvec(op.Rn); // value
if (op.Size < 3)
{
@@ -504,7 +609,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
- EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value
+ context.EmitLdvec(op.Rm); // value
if (op.Size < 3)
{
@@ -518,7 +623,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size)));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -560,7 +665,7 @@ namespace ChocolArm64.Instructions
if (op.RegisterSize == RegisterSize.Simd128)
{
- EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
+ context.EmitLdvec(op.Rn); // value
if (op.Size < 3)
{
@@ -572,7 +677,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
- EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value
+ context.EmitLdvec(op.Rm); // value
if (op.Size < 3)
{
@@ -586,12 +691,12 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
else
{
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value
@@ -605,11 +710,11 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero));
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
}
else
@@ -648,8 +753,8 @@ namespace ChocolArm64.Instructions
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
- EmitLdvecWithSignedCast(context, op.Rm, op.Size);
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
if (op.RegisterSize == RegisterSize.Simd128)
{
@@ -658,12 +763,12 @@ namespace ChocolArm64.Instructions
else
{
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size)));
- VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero));
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
}
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
else
{
diff --git a/ChocolArm64/Instructions/InstEmitSimdShift.cs b/ChocolArm64/Instructions/InstEmitSimdShift.cs
index 843052110f..6865948ae0 100644
--- a/ChocolArm64/Instructions/InstEmitSimdShift.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdShift.cs
@@ -5,6 +5,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
@@ -13,9 +14,65 @@ namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
+#region "Masks"
+ private static readonly long[] _masks_RshrnShrn = new long[]
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
+ };
+#endregion
+
public static void Rshrn_V(ILEmitterCtx context)
{
- EmitVectorShrImmNarrowOpZx(context, round: true);
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
+
+ Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] };
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
+ Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size + 1] };
+ Type[] typesSve = new Type[] { typeof(long), typeof(long) };
+
+ string nameMov = op.RegisterSize == RegisterSize.Simd128
+ ? nameof(Sse.MoveLowToHigh)
+ : nameof(Sse.MoveHighToLow);
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ context.EmitLdvec(op.Rd);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitLdc_I8(roundConst);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value
+
+ context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask
+ context.Emit(OpCodes.Dup); // mask
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
+
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameMov));
+
+ context.EmitStvec(op.Rd);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: true);
+ }
}
public static void Shl_S(ILEmitterCtx context)
@@ -42,12 +99,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -80,19 +137,20 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -102,7 +160,45 @@ namespace ChocolArm64.Instructions
public static void Shrn_V(ILEmitterCtx context)
{
- EmitVectorShrImmNarrowOpZx(context, round: false);
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
+
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
+ Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) };
+ Type[] typesSve = new Type[] { typeof(long), typeof(long) };
+
+ string nameMov = op.RegisterSize == RegisterSize.Simd128
+ ? nameof(Sse.MoveLowToHigh)
+ : nameof(Sse.MoveHighToLow);
+
+ int shift = GetImmShr(op);
+
+ context.EmitLdvec(op.Rd);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
+
+ context.EmitLdvec(op.Rn);
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value
+
+ context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask
+ context.Emit(OpCodes.Dup); // mask
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
+
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameMov));
+
+ context.EmitStvec(op.Rd);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: false);
+ }
}
public static void Sli_V(ILEmitterCtx context)
@@ -271,8 +367,7 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
@@ -280,10 +375,7 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
@@ -291,14 +383,14 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -320,8 +412,7 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
@@ -329,11 +420,8 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
- EmitLdvecWithSignedCast(context, op.Rd, op.Size);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
@@ -341,7 +429,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
@@ -349,7 +437,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -403,19 +491,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- context.EmitLdc_I4(shift);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ if (shift != 0)
+ {
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ }
- EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -432,17 +524,16 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -464,21 +555,20 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
- EmitLdvecWithSignedCast(context, op.Rd, op.Size);
- EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithSignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -610,10 +700,7 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
@@ -621,14 +708,14 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -658,11 +745,8 @@ namespace ChocolArm64.Instructions
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
- EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
@@ -670,7 +754,7 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
@@ -678,7 +762,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -732,19 +816,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+ context.EmitLdvec(op.Rn);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
-
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- context.EmitLdc_I4(shift);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ if (shift != 0)
+ {
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ }
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ context.EmitStvec(op.Rd);
}
else
{
@@ -765,12 +853,12 @@ namespace ChocolArm64.Instructions
{
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -797,15 +885,15 @@ namespace ChocolArm64.Instructions
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
- EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
- EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
+ context.EmitLdvec(op.Rd);
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -899,12 +987,9 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Add);
}
- EmitVectorInsertTmp(context, index, op.Size);
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
- context.EmitLdvectmp();
- context.EmitStvec(op.Rd);
-
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
{
EmitVectorZeroUpper(context, op.Rd);
@@ -1044,11 +1129,7 @@ namespace ChocolArm64.Instructions
}
// dst64 = (Int(src64, signed) + roundConst) >> shift;
- private static void EmitShrImm64(
- ILEmitterCtx context,
- bool signed,
- long roundConst,
- int shift)
+ private static void EmitShrImm64(ILEmitterCtx context, bool signed, long roundConst, int shift)
{
context.EmitLdc_I8(roundConst);
context.EmitLdc_I4(shift);
diff --git a/ChocolArm64/Instructions/InstEmitSystem.cs b/ChocolArm64/Instructions/InstEmitSystem.cs
index 0e61d5bded..5687768a88 100644
--- a/ChocolArm64/Instructions/InstEmitSystem.cs
+++ b/ChocolArm64/Instructions/InstEmitSystem.cs
@@ -102,7 +102,6 @@ namespace ChocolArm64.Instructions
//DC ZVA
for (int offs = 0; offs < (4 << CpuThreadState.DczSizeLog2); offs += 8)
{
- context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdintzr(op.Rt);
context.EmitLdc_I(offs);
diff --git a/ChocolArm64/Instructions/VectorHelper.cs b/ChocolArm64/Instructions/VectorHelper.cs
index f02c131e68..d1dfaced41 100644
--- a/ChocolArm64/Instructions/VectorHelper.cs
+++ b/ChocolArm64/Instructions/VectorHelper.cs
@@ -26,8 +26,8 @@ namespace ChocolArm64.Instructions
{
if (float.IsNaN(value)) return 0;
- return value > int.MaxValue ? int.MaxValue :
- value < int.MinValue ? int.MinValue : (int)value;
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -35,8 +35,8 @@ namespace ChocolArm64.Instructions
{
if (float.IsNaN(value)) return 0;
- return value > long.MaxValue ? long.MaxValue :
- value < long.MinValue ? long.MinValue : (long)value;
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -44,8 +44,8 @@ namespace ChocolArm64.Instructions
{
if (float.IsNaN(value)) return 0;
- return value > uint.MaxValue ? uint.MaxValue :
- value < uint.MinValue ? uint.MinValue : (uint)value;
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -53,8 +53,8 @@ namespace ChocolArm64.Instructions
{
if (float.IsNaN(value)) return 0;
- return value > ulong.MaxValue ? ulong.MaxValue :
- value < ulong.MinValue ? ulong.MinValue : (ulong)value;
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -62,8 +62,8 @@ namespace ChocolArm64.Instructions
{
if (double.IsNaN(value)) return 0;
- return value > int.MaxValue ? int.MaxValue :
- value < int.MinValue ? int.MinValue : (int)value;
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -71,8 +71,8 @@ namespace ChocolArm64.Instructions
{
if (double.IsNaN(value)) return 0;
- return value > long.MaxValue ? long.MaxValue :
- value < long.MinValue ? long.MinValue : (long)value;
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -80,8 +80,8 @@ namespace ChocolArm64.Instructions
{
if (double.IsNaN(value)) return 0;
- return value > uint.MaxValue ? uint.MaxValue :
- value < uint.MinValue ? uint.MinValue : (uint)value;
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -89,8 +89,8 @@ namespace ChocolArm64.Instructions
{
if (double.IsNaN(value)) return 0;
- return value > ulong.MaxValue ? ulong.MaxValue :
- value < ulong.MinValue ? ulong.MinValue : (ulong)value;
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
}
public static double Round(double value, CpuThreadState state)
@@ -500,50 +500,6 @@ namespace ChocolArm64.Instructions
return Sse41.Insert(vector, value, 0b1110);
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSByteZero()
- {
- if (Sse2.IsSupported)
- {
- return Sse2.SetZeroVector128();
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorInt16Zero()
- {
- if (Sse2.IsSupported)
- {
- return Sse2.SetZeroVector128();
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorInt32Zero()
- {
- if (Sse2.IsSupported)
- {
- return Sse2.SetZeroVector128();
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorInt64Zero()
- {
- if (Sse2.IsSupported)
- {
- return Sse2.SetZeroVector128();
- }
-
- throw new PlatformNotSupportedException();
- }
-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128 VectorSingleZero()
{
@@ -554,214 +510,5 @@ namespace ChocolArm64.Instructions
throw new PlatformNotSupportedException();
}
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorDoubleZero()
- {
- if (Sse2.IsSupported)
- {
- return Sse2.SetZeroVector128();
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToSByte(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToInt16(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToInt32(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToInt64(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToByte(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToUInt16(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToUInt32(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToUInt64(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSingleToDouble(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorSByteToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorInt16ToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorInt32ToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorInt64ToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorByteToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorUInt16ToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorUInt32ToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorUInt64ToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static Vector128 VectorDoubleToSingle(Vector128 vector)
- {
- if (Sse.IsSupported)
- {
- return Sse.StaticCast(vector);
- }
-
- throw new PlatformNotSupportedException();
- }
}
}
diff --git a/ChocolArm64/Memory/CompareExchange128.cs b/ChocolArm64/Memory/CompareExchange128.cs
new file mode 100644
index 0000000000..1618ff0fbc
--- /dev/null
+++ b/ChocolArm64/Memory/CompareExchange128.cs
@@ -0,0 +1,182 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// 128-bit atomic compare-and-exchange, implemented by mapping small hand-assembled
+    /// x64 stubs built around <c>lock cmpxchg16b</c> as executable memory.
+    /// </summary>
+    static class CompareExchange128
+    {
+        // A 128-bit value split into two 64-bit halves, as exchanged with the native stub.
+        private struct Int128
+        {
+            public ulong Low { get; }
+            public ulong High { get; }
+
+            public Int128(ulong low, ulong high)
+            {
+                Low = low;
+                High = high;
+            }
+        }
+
+        private delegate Int128 InterlockedCompareExchange(IntPtr address, Int128 expected, Int128 desired);
+
+        private delegate int GetCpuId();
+
+        // Bound once by the static constructor; the stub behind it is never unmapped.
+        private static readonly InterlockedCompareExchange _interlockedCompareExchange;
+
+        // Picks the stub matching the host OS, maps it as executable memory and binds it to
+        // the delegate above. Throws PlatformNotSupportedException when the OS architecture
+        // is not x64, when CPUID does not report CMPXCHG16B, or when the OS is not Windows,
+        // Linux or macOS.
+        static CompareExchange128()
+        {
+            if (RuntimeInformation.OSArchitecture != Architecture.X64 || !IsCmpxchg16bSupported())
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            byte[] interlockedCompareExchange128Code;
+
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                // As read by this stub: rcx = buffer that receives the old value (its address
+                // is returned in rax), rdx = target address, r8 -> expected, r9 -> desired.
+                interlockedCompareExchange128Code = new byte[]
+                {
+                    0x53, // push rbx
+                    0x49, 0x8b, 0x00, // mov rax, [r8]
+                    0x49, 0x8b, 0x19, // mov rbx, [r9]
+                    0x49, 0x89, 0xca, // mov r10, rcx
+                    0x49, 0x89, 0xd3, // mov r11, rdx
+                    0x49, 0x8b, 0x49, 0x08, // mov rcx, [r9+8]
+                    0x49, 0x8b, 0x50, 0x08, // mov rdx, [r8+8]
+                    0xf0, 0x49, 0x0f, 0xc7, 0x0b, // lock cmpxchg16b [r11]
+                    0x49, 0x89, 0x02, // mov [r10], rax
+                    0x4c, 0x89, 0xd0, // mov rax, r10
+                    0x49, 0x89, 0x52, 0x08, // mov [r10+8], rdx
+                    0x5b, // pop rbx
+                    0xc3 // ret
+                };
+            }
+            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+                     RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+            {
+                // As read by this stub: rdi = target address, rsi:rdx = expected (low:high),
+                // rcx:r8 = desired (low:high); the old value is left in rax:rdx.
+                interlockedCompareExchange128Code = new byte[]
+                {
+                    0x53, // push rbx
+                    0x49, 0x89, 0xd1, // mov r9, rdx
+                    0x48, 0x89, 0xcb, // mov rbx, rcx
+                    0x48, 0x89, 0xf0, // mov rax, rsi
+                    0x4c, 0x89, 0xca, // mov rdx, r9
+                    0x4c, 0x89, 0xc1, // mov rcx, r8
+                    0xf0, 0x48, 0x0f, 0xc7, 0x0f, // lock cmpxchg16b [rdi]
+                    0x5b, // pop rbx
+                    0xc3 // ret
+                };
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            IntPtr funcPtr = MapCodeAsExecutable(interlockedCompareExchange128Code);
+
+            // The one-argument call only exists as the generic overload, which returns a
+            // delegate of the requested type.
+            _interlockedCompareExchange =
+                Marshal.GetDelegateForFunctionPointer<InterlockedCompareExchange>(funcPtr);
+        }
+
+        // Runs CPUID with eax = 1 through a temporary native stub and tests bit 13 of the
+        // returned ECX value, the CMPXCHG16B feature flag.
+        private static bool IsCmpxchg16bSupported()
+        {
+            byte[] getCpuIdCode = new byte[]
+            {
+                0x53, // push rbx
+                0xb8, 0x01, 0x00, 0x00, 0x00, // mov eax, 0x1
+                0x0f, 0xa2, // cpuid
+                0x89, 0xc8, // mov eax, ecx
+                0x5b, // pop rbx
+                0xc3 // ret
+            };
+
+            IntPtr funcPtr = MapCodeAsExecutable(getCpuIdCode);
+
+            GetCpuId getCpuId = Marshal.GetDelegateForFunctionPointer<GetCpuId>(funcPtr);
+
+            int cpuId = getCpuId();
+
+            // The stub is only needed for this single call.
+            MemoryManagement.Free(funcPtr);
+
+            return (cpuId & (1 << 13)) != 0;
+        }
+
+        // Copies the machine code into freshly allocated memory, then changes that memory's
+        // protection to Execute. Returns the address of the first instruction.
+        private static IntPtr MapCodeAsExecutable(byte[] code)
+        {
+            ulong codeLength = (ulong)code.Length;
+
+            IntPtr funcPtr = MemoryManagement.Allocate(codeLength);
+
+            unsafe
+            {
+                fixed (byte* codePtr = code)
+                {
+                    byte* dest = (byte*)funcPtr;
+
+                    long size = (long)codeLength;
+
+                    Buffer.MemoryCopy(codePtr, dest, size, size);
+                }
+            }
+
+            MemoryManagement.Reprotect(funcPtr, codeLength, MemoryProtection.Execute);
+
+            return funcPtr;
+        }
+
+        /// <summary>
+        /// Atomically stores the desired 128-bit value at <paramref name="address"/> if the
+        /// value currently there equals the expected one.
+        /// </summary>
+        /// <returns>True when the value read back by the stub equals the expected value.</returns>
+        public static bool InterlockedCompareExchange128(
+            IntPtr address,
+            ulong expectedLow,
+            ulong expectedHigh,
+            ulong desiredLow,
+            ulong desiredHigh)
+        {
+            Int128 expected = new Int128(expectedLow, expectedHigh);
+            Int128 desired = new Int128(desiredLow, desiredHigh);
+
+            Int128 old = _interlockedCompareExchange(address, expected, desired);
+
+            return old.Low == expected.Low && old.High == expected.High;
+        }
+
+        /// <summary>
+        /// Reads the 128-bit value at <paramref name="address"/> by issuing a compare-exchange
+        /// whose expected and desired values are both zero, so the stored value never changes.
+        /// </summary>
+        public static void InterlockedRead128(IntPtr address, out ulong low, out ulong high)
+        {
+            Int128 zero = new Int128(0, 0);
+
+            Int128 old = _interlockedCompareExchange(address, zero, zero);
+
+            low = old.Low;
+            high = old.High;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Memory/MemoryManagement.cs b/ChocolArm64/Memory/MemoryManagement.cs
new file mode 100644
index 0000000000..fa4bc4fac2
--- /dev/null
+++ b/ChocolArm64/Memory/MemoryManagement.cs
@@ -0,0 +1,125 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// Routes allocation, protection and release requests to the Windows or the
+    /// Unix (Linux/macOS) backend, depending on the OS the process runs on.
+    /// </summary>
+    public static class MemoryManagement
+    {
+        private static bool IsWindows => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+
+        private static bool IsUnix =>
+            RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+            RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
+
+        /// <summary>True when running on Windows.</summary>
+        public static bool HasWriteWatchSupport => IsWindows;
+
+        /// <summary>Allocates <paramref name="size"/> bytes through the backend for this OS.</summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static IntPtr Allocate(ulong size)
+        {
+            if (IsWindows)
+            {
+                return MemoryManagementWindows.Allocate(new IntPtr((long)size));
+            }
+
+            if (IsUnix)
+            {
+                return MemoryManagementUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Uses the write-tracked allocator on Windows; on Linux and macOS this is
+        /// the same call as <see cref="Allocate"/>.
+        /// </summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static IntPtr AllocateWriteTracked(ulong size)
+        {
+            if (IsWindows)
+            {
+                return MemoryManagementWindows.AllocateWriteTracked(new IntPtr((long)size));
+            }
+
+            if (IsUnix)
+            {
+                return MemoryManagementUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>Changes the protection of a memory range.</summary>
+        /// <exception cref="MemoryProtectionException">The backend reported failure.</exception>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
+        {
+            bool succeeded;
+
+            if (IsWindows)
+            {
+                succeeded = MemoryManagementWindows.Reprotect(address, new IntPtr((long)size), permission);
+            }
+            else if (IsUnix)
+            {
+                succeeded = MemoryManagementUnix.Reprotect(address, size, permission);
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            if (!succeeded)
+            {
+                throw new MemoryProtectionException(permission);
+            }
+        }
+
+        /// <summary>Releases the memory at <paramref name="address"/> through the backend for this OS.</summary>
+        /// <returns>The success flag returned by the backend.</returns>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static bool Free(IntPtr address)
+        {
+            if (IsWindows)
+            {
+                return MemoryManagementWindows.Free(address);
+            }
+
+            if (IsUnix)
+            {
+                return MemoryManagementUnix.Free(address);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Forwards to the Windows backend. On every other platform it returns false
+        /// with a zero count, which is a valid "failed" answer where the OS offers
+        /// no write tracking.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr address,
+            IntPtr size,
+            IntPtr[] addresses,
+            out ulong count)
+        {
+            if (!IsWindows)
+            {
+                count = 0;
+
+                return false;
+            }
+
+            return MemoryManagementWindows.GetModifiedPages(address, size, addresses, out count);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Memory/MemoryManagementUnix.cs b/ChocolArm64/Memory/MemoryManagementUnix.cs
new file mode 100644
index 0000000000..9fe1aef094
--- /dev/null
+++ b/ChocolArm64/Memory/MemoryManagementUnix.cs
@@ -0,0 +1,93 @@
+using Mono.Unix.Native;
+using System;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// mmap based allocator. Every allocation is preceded by one extra page whose last
+    /// eight bytes hold the requested size, so that Free can unmap the whole mapping.
+    /// </summary>
+    static class MemoryManagementUnix
+    {
+        // mmap reports failure with MAP_FAILED, i.e. (void*)-1, never with a null pointer.
+        private static readonly IntPtr MapFailed = new IntPtr(-1L);
+
+        /// <summary>
+        /// Maps <paramref name="size"/> bytes of private, anonymous read/write memory.
+        /// </summary>
+        /// <returns>Pointer to the usable region, one page past the start of the mapping.</returns>
+        /// <exception cref="OutOfMemoryException">mmap failed.</exception>
+        public static IntPtr Allocate(ulong size)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+
+            const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
+
+            IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
+
+            // Comparing against IntPtr.Zero would miss every failure and the size write
+            // below would then fault on an invalid address.
+            if (ptr == MapFailed)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            unsafe
+            {
+                ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);
+
+                // Stash the size in the eight bytes just below the returned pointer.
+                *((ulong*)ptr - 1) = size;
+            }
+
+            return ptr;
+        }
+
+        /// <summary>Changes the protection of a range with mprotect.</summary>
+        /// <returns>True when mprotect returned zero.</returns>
+        public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
+        {
+            MmapProts prot = GetProtection(protection);
+
+            return Syscall.mprotect(address, size, prot) == 0;
+        }
+
+        // Translates the portable protection value into mmap protection flags.
+        private static MmapProts GetProtection(Memory.MemoryProtection protection)
+        {
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None: return MmapProts.PROT_NONE;
+                case Memory.MemoryProtection.Read: return MmapProts.PROT_READ;
+                case Memory.MemoryProtection.ReadAndWrite: return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+                case Memory.MemoryProtection.ReadAndExecute: return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
+                case Memory.MemoryProtection.Execute: return MmapProts.PROT_EXEC;
+
+                default: throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+        }
+
+        /// <summary>
+        /// Unmaps an allocation returned by <see cref="Allocate"/>; the size is read back
+        /// from the eight bytes below <paramref name="address"/>.
+        /// </summary>
+        /// <returns>True when munmap returned zero.</returns>
+        public static bool Free(IntPtr address)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            ulong size;
+
+            unsafe
+            {
+                size = *((ulong*)address - 1);
+
+                address = new IntPtr(address.ToInt64() - (long)pageSize);
+            }
+
+            return Syscall.munmap(address, size + pageSize) == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Memory/MemoryManagementWindows.cs b/ChocolArm64/Memory/MemoryManagementWindows.cs
new file mode 100644
index 0000000000..6cee134279
--- /dev/null
+++ b/ChocolArm64/Memory/MemoryManagementWindows.cs
@@ -0,0 +1,173 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// Wrappers over the kernel32 virtual memory functions VirtualAlloc,
+    /// VirtualProtect, VirtualFree and GetWriteWatch.
+    /// </summary>
+    static class MemoryManagementWindows
+    {
+        [Flags]
+        private enum AllocationType : uint
+        {
+            Commit     = 0x1000,
+            Reserve    = 0x2000,
+            Decommit   = 0x4000,
+            Release    = 0x8000,
+            Reset      = 0x80000,
+            Physical   = 0x400000,
+            TopDown    = 0x100000,
+            WriteWatch = 0x200000,
+            LargePages = 0x20000000
+        }
+
+        [Flags]
+        private enum MemoryProtection : uint
+        {
+            NoAccess                 = 0x01,
+            ReadOnly                 = 0x02,
+            ReadWrite                = 0x04,
+            WriteCopy                = 0x08,
+            Execute                  = 0x10,
+            ExecuteRead              = 0x20,
+            ExecuteReadWrite         = 0x40,
+            ExecuteWriteCopy         = 0x80,
+            GuardModifierflag        = 0x100,
+            NoCacheModifierflag      = 0x200,
+            WriteCombineModifierflag = 0x400
+        }
+
+        private enum WriteWatchFlags : uint
+        {
+            None  = 0,
+            Reset = 1
+        }
+
+        [DllImport("kernel32.dll")]
+        private static extern IntPtr VirtualAlloc(
+            IntPtr lpAddress,
+            IntPtr dwSize,
+            AllocationType flAllocationType,
+            MemoryProtection flProtect);
+
+        [DllImport("kernel32.dll")]
+        private static extern bool VirtualProtect(
+            IntPtr lpAddress,
+            IntPtr dwSize,
+            MemoryProtection flNewProtect,
+            out MemoryProtection lpflOldProtect);
+
+        [DllImport("kernel32.dll")]
+        private static extern bool VirtualFree(
+            IntPtr lpAddress,
+            IntPtr dwSize,
+            AllocationType dwFreeType);
+
+        [DllImport("kernel32.dll")]
+        private static extern int GetWriteWatch(
+            WriteWatchFlags dwFlags,
+            IntPtr lpBaseAddress,
+            IntPtr dwRegionSize,
+            IntPtr[] lpAddresses,
+            ref ulong lpdwCount,
+            out uint lpdwGranularity);
+
+        /// <summary>Reserves and commits read/write memory.</summary>
+        /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
+        public static IntPtr Allocate(IntPtr size)
+        {
+            return AllocateCore(size, AllocationType.Reserve | AllocationType.Commit);
+        }
+
+        /// <summary>Like <see cref="Allocate"/>, with the WriteWatch allocation flag added.</summary>
+        /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
+        public static IntPtr AllocateWriteTracked(IntPtr size)
+        {
+            return AllocateCore(
+                size,
+                AllocationType.Reserve | AllocationType.Commit | AllocationType.WriteWatch);
+        }
+
+        // Shared VirtualAlloc call; no base address is requested (lpAddress is zero).
+        private static IntPtr AllocateCore(IntPtr size, AllocationType flags)
+        {
+            IntPtr region = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
+
+            if (region == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            return region;
+        }
+
+        /// <summary>Applies a new protection to a range through VirtualProtect.</summary>
+        /// <returns>The value returned by VirtualProtect; the old protection is discarded.</returns>
+        public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
+        {
+            return VirtualProtect(address, size, GetProtection(protection), out _);
+        }
+
+        // Translates the portable protection value into the matching page protection constant.
+        private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
+        {
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:
+                    return MemoryProtection.NoAccess;
+
+                case Memory.MemoryProtection.Read:
+                    return MemoryProtection.ReadOnly;
+
+                case Memory.MemoryProtection.ReadAndWrite:
+                    return MemoryProtection.ReadWrite;
+
+                case Memory.MemoryProtection.ReadAndExecute:
+                    return MemoryProtection.ExecuteRead;
+
+                case Memory.MemoryProtection.Execute:
+                    return MemoryProtection.Execute;
+
+                default:
+                    throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+        }
+
+        /// <summary>Releases a region with VirtualFree, using a zero size and the Release flag.</summary>
+        public static bool Free(IntPtr address)
+        {
+            return VirtualFree(address, IntPtr.Zero, AllocationType.Release);
+        }
+
+        /// <summary>
+        /// Calls GetWriteWatch with the Reset flag. <paramref name="addresses"/> is the output
+        /// buffer; its length is passed in as the capacity and <paramref name="count"/> receives
+        /// the count value written back by the call.
+        /// </summary>
+        /// <returns>True when GetWriteWatch returned zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr address,
+            IntPtr size,
+            IntPtr[] addresses,
+            out ulong count)
+        {
+            ulong written = (ulong)addresses.Length;
+
+            int status = GetWriteWatch(
+                WriteWatchFlags.Reset,
+                address,
+                size,
+                addresses,
+                ref written,
+                out _);
+
+            count = written;
+
+            return status == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs
index 1f21256807..ce102e096c 100644
--- a/ChocolArm64/Memory/MemoryManager.cs
+++ b/ChocolArm64/Memory/MemoryManager.cs
@@ -1,178 +1,540 @@
-using ChocolArm64.Events;
-using ChocolArm64.Exceptions;
using ChocolArm64.Instructions;
-using ChocolArm64.State;
using System;
-using System.Collections.Concurrent;
-using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Threading;
+using static ChocolArm64.Memory.CompareExchange128;
+using static ChocolArm64.Memory.MemoryManagement;
+
namespace ChocolArm64.Memory
{
public unsafe class MemoryManager : IMemory, IDisposable
{
- private const int PtLvl0Bits = 13;
- private const int PtLvl1Bits = 14;
- public const int PageBits = 12;
+ public const int PageBits = 12;
+ public const int PageSize = 1 << PageBits;
+ public const int PageMask = PageSize - 1;
- private const int PtLvl0Size = 1 << PtLvl0Bits;
- private const int PtLvl1Size = 1 << PtLvl1Bits;
- public const int PageSize = 1 << PageBits;
+ private const long PteFlagNotModified = 1;
- private const int PtLvl0Mask = PtLvl0Size - 1;
- private const int PtLvl1Mask = PtLvl1Size - 1;
- public const int PageMask = PageSize - 1;
-
- private const int PtLvl0Bit = PageBits + PtLvl1Bits;
- private const int PtLvl1Bit = PageBits;
-
- private const long ErgMask = (4 << CpuThreadState.ErgSizeLog2) - 1;
-
- private class ArmMonitor
- {
- public long Position;
- public bool ExState;
-
- public bool HasExclusiveAccess(long position)
- {
- return Position == position && ExState;
- }
- }
-
- private Dictionary _monitors;
-
- private ConcurrentDictionary _observedPages;
+ internal const long PteFlagsMask = 7;
public IntPtr Ram { get; private set; }
private byte* _ramPtr;
- private byte*** _pageTable;
+ private IntPtr _pageTable;
- public event EventHandler InvalidAccess;
+ internal IntPtr PageTable => _pageTable;
- public event EventHandler ObservedAccess;
+ internal int PtLevelBits { get; }
+ internal int PtLevelSize { get; }
+ internal int PtLevelMask { get; }
- public MemoryManager(IntPtr ram)
+ public bool HasWriteWatchSupport => MemoryManagement.HasWriteWatchSupport;
+
+ public int AddressSpaceBits { get; }
+ public long AddressSpaceSize { get; }
+
+ //Creates a memory manager for the host memory block at ram.
+ //addressSpaceBits is the width, in bits, of the guest virtual address space.
+ //useFlatPageTable selects a single level page table instead of the multi level one.
+ //Throws ArgumentOutOfRangeException when addressSpaceBits is outside [PageBits, 63],
+ //or when a flat page table for that address space can't be sized with an int.
+ public MemoryManager(
+ IntPtr ram,
+ int addressSpaceBits = 48,
+ bool useFlatPageTable = false)
{
- _monitors = new Dictionary();
-
- _observedPages = new ConcurrentDictionary();
-
+ if (addressSpaceBits < PageBits || addressSpaceBits > 63)
+ {
+ throw new ArgumentOutOfRangeException(nameof(addressSpaceBits));
+ }
+
+ //PtLevelSize is an int, and 1 << n only uses the low 5 bits of n, so a flat
+ //table with more than 30 index bits would silently end up with the wrong size.
+ if (useFlatPageTable && addressSpaceBits - PageBits > 30)
+ {
+ throw new ArgumentOutOfRangeException(nameof(addressSpaceBits), "The address space is too large for a flat page table.");
+ }
+
Ram = ram;
_ramPtr = (byte*)ram;
- _pageTable = (byte***)Marshal.AllocHGlobal(PtLvl0Size * IntPtr.Size);
+ AddressSpaceBits = addressSpaceBits;
+ AddressSpaceSize = 1L << addressSpaceBits;
- for (int l0 = 0; l0 < PtLvl0Size; l0++)
+ //When flat page table is requested, we use a single
+ //array for the mappings of the entire address space.
+ //This has better performance, but also high memory usage.
+ //The multi level page table uses 9 bits per level, so
+ //the memory usage is lower, but the performance is also
+ //lower, since each address translation requires multiple reads.
+ if (useFlatPageTable)
{
- _pageTable[l0] = null;
+ PtLevelBits = addressSpaceBits - PageBits;
}
+ else
+ {
+ PtLevelBits = 9;
+ }
+
+ PtLevelSize = 1 << PtLevelBits;
+ PtLevelMask = PtLevelSize - 1;
+
+ //Widen before multiplying, the size in bytes of a large flat table overflows an int.
+ _pageTable = Allocate((ulong)PtLevelSize * (ulong)IntPtr.Size);
}
- public void RemoveMonitor(int core)
+ //Points the page table entries covering the guest range that starts at va
+ //to the host memory located pa bytes after the start of the RAM block.
+ public void Map(long va, long pa, long size)
{
- lock (_monitors)
- {
- ClearExclusive(core);
-
- _monitors.Remove(core);
- }
+ SetPtEntries(va, _ramPtr + pa, size);
}
- public void SetExclusive(int core, long position)
+ //Clears (sets to null) the page table entries covering the guest range that starts at position.
+ public void Unmap(long position, long size)
{
- position &= ~ErgMask;
+ SetPtEntries(position, null, size);
+ }
- lock (_monitors)
+ //Returns true when the page containing position has host memory mapped to it.
+ //Positions outside of the address space are reported as not mapped.
+ public bool IsMapped(long position)
+ {
+ //Translate adds the in-page offset to the entry pointer, so for an unmapped page
+ //it returns the bare offset, which is only zero for page aligned positions.
+ //Query the page base instead, so the result doesn't depend on the offset.
+ return Translate(position & ~PageMask) != IntPtr.Zero;
+ }
+
+ //Converts a guest virtual address into an offset relative to the start of the RAM block.
+ //The value is simply the translated host pointer minus the RAM base pointer.
+ public long GetPhysicalAddress(long virtualAddress)
+ {
+ IntPtr hostAddress = Translate(virtualAddress);
+
+ return (long)((byte*)hostAddress - _ramPtr);
+ }
+
+ //Translates a guest position into a host pointer, for read accesses.
+ //Returns IntPtr.Zero when position is outside of the address space.
+ //Note that for a page whose entry is null, the result is the page offset alone.
+ private IntPtr Translate(long position)
+ {
+ if (!IsValidPosition(position))
{
- foreach (ArmMonitor mon in _monitors.Values)
+ return IntPtr.Zero;
+ }
+
+ byte* ptr = GetPtEntry(position);
+
+ ulong ptrUlong = (ulong)ptr;
+
+ //The low bits of the entry hold flags, strip them to get the actual page pointer.
+ if ((ptrUlong & PteFlagsMask) != 0)
+ {
+ ptrUlong &= ~(ulong)PteFlagsMask;
+
+ ptr = (byte*)ptrUlong;
+ }
+
+ return new IntPtr(ptr + (position & PageMask));
+ }
+
+ //Translates a guest position into a host pointer, for write accesses.
+ //Same as Translate, but also clears the PteFlagNotModified flag of the page entry,
+ //which is the flag that IsRegionModifiedFallback sets and checks.
+ //Returns IntPtr.Zero when position is outside of the address space.
+ private IntPtr TranslateWrite(long position)
+ {
+ if (!IsValidPosition(position))
+ {
+ return IntPtr.Zero;
+ }
+
+ byte* ptr = GetPtEntry(position);
+
+ ulong ptrUlong = (ulong)ptr;
+
+ if ((ptrUlong & PteFlagsMask) != 0)
+ {
+ if ((ptrUlong & PteFlagNotModified) != 0)
{
- if (mon.Position == position && mon.ExState)
+ ClearPtEntryFlag(position, PteFlagNotModified);
+ }
+
+ //The low bits of the entry hold flags, strip them to get the actual page pointer.
+ ptrUlong &= ~(ulong)PteFlagsMask;
+
+ ptr = (byte*)ptrUlong;
+ }
+
+ return new IntPtr(ptr + (position & PageMask));
+ }
+
+ //Reads the raw page table entry (page pointer plus flag bits) of the page containing position.
+ private byte* GetPtEntry(long position)
+ {
+ IntPtr* entryPtr = GetPtPtr(position);
+
+ return (byte*)(*entryPtr);
+ }
+
+ //Writes one page table entry per page, from va up to the page aligned end of the range.
+ //When ptr is not null, it advances one page per entry, otherwise all entries are set to null.
+ private void SetPtEntries(long va, byte* ptr, long size)
+ {
+ long end = (va + size + PageMask) & ~PageMask;
+
+ for (; (ulong)va < (ulong)end; va += PageSize)
+ {
+ SetPtEntry(va, ptr);
+
+ if (ptr != null)
+ {
+ ptr += PageSize;
+ }
+ }
+ }
+
+ //Overwrites the page table entry of the page containing position with ptr.
+ private void SetPtEntry(long position, byte* ptr)
+ {
+ byte** slot = (byte**)GetPtPtr(position);
+
+ *slot = ptr;
+ }
+
+ //Sets the given flag bits on the page table entry of the page containing position.
+ private void SetPtEntryFlag(long position, long flag)
+ {
+ const bool set = true;
+
+ ModifyPtEntryFlag(position, flag, set);
+ }
+
+ //Clears the given flag bits on the page table entry of the page containing position.
+ private void ClearPtEntryFlag(long position, long flag)
+ {
+ const bool set = false;
+
+ ModifyPtEntryFlag(position, flag, set);
+ }
+
+ //Atomically sets (setFlag true) or clears (setFlag false) the flag bits
+ //on the page table entry of the page containing position.
+ //The update is retried until no other thread changed the entry in between.
+ private void ModifyPtEntryFlag(long position, long flag, bool setFlag)
+ {
+ //The location of the entry doesn't depend on its value,
+ //so the table walk is done only once, outside of the retry loop.
+ IntPtr* ptPtr = GetPtPtr(position);
+
+ while (true)
+ {
+ IntPtr old = *ptPtr;
+
+ long modified = old.ToInt64();
+
+ if (setFlag)
+ {
+ modified |= flag;
+ }
+ else
+ {
+ modified &= ~flag;
+ }
+
+ IntPtr origValue = Interlocked.CompareExchange(ref *ptPtr, new IntPtr(modified), old);
+
+ if (origValue == old)
+ {
+ break;
+ }
+ }
+ }
+
+ //Returns the address of the last level page table entry for the page containing position.
+ //Intermediate levels that don't exist yet are allocated along the way.
+ //Throws ArgumentOutOfRangeException when position is outside of the address space.
+ private IntPtr* GetPtPtr(long position)
+ {
+ if (!IsValidPosition(position))
+ {
+ throw new ArgumentOutOfRangeException(nameof(position));
+ }
+
+ IntPtr nextPtr = _pageTable;
+
+ IntPtr* ptePtr = null;
+
+ int bit = PageBits;
+
+ while (true)
+ {
+ long index = (position >> bit) & PtLevelMask;
+
+ ptePtr = &((IntPtr*)nextPtr)[index];
+
+ bit += PtLevelBits;
+
+ //Once all the address bits were consumed, ptePtr points to the last level entry.
+ if (bit >= AddressSpaceBits)
+ {
+ break;
+ }
+
+ nextPtr = *ptePtr;
+
+ if (nextPtr == IntPtr.Zero)
+ {
+ //Entry does not yet exist, allocate a new one.
+ IntPtr newPtr = Allocate((ulong)(PtLevelSize * IntPtr.Size));
+
+ //Install the allocated table only if the entry is still zero.
+ //An unconditional exchange would overwrite a table that another thread
+ //installed first, and the entry would then point to the memory freed below.
+ nextPtr = Interlocked.CompareExchange(ref *ptePtr, newPtr, IntPtr.Zero);
+
+ //If the old pointer is not null, then another thread already has set it.
+ if (nextPtr != IntPtr.Zero)
{
- mon.ExState = false;
+ Free(newPtr);
+ }
+ else
+ {
+ nextPtr = newPtr;
}
}
+ }
- if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
+ return ptePtr;
+ }
+
+ //Checks if any page of the guest range starting at position was modified.
+ //Without write watch support, this is delegated to IsRegionModifiedFallback.
+ //Otherwise the range is walked page by page, pages with consecutive host addresses
+ //are grouped, and GetModifiedPages is queried once per group.
+ //NOTE(review): the page count is size / PageSize, so a size below PageSize
+ //checks no pages at all and returns false; confirm that this is intended.
+ public bool IsRegionModified(long position, long size)
+ {
+ if (!HasWriteWatchSupport)
+ {
+ return IsRegionModifiedFallback(position, size);
+ }
+
+ IntPtr address = Translate(position);
+
+ IntPtr baseAddr = address;
+ IntPtr expectedAddr = address;
+
+ long pendingPages = 0;
+
+ long pages = size / PageSize;
+
+ bool modified = false;
+
+ //Queries the pending group (pendingPages pages starting at baseAddr).
+ //A failed query is treated as modified.
+ bool IsAnyPageModified()
+ {
+ IntPtr pendingSize = new IntPtr(pendingPages * PageSize);
+
+ IntPtr[] addresses = new IntPtr[pendingPages];
+
+ bool result = GetModifiedPages(baseAddr, pendingSize, addresses, out ulong count);
+
+ if (result)
{
- threadMon = new ArmMonitor();
-
- _monitors.Add(core, threadMon);
+ return count != 0;
}
-
- threadMon.Position = position;
- threadMon.ExState = true;
- }
- }
-
- public bool TestExclusive(int core, long position)
- {
- //Note: Any call to this method also should be followed by a
- //call to ClearExclusiveForStore if this method returns true.
- position &= ~ErgMask;
-
- Monitor.Enter(_monitors);
-
- if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
- {
- Monitor.Exit(_monitors);
-
- return false;
- }
-
- bool exState = threadMon.HasExclusiveAccess(position);
-
- if (!exState)
- {
- Monitor.Exit(_monitors);
- }
-
- return exState;
- }
-
- public void ClearExclusiveForStore(int core)
- {
- if (_monitors.TryGetValue(core, out ArmMonitor threadMon))
- {
- threadMon.ExState = false;
- }
-
- Monitor.Exit(_monitors);
- }
-
- public void ClearExclusive(int core)
- {
- lock (_monitors)
- {
- if (_monitors.TryGetValue(core, out ArmMonitor threadMon))
+ else
{
- threadMon.ExState = false;
+ return true;
}
}
+
+ while (pages-- > 0)
+ {
+ //The host address is not the one following the previous page,
+ //so the current group ends here and a new one starts at this page.
+ if (address != expectedAddr)
+ {
+ modified |= IsAnyPageModified();
+
+ baseAddr = address;
+
+ pendingPages = 0;
+ }
+
+ expectedAddr = address + PageSize;
+
+ pendingPages++;
+
+ if (pages == 0)
+ {
+ break;
+ }
+
+ position += PageSize;
+
+ address = Translate(position);
+ }
+
+ //Query the last group.
+ if (pendingPages != 0)
+ {
+ modified |= IsAnyPageModified();
+ }
+
+ return modified;
}
- public void WriteInt32ToSharedAddr(long position, int value)
+ //Checks if any page of the guest range starting at position was modified,
+ //using the PteFlagNotModified flag stored on the page table entries.
+ //Pages that were reported as modified get the flag set again, so they are
+ //only reported once until the next write (TranslateWrite clears the flag).
+ //Pages outside of the address space are always reported as modified.
+ private unsafe bool IsRegionModifiedFallback(long position, long size)
{
- long maskedPosition = position & ~ErgMask;
+ long endAddr = (position + size + PageMask) & ~PageMask;
- lock (_monitors)
+ bool modified = false;
+
+ while ((ulong)position < (ulong)endAddr)
{
- foreach (ArmMonitor mon in _monitors.Values)
+ if (IsValidPosition(position))
{
- if (mon.Position == maskedPosition && mon.ExState)
+ //Walk the page table to find the entry. Indexing _pageTable directly with
+ //the page number is only valid for the flat page table, with the multi
+ //level one it reads past the end of the first level.
+ byte* ptr = GetPtEntry(position);
+
+ ulong ptrUlong = (ulong)ptr;
+
+ if ((ptrUlong & PteFlagNotModified) == 0)
{
- mon.ExState = false;
+ modified = true;
+
+ SetPtEntryFlag(position, PteFlagNotModified);
}
}
+ else
+ {
+ modified = true;
+ }
- WriteInt32(position, value);
+ position += PageSize;
}
+
+ return modified;
+ }
+
+ //Gets the host address of a guest range, when the range is contiguous on host memory.
+ //Returns false, with ptr set to IntPtr.Zero, when the range is not contiguous.
+ public bool TryGetHostAddress(long position, long size, out IntPtr ptr)
+ {
+ if (!IsContiguous(position, size))
+ {
+ ptr = IntPtr.Zero;
+
+ return false;
+ }
+
+ ptr = Translate(position);
+
+ return true;
+ }
+
+ //Checks if the pages of the guest range map to consecutive physical addresses.
+ //The walk starts at the base of the page containing position.
+ private bool IsContiguous(long position, long size)
+ {
+ long end = position + size;
+
+ long page = position & ~PageMask;
+
+ long expected = GetPhysicalAddress(page);
+
+ for (; (ulong)page < (ulong)end; page += PageSize, expected += PageSize)
+ {
+ if (GetPhysicalAddress(page) != expected)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ //Checks if position is inside of the address space. The comparison is unsigned,
+ //so negative positions are also rejected.
+ public bool IsValidPosition(long position)
+ {
+ ulong limit = (ulong)AddressSpaceSize;
+
+ return (ulong)position < limit;
+ }
+
+ //Compare exchange of a pair of 32 bits values, done as a single 64 bits operation.
+ //The low values go on bits 0-31, and the high values on bits 32-63.
+ internal bool AtomicCompareExchange2xInt32(
+ long position,
+ int expectedLow,
+ int expectedHigh,
+ int desiredLow,
+ int desiredHigh)
+ {
+ long expected = ((long)expectedHigh << 32) | (uint)expectedLow;
+ long desired = ((long)desiredHigh << 32) | (uint)desiredLow;
+
+ return AtomicCompareExchangeInt64(position, expected, desired);
+ }
+
+ //128 bits compare exchange at position, which must be aligned to 16 bytes.
+ //The address is translated for write before calling InterlockedCompareExchange128.
+ internal bool AtomicCompareExchangeInt128(
+ long position,
+ ulong expectedLow,
+ ulong expectedHigh,
+ ulong desiredLow,
+ ulong desiredHigh)
+ {
+ bool aligned = (position & 0xf) == 0;
+
+ if (!aligned)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ IntPtr hostAddress = TranslateWrite(position);
+
+ return InterlockedCompareExchange128(
+ hostAddress,
+ expectedLow,
+ expectedHigh,
+ desiredLow,
+ desiredHigh);
+ }
+
+ //Reads 128 bits from position, which must be aligned to 16 bytes, using InterlockedRead128.
+ //The two 64 bits halves are then inserted on a vector with VectorInsertInt,
+ //low at index 0 and high at index 1.
+ internal Vector128 AtomicReadInt128(long position)
+ {
+ if ((position & 0xf) != 0)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ IntPtr ptr = Translate(position);
+
+ InterlockedRead128(ptr, out ulong low, out ulong high);
+
+ Vector128 vector = default(Vector128);
+
+ //NOTE(review): the last argument (3) is presumably the element size (64 bits), confirm on VectorHelper.
+ vector = VectorHelper.VectorInsertInt(low, vector, 0, 3);
+ vector = VectorHelper.VectorInsertInt(high, vector, 1, 3);
+
+ return vector;
+ }
+
+ //Replaces the byte at position with desired, when it is currently equal to expected.
+ //This is done with a 32 bits compare exchange on the word starting at position,
+ //where the upper 24 bits of both operands are taken from the current memory value.
+ //Returns true when the exchange was done.
+ public bool AtomicCompareExchangeByte(long position, byte expected, byte desired)
+ {
+ //This is a store, so the address is translated for write, like on the 32 and 64 bits
+ //variants. With Translate, the PteFlagNotModified flag of the page was never
+ //cleared, and the fallback modification tracking missed the write.
+ int* ptr = (int*)TranslateWrite(position);
+
+ int currentValue = *ptr;
+
+ int expected32 = (currentValue & ~byte.MaxValue) | expected;
+ int desired32 = (currentValue & ~byte.MaxValue) | desired;
+
+ return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
+ }
+
+ //Replaces the 16 bits value at position with desired, when it is currently equal to expected.
+ //The position must be aligned to 2 bytes. This is done with a 32 bits compare exchange on the
+ //word starting at position, where the upper 16 bits of both operands are taken from the
+ //current memory value. Returns true when the exchange was done.
+ public bool AtomicCompareExchangeInt16(long position, short expected, short desired)
+ {
+ if ((position & 1) != 0)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ //This is a store, so the address is translated for write, like on the 32 and 64 bits
+ //variants. With Translate, the PteFlagNotModified flag of the page was never
+ //cleared, and the fallback modification tracking missed the write.
+ int* ptr = (int*)TranslateWrite(position);
+
+ int currentValue = *ptr;
+
+ int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected;
+ int desired32 = (currentValue & ~ushort.MaxValue) | (ushort)desired;
+
+ return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
+ }
+
+ //32 bits compare exchange at position, which must be aligned to 4 bytes.
+ //Returns true when the value in memory was equal to expected.
+ public bool AtomicCompareExchangeInt32(long position, int expected, int desired)
+ {
+ bool aligned = (position & 3) == 0;
+
+ if (!aligned)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ int* target = (int*)TranslateWrite(position);
+
+ int previous = Interlocked.CompareExchange(ref *target, desired, expected);
+
+ return previous == expected;
+ }
+
+ //64 bits compare exchange at position, which must be aligned to 8 bytes.
+ //Returns true when the value in memory was equal to expected.
+ public bool AtomicCompareExchangeInt64(long position, long expected, long desired)
+ {
+ bool aligned = (position & 7) == 0;
+
+ if (!aligned)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ long* target = (long*)TranslateWrite(position);
+
+ long previous = Interlocked.CompareExchange(ref *target, desired, expected);
+
+ return previous == expected;
+ }
+
+ //Atomically increments the 32 bits value at position, which must be aligned to 4 bytes.
+ //Returns the incremented value.
+ public int AtomicIncrementInt32(long position)
+ {
+ bool aligned = (position & 3) == 0;
+
+ if (!aligned)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ int* counter = (int*)TranslateWrite(position);
+
+ int incremented = Interlocked.Increment(ref *counter);
+
+ return incremented;
+ }
+
+ //Atomically decrements the 32 bits value at position, which must be aligned to 4 bytes.
+ //Returns the decremented value.
+ public int AtomicDecrementInt32(long position)
+ {
+ bool aligned = (position & 3) == 0;
+
+ if (!aligned)
+ {
+ AbortWithAlignmentFault(position);
+ }
+
+ int* counter = (int*)TranslateWrite(position);
+
+ int decremented = Interlocked.Decrement(ref *counter);
+
+ return decremented;
+ }
+
+ //Reports an atomic access to a misaligned position, by throwing InvalidOperationException.
+ private void AbortWithAlignmentFault(long position)
+ {
+ //TODO: Abort mode and exception support on the CPU.
+ string message = $"Tried to compare exchange a misaligned address 0x{position:X16}.";
+
+ throw new InvalidOperationException(message);
}
public sbyte ReadSByte(long position)
@@ -353,7 +715,7 @@ namespace ChocolArm64.Memory
int copySize = (int)(pageLimit - position);
- Marshal.Copy((IntPtr)Translate(position), data, offset, copySize);
+ Marshal.Copy(Translate(position), data, offset, copySize);
position += copySize;
offset += copySize;
@@ -390,7 +752,7 @@ namespace ChocolArm64.Memory
int copySize = (int)(pageLimit - position);
- Marshal.Copy((IntPtr)Translate(position), data, offset, copySize);
+ Marshal.Copy(Translate(position), data, offset, copySize);
position += copySize;
offset += copySize;
@@ -553,7 +915,7 @@ namespace ChocolArm64.Memory
int copySize = (int)(pageLimit - position);
- Marshal.Copy(data, offset, (IntPtr)TranslateWrite(position), copySize);
+ Marshal.Copy(data, offset, TranslateWrite(position), copySize);
position += copySize;
offset += copySize;
@@ -583,7 +945,7 @@ namespace ChocolArm64.Memory
int copySize = (int)(pageLimit - position);
- Marshal.Copy(data, offset, (IntPtr)TranslateWrite(position), copySize);
+ Marshal.Copy(data, offset, Translate(position), copySize);
position += copySize;
offset += copySize;
@@ -596,8 +958,8 @@ namespace ChocolArm64.Memory
if (IsContiguous(src, size) &&
IsContiguous(dst, size))
{
- byte* srcPtr = Translate(src);
- byte* dstPtr = TranslateWrite(dst);
+ byte* srcPtr = (byte*)Translate(src);
+ byte* dstPtr = (byte*)Translate(dst);
Buffer.MemoryCopy(srcPtr, dstPtr, size, size);
}
@@ -607,266 +969,6 @@ namespace ChocolArm64.Memory
}
}
- public void Map(long va, long pa, long size)
- {
- SetPtEntries(va, _ramPtr + pa, size);
- }
-
- public void Unmap(long position, long size)
- {
- SetPtEntries(position, null, size);
-
- StopObservingRegion(position, size);
- }
-
- public bool IsMapped(long position)
- {
- if (!(IsValidPosition(position)))
- {
- return false;
- }
-
- long l0 = (position >> PtLvl0Bit) & PtLvl0Mask;
- long l1 = (position >> PtLvl1Bit) & PtLvl1Mask;
-
- if (_pageTable[l0] == null)
- {
- return false;
- }
-
- return _pageTable[l0][l1] != null || _observedPages.ContainsKey(position >> PageBits);
- }
-
- public long GetPhysicalAddress(long virtualAddress)
- {
- byte* ptr = Translate(virtualAddress);
-
- return (long)(ptr - _ramPtr);
- }
-
- internal byte* Translate(long position)
- {
- long l0 = (position >> PtLvl0Bit) & PtLvl0Mask;
- long l1 = (position >> PtLvl1Bit) & PtLvl1Mask;
-
- long old = position;
-
- byte** lvl1 = _pageTable[l0];
-
- if ((position >> (PtLvl0Bit + PtLvl0Bits)) != 0)
- {
- goto Unmapped;
- }
-
- if (lvl1 == null)
- {
- goto Unmapped;
- }
-
- position &= PageMask;
-
- byte* ptr = lvl1[l1];
-
- if (ptr == null)
- {
- goto Unmapped;
- }
-
- return ptr + position;
-
-Unmapped:
- return HandleNullPte(old);
- }
-
- private byte* HandleNullPte(long position)
- {
- long key = position >> PageBits;
-
- if (_observedPages.TryGetValue(key, out IntPtr ptr))
- {
- return (byte*)ptr + (position & PageMask);
- }
-
- InvalidAccess?.Invoke(this, new MemoryAccessEventArgs(position));
-
- throw new VmmPageFaultException(position);
- }
-
- internal byte* TranslateWrite(long position)
- {
- long l0 = (position >> PtLvl0Bit) & PtLvl0Mask;
- long l1 = (position >> PtLvl1Bit) & PtLvl1Mask;
-
- long old = position;
-
- byte** lvl1 = _pageTable[l0];
-
- if ((position >> (PtLvl0Bit + PtLvl0Bits)) != 0)
- {
- goto Unmapped;
- }
-
- if (lvl1 == null)
- {
- goto Unmapped;
- }
-
- position &= PageMask;
-
- byte* ptr = lvl1[l1];
-
- if (ptr == null)
- {
- goto Unmapped;
- }
-
- return ptr + position;
-
-Unmapped:
- return HandleNullPteWrite(old);
- }
-
- private byte* HandleNullPteWrite(long position)
- {
- long key = position >> PageBits;
-
- MemoryAccessEventArgs e = new MemoryAccessEventArgs(position);
-
- if (_observedPages.TryGetValue(key, out IntPtr ptr))
- {
- SetPtEntry(position, (byte*)ptr);
-
- ObservedAccess?.Invoke(this, e);
-
- return (byte*)ptr + (position & PageMask);
- }
-
- InvalidAccess?.Invoke(this, e);
-
- throw new VmmPageFaultException(position);
- }
-
- private void SetPtEntries(long va, byte* ptr, long size)
- {
- long endPosition = (va + size + PageMask) & ~PageMask;
-
- while ((ulong)va < (ulong)endPosition)
- {
- SetPtEntry(va, ptr);
-
- va += PageSize;
-
- if (ptr != null)
- {
- ptr += PageSize;
- }
- }
- }
-
- private void SetPtEntry(long position, byte* ptr)
- {
- if (!IsValidPosition(position))
- {
- throw new ArgumentOutOfRangeException(nameof(position));
- }
-
- long l0 = (position >> PtLvl0Bit) & PtLvl0Mask;
- long l1 = (position >> PtLvl1Bit) & PtLvl1Mask;
-
- if (_pageTable[l0] == null)
- {
- byte** lvl1 = (byte**)Marshal.AllocHGlobal(PtLvl1Size * IntPtr.Size);
-
- for (int zl1 = 0; zl1 < PtLvl1Size; zl1++)
- {
- lvl1[zl1] = null;
- }
-
- Thread.MemoryBarrier();
-
- _pageTable[l0] = lvl1;
- }
-
- _pageTable[l0][l1] = ptr;
- }
-
- public void StartObservingRegion(long position, long size)
- {
- long endPosition = (position + size + PageMask) & ~PageMask;
-
- position &= ~PageMask;
-
- while ((ulong)position < (ulong)endPosition)
- {
- _observedPages[position >> PageBits] = (IntPtr)Translate(position);
-
- SetPtEntry(position, null);
-
- position += PageSize;
- }
- }
-
- public void StopObservingRegion(long position, long size)
- {
- long endPosition = (position + size + PageMask) & ~PageMask;
-
- while (position < endPosition)
- {
- lock (_observedPages)
- {
- if (_observedPages.TryRemove(position >> PageBits, out IntPtr ptr))
- {
- SetPtEntry(position, (byte*)ptr);
- }
- }
-
- position += PageSize;
- }
- }
-
- public bool TryGetHostAddress(long position, long size, out IntPtr ptr)
- {
- if (IsContiguous(position, size))
- {
- ptr = (IntPtr)Translate(position);
-
- return true;
- }
-
- ptr = IntPtr.Zero;
-
- return false;
- }
-
- private bool IsContiguous(long position, long size)
- {
- long endPos = position + size;
-
- position &= ~PageMask;
-
- long expectedPa = GetPhysicalAddress(position);
-
- while ((ulong)position < (ulong)endPos)
- {
- long pa = GetPhysicalAddress(position);
-
- if (pa != expectedPa)
- {
- return false;
- }
-
- position += PageSize;
- expectedPa += PageSize;
- }
-
- return true;
- }
-
- public bool IsValidPosition(long position)
- {
- return position >> (PtLvl0Bits + PtLvl1Bits + PageBits) == 0;
- }
-
public void Dispose()
{
Dispose(true);
@@ -874,24 +976,36 @@ Unmapped:
+ //Frees the page table. The pointer is atomically swapped with zero first,
+ //so the table is only freed by the first call. The disposing argument is not used.
protected virtual void Dispose(bool disposing)
{
- if (_pageTable == null)
+ IntPtr ptr = Interlocked.Exchange(ref _pageTable, IntPtr.Zero);
+
+ if (ptr != IntPtr.Zero)
{
+ FreePageTableEntry(ptr, PageBits);
+ }
+ }
+
+ //Frees the page table level at ptr, and recursively, all the levels below it.
+ //levelBitEnd is the first address bit indexed by this level. When the level
+ //reaches AddressSpaceBits it is the last one, and only the table itself is freed.
+ private void FreePageTableEntry(IntPtr ptr, int levelBitEnd)
+ {
+ levelBitEnd += PtLevelBits;
+
+ if (levelBitEnd >= AddressSpaceBits)
+ {
+ Free(ptr);
+
return;
}
- for (int l0 = 0; l0 < PtLvl0Size; l0++)
+ for (int index = 0; index < PtLevelSize; index++)
{
- if (_pageTable[l0] != null)
- {
- Marshal.FreeHGlobal((IntPtr)_pageTable[l0]);
- }
+ IntPtr ptePtr = ((IntPtr*)ptr)[index];
- _pageTable[l0] = null;
+ if (ptePtr != IntPtr.Zero)
+ {
+ FreePageTableEntry(ptePtr, levelBitEnd);
+ }
}
- Marshal.FreeHGlobal((IntPtr)_pageTable);
-
- _pageTable = null;
+ Free(ptr);
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Memory/MemoryProtection.cs b/ChocolArm64/Memory/MemoryProtection.cs
new file mode 100644
index 0000000000..d0874bfc0f
--- /dev/null
+++ b/ChocolArm64/Memory/MemoryProtection.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ChocolArm64.Memory
+{
+ //Memory access permissions. The values are bit flags, and can be combined.
+ [Flags]
+ public enum MemoryProtection
+ {
+ None = 0x0,
+ Read = 0x1,
+ Write = 0x2,
+ Execute = 0x4,
+
+ ReadAndWrite = Read | Write,
+ ReadAndExecute = Read | Execute
+ }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Memory/MemoryProtectionException.cs b/ChocolArm64/Memory/MemoryProtectionException.cs
new file mode 100644
index 0000000000..3d2cebad33
--- /dev/null
+++ b/ChocolArm64/Memory/MemoryProtectionException.cs
@@ -0,0 +1,10 @@
+using System;
+
+namespace ChocolArm64.Memory
+{
+ //Exception for a failure to set the protection of a memory region.
+ //The message includes the protection that was requested.
+ internal class MemoryProtectionException : Exception
+ {
+ public MemoryProtectionException(MemoryProtection protection)
+ : base($"Failed to set memory protection to \"{protection}\".")
+ {
+ }
+ }
+}
\ No newline at end of file
diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs
index 3a8d3948d8..fb8b19cd19 100644
--- a/ChocolArm64/OpCodeTable.cs
+++ b/ChocolArm64/OpCodeTable.cs
@@ -310,15 +310,17 @@ namespace ChocolArm64
SetA64("x00111100x101000000000xxxxxxxxxx", InstEmit.Fcvtps_Gp, typeof(OpCodeSimdCvt64));
SetA64("x00111100x101001000000xxxxxxxxxx", InstEmit.Fcvtpu_Gp, typeof(OpCodeSimdCvt64));
SetA64("x00111100x111000000000xxxxxxxxxx", InstEmit.Fcvtzs_Gp, typeof(OpCodeSimdCvt64));
- SetA64("x00111100x011000xxxxxxxxxxxxxxxx", InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt64));
+ SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("010111101x100001101110xxxxxxxxxx", InstEmit.Fcvtzs_S, typeof(OpCodeSimd64));
SetA64("0>0011101<100001101110xxxxxxxxxx", InstEmit.Fcvtzs_V, typeof(OpCodeSimd64));
- SetA64("0x0011110>>xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzs_V, typeof(OpCodeSimdShImm64));
+ SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzs_V_Fixed, typeof(OpCodeSimdShImm64));
+ SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstEmit.Fcvtzs_V_Fixed, typeof(OpCodeSimdShImm64));
SetA64("x00111100x111001000000xxxxxxxxxx", InstEmit.Fcvtzu_Gp, typeof(OpCodeSimdCvt64));
- SetA64("x00111100x011001xxxxxxxxxxxxxxxx", InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt64));
+ SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("011111101x100001101110xxxxxxxxxx", InstEmit.Fcvtzu_S, typeof(OpCodeSimd64));
SetA64("0>1011101<100001101110xxxxxxxxxx", InstEmit.Fcvtzu_V, typeof(OpCodeSimd64));
- SetA64("0x1011110>>xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzu_V, typeof(OpCodeSimdShImm64));
+ SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzu_V_Fixed, typeof(OpCodeSimdShImm64));
+ SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstEmit.Fcvtzu_V_Fixed, typeof(OpCodeSimdShImm64));
SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstEmit.Fdiv_S, typeof(OpCodeSimdReg64));
SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstEmit.Fdiv_V, typeof(OpCodeSimdReg64));
SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstEmit.Fmadd_S, typeof(OpCodeSimdReg64));
@@ -434,8 +436,11 @@ namespace ChocolArm64
SetA64("0x001110<<100000001010xxxxxxxxxx", InstEmit.Saddlp_V, typeof(OpCodeSimd64));
SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstEmit.Saddw_V, typeof(OpCodeSimdReg64));
SetA64("x00111100x100010000000xxxxxxxxxx", InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt64));
+ SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstEmit.Scvtf_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("010111100x100001110110xxxxxxxxxx", InstEmit.Scvtf_S, typeof(OpCodeSimd64));
SetA64("0>0011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64));
+ SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm64));
+ SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm64));
SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64));
SetA64("0101111000101000000010xxxxxxxxxx", InstEmit.Sha1h_V, typeof(OpCodeSimd64));
SetA64("01011110000xxxxx001000xxxxxxxxxx", InstEmit.Sha1m_V, typeof(OpCodeSimdReg64));
@@ -542,8 +547,11 @@ namespace ChocolArm64
SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64));
SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64));
SetA64("x00111100x100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64));
+ SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstEmit.Ucvtf_Gp_Fixed, typeof(OpCodeSimdCvt64));
SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64));
SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64));
+ SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstEmit.Ucvtf_V_Fixed, typeof(OpCodeSimdShImm64));
+ SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstEmit.Ucvtf_V_Fixed, typeof(OpCodeSimdShImm64));
SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, typeof(OpCodeSimdReg64));
SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstEmit.Uhsub_V, typeof(OpCodeSimdReg64));
SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstEmit.Umax_V, typeof(OpCodeSimdReg64));
diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs
index 8fa6f4626c..cbb8131f5c 100644
--- a/ChocolArm64/Optimizations.cs
+++ b/ChocolArm64/Optimizations.cs
@@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86;
+//Global switches for the optimizations used by the translator.
+//The Use*IfAvailable properties can be changed at any time, and the internal Use*
+//properties are evaluated on each access, combining them with the host CPU support.
public static class Optimizations
{
- internal static bool FastFP = true;
+ public static bool AssumeStrictAbiCompliance { get; set; }
- private static bool _useAllSseIfAvailable = true;
+ public static bool FastFP { get; set; } = true;
- private static bool _useSseIfAvailable = true;
- private static bool _useSse2IfAvailable = true;
- private static bool _useSse3IfAvailable = true;
- private static bool _useSsse3IfAvailable = true;
- private static bool _useSse41IfAvailable = true;
- private static bool _useSse42IfAvailable = true;
+ //Default value of all the Use*IfAvailable properties below.
+ private const bool UseAllSseIfAvailable = true;
- internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
- internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
- internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported;
- internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
- internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
- internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;
-}
+ public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
+
+ //An instruction set is used only when it is both enabled above and supported by the host.
+ internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
+ internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
+ internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
+ internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
+ internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
+ internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+}
\ No newline at end of file
diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs
index abec60bb2e..caf73deb1f 100644
--- a/ChocolArm64/State/CpuThreadState.cs
+++ b/ChocolArm64/State/CpuThreadState.cs
@@ -37,7 +37,6 @@ namespace ChocolArm64.State
public int ElrHyp;
public bool Running { get; set; }
- public int Core { get; set; }
private bool _interrupted;
@@ -85,6 +84,16 @@ namespace ChocolArm64.State
internal Translator CurrentTranslator;
+ private ulong _exclusiveAddress;
+
+ internal ulong ExclusiveValueLow { get; set; }
+ internal ulong ExclusiveValueHigh { get; set; }
+
+ //A new thread state starts without an exclusive address set.
+ public CpuThreadState() => ClearExclusiveAddress();
+
static CpuThreadState()
{
_hostTickFreq = 1.0 / Stopwatch.Frequency;
@@ -94,6 +103,26 @@ namespace ChocolArm64.State
_tickCounter.Start();
}
+ //Stores the masked form of address as the current exclusive address.
+ internal void SetExclusiveAddress(ulong address)
+ {
+ ulong masked = GetMaskedExclusiveAddress(address);
+
+ _exclusiveAddress = masked;
+ }
+
+ //Checks if the masked form of address matches the current exclusive address.
+ internal bool CheckExclusiveAddress(ulong address)
+ {
+ ulong masked = GetMaskedExclusiveAddress(address);
+
+ return _exclusiveAddress == masked;
+ }
+
+ //Resets the exclusive address to ulong.MaxValue. Masked addresses have their
+ //low bits cleared, so that value is never equal to one of them.
+ internal void ClearExclusiveAddress()
+ {
+ const ulong noAddress = ulong.MaxValue;
+
+ _exclusiveAddress = noAddress;
+ }
+
+ //Aligns address down to a multiple of (4 << ErgSizeLog2).
+ private ulong GetMaskedExclusiveAddress(ulong address)
+ {
+ ulong granuleSize = 4UL << ErgSizeLog2;
+
+ ulong offsetMask = granuleSize - 1;
+
+ return address & ~offsetMask;
+ }
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool Synchronize(int bbWeight)
{
diff --git a/ChocolArm64/Translation/CallType.cs b/ChocolArm64/Translation/CallType.cs
new file mode 100644
index 0000000000..937ede768a
--- /dev/null
+++ b/ChocolArm64/Translation/CallType.cs
@@ -0,0 +1,9 @@
+namespace ChocolArm64.Translation
+{
+ //Kinds of call that the translator distinguishes.
+ enum CallType
+ {
+ Call = 0,
+ VirtualCall = 1,
+ VirtualJump = 2
+ }
+}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILBlock.cs b/ChocolArm64/Translation/ILBlock.cs
index 136579012b..12773705a1 100644
--- a/ChocolArm64/Translation/ILBlock.cs
+++ b/ChocolArm64/Translation/ILBlock.cs
@@ -4,13 +4,13 @@ namespace ChocolArm64.Translation
{
class ILBlock : IILEmit
{
- public long IntInputs { get; private set; }
- public long IntOutputs { get; private set; }
- public long IntAwOutputs { get; private set; }
+ public long IntInputs { get; private set; }
+ public long IntOutputs { get; private set; }
+ private long _intAwOutputs;
- public long VecInputs { get; private set; }
- public long VecOutputs { get; private set; }
- public long VecAwOutputs { get; private set; }
+ public long VecInputs { get; private set; }
+ public long VecOutputs { get; private set; }
+ private long _vecAwOutputs;
public bool HasStateStore { get; private set; }
@@ -34,25 +34,25 @@ namespace ChocolArm64.Translation
//opcodes emitted by each ARM instruction.
//We can only consider the new outputs for doing input elimination
//after all the CIL opcodes used by the instruction being emitted.
- IntAwOutputs = IntOutputs;
- VecAwOutputs = VecOutputs;
+ _intAwOutputs = IntOutputs;
+ _vecAwOutputs = VecOutputs;
}
else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
{
- switch (ld.IoType)
+ switch (ld.VarType)
{
- case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break;
- case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break;
- case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break;
+ case VarType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break;
+ case VarType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break;
+ case VarType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break;
}
}
else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
{
- switch (st.IoType)
+ switch (st.VarType)
{
- case IoType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
- case IoType.Int: IntOutputs |= 1L << st.Index; break;
- case IoType.Vector: VecOutputs |= 1L << st.Index; break;
+ case VarType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
+ case VarType.Int: IntOutputs |= 1L << st.Index; break;
+ case VarType.Vector: VecOutputs |= 1L << st.Index; break;
}
}
else if (emitter is ILOpCodeStoreState)
diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs
index ef63e60cd3..8804521c57 100644
--- a/ChocolArm64/Translation/ILEmitterCtx.cs
+++ b/ChocolArm64/Translation/ILEmitterCtx.cs
@@ -1,5 +1,6 @@
using ChocolArm64.Decoders;
using ChocolArm64.Instructions;
+using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Collections.Generic;
@@ -10,6 +11,8 @@ namespace ChocolArm64.Translation
{
class ILEmitterCtx
{
+ public MemoryManager Memory { get; }
+
private TranslatorCache _cache;
private TranslatorQueue _queue;
@@ -28,6 +31,10 @@ namespace ChocolArm64.Translation
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
+ public bool HasIndirectJump { get; set; }
+
+ public bool HasSlowCall { get; set; }
+
private Dictionary _visitedBlocks;
private Queue _branchTargets;
@@ -43,18 +50,35 @@ namespace ChocolArm64.Translation
//values needed by some functions, since IL doesn't have a swap instruction.
//You can use any value here as long it doesn't conflict with the indices
//for the other registers. Any value >= 64 or < 0 will do.
- private const int IntTmpIndex = -1;
- private const int RorTmpIndex = -2;
- private const int CmpOptTmp1Index = -3;
- private const int CmpOptTmp2Index = -4;
- private const int VecTmp1Index = -5;
- private const int VecTmp2Index = -6;
+ private const int ReservedLocalsCount = 64;
- public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph)
+ private const int RorTmpIndex = ReservedLocalsCount + 0;
+ private const int CmpOptTmp1Index = ReservedLocalsCount + 1;
+ private const int CmpOptTmp2Index = ReservedLocalsCount + 2;
+ private const int IntGpTmp1Index = ReservedLocalsCount + 3;
+ private const int IntGpTmp2Index = ReservedLocalsCount + 4;
+ private const int UserIntTempStart = ReservedLocalsCount + 5;
+
+ //Vectors are part of another "set" of locals.
+ private const int VecGpTmp1Index = ReservedLocalsCount + 0;
+ private const int VecGpTmp2Index = ReservedLocalsCount + 1;
+ private const int VecGpTmp3Index = ReservedLocalsCount + 2;
+ private const int UserVecTempStart = ReservedLocalsCount + 3;
+
+ private static int _userIntTempCount;
+ private static int _userVecTempCount;
+
+ public ILEmitterCtx(
+ MemoryManager memory,
+ TranslatorCache cache,
+ TranslatorQueue queue,
+ TranslationTier tier,
+ Block graph)
{
- _cache = cache ?? throw new ArgumentNullException(nameof(cache));
- _queue = queue ?? throw new ArgumentNullException(nameof(queue));
- _currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
+ Memory = memory ?? throw new ArgumentNullException(nameof(memory));
+ _cache = cache ?? throw new ArgumentNullException(nameof(cache));
+ _queue = queue ?? throw new ArgumentNullException(nameof(queue));
+ _currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
Tier = tier;
@@ -72,7 +96,22 @@ namespace ChocolArm64.Translation
ResetBlockState();
- AdvanceOpCode();
+ if (AdvanceOpCode())
+ {
+ EmitSynchronization();
+
+ _ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
+ }
+ }
+
+ public static int GetIntTempIndex()
+ {
+ return UserIntTempStart + _userIntTempCount++;
+ }
+
+ public static int GetVecTempIndex()
+ {
+ return UserVecTempStart + _userVecTempCount++;
}
public ILBlock[] GetILBlocks()
@@ -98,10 +137,18 @@ namespace ChocolArm64.Translation
return;
}
- if (_opcIndex == 0)
+ int opcIndex = _opcIndex;
+
+ if (opcIndex == 0)
{
MarkLabel(GetLabel(_currBlock.Position));
+ }
+ bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
+
+ if (isLastOp && CurrBlock.Branch != null &&
+ (ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
+ {
EmitSynchronization();
}
@@ -132,7 +179,7 @@ namespace ChocolArm64.Translation
//of the next instruction to be executed (in the case that the condition
//is false, and the branch was not taken, as all basic blocks should end with
//some kind of branch).
- if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null)
+ if (isLastOp && CurrBlock.Next == null)
{
EmitStoreState();
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
@@ -144,7 +191,7 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILBarrier());
}
- private Condition GetInverseCond(Condition cond)
+ private static Condition GetInverseCond(Condition cond)
{
//Bit 0 of all conditions is basically a negation bit, so
//inverting this bit has the effect of inverting the condition.
@@ -256,32 +303,43 @@ namespace ChocolArm64.Translation
return;
}
- _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
+ _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
}
public bool TryOptEmitSubroutineCall()
{
+ //Calls should always have a next block, unless
+ //we're translating a single basic block.
if (_currBlock.Next == null)
{
return false;
}
- if (CurrOp.Emitter != InstEmit.Bl)
+ if (!(CurrOp is IOpCodeBImm op))
{
return false;
}
- if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine))
+ if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
{
return false;
}
+ //It's not worth calling a Tier0 method, because
+ //it contains slow code, rather than the entire function.
+ if (sub.Tier == TranslationTier.Tier0)
+ {
+ return false;
+ }
+
+ EmitStoreState(sub);
+
for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++)
{
EmitLdarg(index);
}
- EmitCall(subroutine.Method);
+ EmitCall(sub.Method);
return true;
}
@@ -292,8 +350,8 @@ namespace ChocolArm64.Translation
InstEmitAluHelper.EmitAluLoadOpers(this);
- Stloc(CmpOptTmp2Index, IoType.Int);
- Stloc(CmpOptTmp1Index, IoType.Int);
+ Stloc(CmpOptTmp2Index, VarType.Int);
+ Stloc(CmpOptTmp1Index, VarType.Int);
}
private Dictionary _branchOps = new Dictionary()
@@ -312,19 +370,57 @@ namespace ChocolArm64.Translation
public void EmitCondBranch(ILLabel target, Condition cond)
{
+ if (_optOpLastCompare != null &&
+ _optOpLastCompare == _optOpLastFlagSet && _branchOps.ContainsKey(cond))
+ {
+ if (_optOpLastCompare.Emitter == InstEmit.Subs)
+ {
+ Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
+ Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize);
+
+ Emit(_branchOps[cond], target);
+
+ return;
+ }
+ else if (_optOpLastCompare.Emitter == InstEmit.Adds && cond != Condition.GeUn
+ && cond != Condition.LtUn
+ && cond != Condition.GtUn
+ && cond != Condition.LeUn)
+ {
+ //There are several limitations that need to be taken into account for CMN comparisons:
+ //* The unsigned comparisons are not valid, as they depend on the
+ //carry flag value, and they will have different values for addition and
+ //subtraction. For addition, it's carry, and for subtraction, it's borrow.
+ //So, we need to make sure we're not doing an unsigned compare for the CMN case.
+ //* We can only do the optimization for the immediate variants,
+ //because when the second operand value is exactly INT_MIN, we can't
+ //negate the value as there's no positive counterpart.
+ //Such invalid values can't be encoded in the immediate encodings.
+ if (_optOpLastCompare is IOpCodeAluImm64 op)
+ {
+ Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
+
+ if (_optOpLastCompare.RegisterSize == RegisterSize.Int32)
+ {
+ EmitLdc_I4((int)-op.Imm);
+ }
+ else
+ {
+ EmitLdc_I8(-op.Imm);
+ }
+
+ Emit(_branchOps[cond], target);
+
+ return;
+ }
+ }
+ }
+
OpCode ilOp;
int intCond = (int)cond;
- if (_optOpLastCompare != null &&
- _optOpLastCompare == _optOpLastFlagSet && _branchOps.ContainsKey(cond))
- {
- Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
- Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize);
-
- ilOp = _branchOps[cond];
- }
- else if (intCond < 14)
+ if (intCond < 14)
{
int condTrue = intCond >> 1;
@@ -424,14 +520,14 @@ namespace ChocolArm64.Translation
{
if (amount > 0)
{
- Stloc(RorTmpIndex, IoType.Int);
- Ldloc(RorTmpIndex, IoType.Int);
+ Stloc(RorTmpIndex, VarType.Int);
+ Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(amount);
Emit(OpCodes.Shr_Un);
- Ldloc(RorTmpIndex, IoType.Int);
+ Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(CurrOp.GetBitsCount() - amount);
@@ -479,7 +575,7 @@ namespace ChocolArm64.Translation
public void EmitLdarg(int index)
{
- _ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg));
+ _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg));
}
public void EmitLdintzr(int index)
@@ -521,22 +617,33 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock));
}
- public void EmitLdtmp() => EmitLdint(IntTmpIndex);
- public void EmitSttmp() => EmitStint(IntTmpIndex);
+ private void EmitStoreState(TranslatedSub callSub)
+ {
+ _ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub));
+ }
- public void EmitLdvectmp() => EmitLdvec(VecTmp1Index);
- public void EmitStvectmp() => EmitStvec(VecTmp1Index);
+ public void EmitLdtmp() => EmitLdint(IntGpTmp1Index);
+ public void EmitSttmp() => EmitStint(IntGpTmp1Index);
- public void EmitLdvectmp2() => EmitLdvec(VecTmp2Index);
- public void EmitStvectmp2() => EmitStvec(VecTmp2Index);
+ public void EmitLdtmp2() => EmitLdint(IntGpTmp2Index);
+ public void EmitSttmp2() => EmitStint(IntGpTmp2Index);
- public void EmitLdint(int index) => Ldloc(index, IoType.Int);
- public void EmitStint(int index) => Stloc(index, IoType.Int);
+ public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index);
+ public void EmitStvectmp() => EmitStvec(VecGpTmp1Index);
- public void EmitLdvec(int index) => Ldloc(index, IoType.Vector);
- public void EmitStvec(int index) => Stloc(index, IoType.Vector);
+ public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
+ public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
- public void EmitLdflg(int index) => Ldloc(index, IoType.Flag);
+ public void EmitLdvectmp3() => EmitLdvec(VecGpTmp3Index);
+ public void EmitStvectmp3() => EmitStvec(VecGpTmp3Index);
+
+ public void EmitLdint(int index) => Ldloc(index, VarType.Int);
+ public void EmitStint(int index) => Stloc(index, VarType.Int);
+
+ public void EmitLdvec(int index) => Ldloc(index, VarType.Vector);
+ public void EmitStvec(int index) => Stloc(index, VarType.Vector);
+
+ public void EmitLdflg(int index) => Ldloc(index, VarType.Flag);
public void EmitStflg(int index)
{
//Set this only if any of the NZCV flag bits were modified.
@@ -549,52 +656,32 @@ namespace ChocolArm64.Translation
_optOpLastFlagSet = CurrOp;
}
- Stloc(index, IoType.Flag);
+ Stloc(index, VarType.Flag);
}
- private void Ldloc(int index, IoType ioType)
+ private void Ldloc(int index, VarType varType)
{
- _ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize));
+ _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize));
}
- private void Ldloc(int index, IoType ioType, RegisterSize registerSize)
+ private void Ldloc(int index, VarType varType, RegisterSize registerSize)
{
- _ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize));
+ _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize));
}
- private void Stloc(int index, IoType ioType)
+ private void Stloc(int index, VarType varType)
{
- _ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize));
+ _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize));
}
public void EmitCallPropGet(Type objType, string propName)
{
- if (objType == null)
- {
- throw new ArgumentNullException(nameof(objType));
- }
-
- if (propName == null)
- {
- throw new ArgumentNullException(nameof(propName));
- }
-
- EmitCall(objType.GetMethod($"get_{propName}"));
+ EmitCall(objType, $"get_{propName}");
}
public void EmitCallPropSet(Type objType, string propName)
{
- if (objType == null)
- {
- throw new ArgumentNullException(nameof(objType));
- }
-
- if (propName == null)
- {
- throw new ArgumentNullException(nameof(propName));
- }
-
- EmitCall(objType.GetMethod($"set_{propName}"));
+ EmitCall(objType, $"set_{propName}");
}
public void EmitCall(Type objType, string mthdName)
@@ -612,6 +699,16 @@ namespace ChocolArm64.Translation
EmitCall(objType.GetMethod(mthdName));
}
+ public void EmitCallPrivatePropGet(Type objType, string propName)
+ {
+ EmitPrivateCall(objType, $"get_{propName}");
+ }
+
+ public void EmitCallPrivatePropSet(Type objType, string propName)
+ {
+ EmitPrivateCall(objType, $"set_{propName}");
+ }
+
public void EmitPrivateCall(Type objType, string mthdName)
{
if (objType == null)
diff --git a/ChocolArm64/Translation/ILLabel.cs b/ChocolArm64/Translation/ILLabel.cs
index f423a4256c..17a31783df 100644
--- a/ChocolArm64/Translation/ILLabel.cs
+++ b/ChocolArm64/Translation/ILLabel.cs
@@ -6,7 +6,7 @@ namespace ChocolArm64.Translation
{
private bool _hasLabel;
- private Label _lbl;
+ private Label _label;
public void Emit(ILMethodBuilder context)
{
@@ -17,12 +17,12 @@ namespace ChocolArm64.Translation
{
if (!_hasLabel)
{
- _lbl = context.Generator.DefineLabel();
+ _label = context.Generator.DefineLabel();
_hasLabel = true;
}
- return _lbl;
+ return _label;
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILMethodBuilder.cs b/ChocolArm64/Translation/ILMethodBuilder.cs
index 892f831be3..98b5052043 100644
--- a/ChocolArm64/Translation/ILMethodBuilder.cs
+++ b/ChocolArm64/Translation/ILMethodBuilder.cs
@@ -8,7 +8,10 @@ namespace ChocolArm64.Translation
{
class ILMethodBuilder
{
- public LocalAlloc LocalAlloc { get; private set; }
+ private const int RegsCount = 32;
+ private const int RegsMask = RegsCount - 1;
+
+ public RegisterUsage RegUsage { get; private set; }
public ILGenerator Generator { get; private set; }
@@ -18,29 +21,47 @@ namespace ChocolArm64.Translation
private string _subName;
+ public bool IsAarch64 { get; }
+
+ public bool IsSubComplete { get; }
+
private int _localsCount;
- public ILMethodBuilder(ILBlock[] ilBlocks, string subName)
+ public ILMethodBuilder(
+ ILBlock[] ilBlocks,
+ string subName,
+ bool isAarch64,
+ bool isSubComplete = false)
{
- _ilBlocks = ilBlocks;
- _subName = subName;
+ _ilBlocks = ilBlocks;
+ _subName = subName;
+ IsAarch64 = isAarch64;
+ IsSubComplete = isSubComplete;
}
- public TranslatedSub GetSubroutine(TranslationTier tier)
+ public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
{
- LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
+ RegUsage = new RegisterUsage();
+
+ RegUsage.BuildUses(_ilBlocks[0]);
DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
- Generator = method.GetILGenerator();
+ long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
+ long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
- TranslatedSub subroutine = new TranslatedSub(method, tier);
+ TranslatedSub subroutine = new TranslatedSub(
+ method,
+ intNiRegsMask,
+ vecNiRegsMask,
+ tier,
+ isWorthOptimizing);
_locals = new Dictionary();
_localsCount = 0;
- new ILOpCodeLoadState(_ilBlocks[0]).Emit(this);
+ Generator = method.GetILGenerator();
foreach (ILBlock ilBlock in _ilBlocks)
{
@@ -80,13 +101,13 @@ namespace ChocolArm64.Translation
public static Register GetRegFromBit(int bit, RegisterType baseType)
{
- if (bit < 32)
+ if (bit < RegsCount)
{
return new Register(bit, baseType);
}
else if (baseType == RegisterType.Int)
{
- return new Register(bit & 0x1f, RegisterType.Flag);
+ return new Register(bit & RegsMask, RegisterType.Flag);
}
else
{
@@ -96,7 +117,7 @@ namespace ChocolArm64.Translation
public static bool IsRegIndex(int index)
{
- return (uint)index < 32;
+ return (uint)index < RegsCount;
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCode.cs b/ChocolArm64/Translation/ILOpCode.cs
index 4021603c01..486452820d 100644
--- a/ChocolArm64/Translation/ILOpCode.cs
+++ b/ChocolArm64/Translation/ILOpCode.cs
@@ -4,16 +4,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCode : IILEmit
{
- private OpCode _ilOp;
+ public OpCode ILOp { get; }
public ILOpCode(OpCode ilOp)
{
- _ilOp = ilOp;
+ ILOp = ilOp;
}
public void Emit(ILMethodBuilder context)
{
- context.Generator.Emit(_ilOp);
+ context.Generator.Emit(ILOp);
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeBranch.cs b/ChocolArm64/Translation/ILOpCodeBranch.cs
index 22b80b5d52..9d4e40fa9d 100644
--- a/ChocolArm64/Translation/ILOpCodeBranch.cs
+++ b/ChocolArm64/Translation/ILOpCodeBranch.cs
@@ -4,18 +4,18 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeBranch : IILEmit
{
- private OpCode _ilOp;
- private ILLabel _label;
+ public OpCode ILOp { get; }
+ public ILLabel Label { get; }
public ILOpCodeBranch(OpCode ilOp, ILLabel label)
{
- _ilOp = ilOp;
- _label = label;
+ ILOp = ilOp;
+ Label = label;
}
public void Emit(ILMethodBuilder context)
{
- context.Generator.Emit(_ilOp, _label.GetLabel(context));
+ context.Generator.Emit(ILOp, Label.GetLabel(context));
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeCall.cs b/ChocolArm64/Translation/ILOpCodeCall.cs
index c046aeeb75..dc20417a9a 100644
--- a/ChocolArm64/Translation/ILOpCodeCall.cs
+++ b/ChocolArm64/Translation/ILOpCodeCall.cs
@@ -5,9 +5,9 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeCall : IILEmit
{
- public MethodInfo Info { get; private set; }
+ public MethodInfo Info { get; }
- public bool IsVirtual { get; private set; }
+ public bool IsVirtual { get; }
public ILOpCodeCall(MethodInfo info, bool isVirtual)
{
diff --git a/ChocolArm64/Translation/ILOpCodeConst.cs b/ChocolArm64/Translation/ILOpCodeConst.cs
index 2aaf8676ee..cd3b58ff04 100644
--- a/ChocolArm64/Translation/ILOpCodeConst.cs
+++ b/ChocolArm64/Translation/ILOpCodeConst.cs
@@ -16,6 +16,8 @@ namespace ChocolArm64.Translation
private ImmVal _value;
+ public long Value => _value.I8;
+
private enum ConstType
{
Int32,
diff --git a/ChocolArm64/Translation/ILOpCodeLoad.cs b/ChocolArm64/Translation/ILOpCodeLoad.cs
index c31e06bbd9..0d11eeaa4b 100644
--- a/ChocolArm64/Translation/ILOpCodeLoad.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoad.cs
@@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLoad : IILEmit
{
- public int Index { get; private set; }
+ public int Index { get; }
- public IoType IoType { get; private set; }
+ public VarType VarType { get; }
- public RegisterSize RegisterSize { get; private set; }
+ public RegisterSize RegisterSize { get; }
- public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0)
+ public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0)
{
Index = index;
- IoType = ioType;
+ VarType = varType;
RegisterSize = registerSize;
}
public void Emit(ILMethodBuilder context)
{
- switch (IoType)
+ switch (VarType)
{
- case IoType.Arg: context.Generator.EmitLdarg(Index); break;
+ case VarType.Arg: context.Generator.EmitLdarg(Index); break;
- case IoType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
- case IoType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
- case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
+ case VarType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
+ case VarType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
+ case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
}
}
diff --git a/ChocolArm64/Translation/ILOpCodeLoadField.cs b/ChocolArm64/Translation/ILOpCodeLoadField.cs
index abcd37c348..f0507ac226 100644
--- a/ChocolArm64/Translation/ILOpCodeLoadField.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoadField.cs
@@ -5,7 +5,7 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLoadField : IILEmit
{
- public FieldInfo Info { get; private set; }
+ public FieldInfo Info { get; }
public ILOpCodeLoadField(FieldInfo info)
{
diff --git a/ChocolArm64/Translation/ILOpCodeLoadState.cs b/ChocolArm64/Translation/ILOpCodeLoadState.cs
index ddab611019..c23dc94329 100644
--- a/ChocolArm64/Translation/ILOpCodeLoadState.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoadState.cs
@@ -7,15 +7,24 @@ namespace ChocolArm64.Translation
{
private ILBlock _block;
- public ILOpCodeLoadState(ILBlock block)
+ private bool _isSubEntry;
+
+ public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
{
- _block = block;
+ _block = block;
+ _isSubEntry = isSubEntry;
}
public void Emit(ILMethodBuilder context)
{
- long intInputs = context.LocalAlloc.GetIntInputs(_block);
- long vecInputs = context.LocalAlloc.GetVecInputs(_block);
+ long intInputs = context.RegUsage.GetIntInputs(_block);
+ long vecInputs = context.RegUsage.GetVecInputs(_block);
+
+ if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
+ {
+ intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64);
+ vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64);
+ }
LoadLocals(context, intInputs, RegisterType.Int);
LoadLocals(context, vecInputs, RegisterType.Vector);
diff --git a/ChocolArm64/Translation/ILOpCodeLog.cs b/ChocolArm64/Translation/ILOpCodeLog.cs
index ebb042b596..53846f927e 100644
--- a/ChocolArm64/Translation/ILOpCodeLog.cs
+++ b/ChocolArm64/Translation/ILOpCodeLog.cs
@@ -2,16 +2,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLog : IILEmit
{
- private string _text;
+ public string Text { get; }
public ILOpCodeLog(string text)
{
- _text = text;
+ Text = text;
}
public void Emit(ILMethodBuilder context)
{
- context.Generator.EmitWriteLine(_text);
+ context.Generator.EmitWriteLine(Text);
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeStore.cs b/ChocolArm64/Translation/ILOpCodeStore.cs
index 17a6259c6f..7ac78e9ae4 100644
--- a/ChocolArm64/Translation/ILOpCodeStore.cs
+++ b/ChocolArm64/Translation/ILOpCodeStore.cs
@@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeStore : IILEmit
{
- public int Index { get; private set; }
+ public int Index { get; }
- public IoType IoType { get; private set; }
+ public VarType VarType { get; }
- public RegisterSize RegisterSize { get; private set; }
+ public RegisterSize RegisterSize { get; }
- public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0)
+ public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0)
{
Index = index;
- IoType = ioType;
+ VarType = varType;
RegisterSize = registerSize;
}
public void Emit(ILMethodBuilder context)
{
- switch (IoType)
+ switch (VarType)
{
- case IoType.Arg: context.Generator.EmitStarg(Index); break;
+ case VarType.Arg: context.Generator.EmitStarg(Index); break;
- case IoType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
- case IoType.Int: EmitStloc(context, Index, RegisterType.Int); break;
- case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
+ case VarType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
+ case VarType.Int: EmitStloc(context, Index, RegisterType.Int); break;
+ case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
}
}
diff --git a/ChocolArm64/Translation/ILOpCodeStoreState.cs b/ChocolArm64/Translation/ILOpCodeStoreState.cs
index 458e9eda43..a587dbfe84 100644
--- a/ChocolArm64/Translation/ILOpCodeStoreState.cs
+++ b/ChocolArm64/Translation/ILOpCodeStoreState.cs
@@ -7,15 +7,33 @@ namespace ChocolArm64.Translation
{
private ILBlock _block;
- public ILOpCodeStoreState(ILBlock block)
+ private TranslatedSub _callSub;
+
+ public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null)
{
- _block = block;
+ _block = block;
+ _callSub = callSub;
}
public void Emit(ILMethodBuilder context)
{
- long intOutputs = context.LocalAlloc.GetIntOutputs(_block);
- long vecOutputs = context.LocalAlloc.GetVecOutputs(_block);
+ long intOutputs = context.RegUsage.GetIntOutputs(_block);
+ long vecOutputs = context.RegUsage.GetVecOutputs(_block);
+
+ if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
+ {
+ intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64);
+ vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64);
+ }
+
+ if (_callSub != null)
+ {
+ //Those registers are assigned in the callee function, without
+ //reading their values first. We don't need to write them because
+ //they are not going to be read by the callee.
+ intOutputs &= ~_callSub.IntNiRegsMask;
+ vecOutputs &= ~_callSub.VecNiRegsMask;
+ }
StoreLocals(context, intOutputs, RegisterType.Int);
StoreLocals(context, vecOutputs, RegisterType.Vector);
diff --git a/ChocolArm64/Translation/LocalAlloc.cs b/ChocolArm64/Translation/RegisterUsage.cs
similarity index 56%
rename from ChocolArm64/Translation/LocalAlloc.cs
rename to ChocolArm64/Translation/RegisterUsage.cs
index 763be6190d..2e6829d512 100644
--- a/ChocolArm64/Translation/LocalAlloc.cs
+++ b/ChocolArm64/Translation/RegisterUsage.cs
@@ -3,8 +3,13 @@ using System.Collections.Generic;
namespace ChocolArm64.Translation
{
- class LocalAlloc
+ class RegisterUsage
{
+ public const long CallerSavedIntRegistersMask = 0x7fL << 9;
+ public const long PStateNzcvFlagsMask = 0xfL << 60;
+
+ public const long CallerSavedVecRegistersMask = 0xffffL << 16;
+
private class PathIo
{
private Dictionary _allInputs;
@@ -18,31 +23,30 @@ namespace ChocolArm64.Translation
_cmnOutputs = new Dictionary();
}
- public PathIo(ILBlock root, long inputs, long outputs) : this()
+ public void Set(ILBlock entry, long inputs, long outputs)
{
- Set(root, inputs, outputs);
- }
-
- public void Set(ILBlock root, long inputs, long outputs)
- {
- if (!_allInputs.TryAdd(root, inputs))
+ if (!_allInputs.TryAdd(entry, inputs))
{
- _allInputs[root] |= inputs;
+ _allInputs[entry] |= inputs;
}
- if (!_cmnOutputs.TryAdd(root, outputs))
+ if (!_cmnOutputs.TryAdd(entry, outputs))
{
- _cmnOutputs[root] &= outputs;
+ _cmnOutputs[entry] &= outputs;
}
_allOutputs |= outputs;
}
- public long GetInputs(ILBlock root)
+ public long GetInputs(ILBlock entry)
{
- if (_allInputs.TryGetValue(root, out long inputs))
+ if (_allInputs.TryGetValue(entry, out long inputs))
{
- return inputs | (_allOutputs & ~_cmnOutputs[root]);
+ //We also need to read the registers that may not be written
+ //by all paths that can reach an exit point, to ensure that
+ //the local variable will not remain uninitialized depending
+ //on the flow path taken.
+ return inputs | (_allOutputs & ~_cmnOutputs[entry]);
}
return 0;
@@ -57,15 +61,38 @@ namespace ChocolArm64.Translation
private Dictionary _intPaths;
private Dictionary _vecPaths;
- private struct BlockIo
+ private struct BlockIo : IEquatable
{
- public ILBlock Block;
- public ILBlock Entry;
+ public ILBlock Block { get; }
+ public ILBlock Entry { get; }
- public long IntInputs;
- public long VecInputs;
- public long IntOutputs;
- public long VecOutputs;
+ public long IntInputs { get; set; }
+ public long VecInputs { get; set; }
+ public long IntOutputs { get; set; }
+ public long VecOutputs { get; set; }
+
+ public BlockIo(ILBlock block, ILBlock entry)
+ {
+ Block = block;
+ Entry = entry;
+
+ IntInputs = IntOutputs = 0;
+ VecInputs = VecOutputs = 0;
+ }
+
+ public BlockIo(
+ ILBlock block,
+ ILBlock entry,
+ long intInputs,
+ long vecInputs,
+ long intOutputs,
+ long vecOutputs) : this(block, entry)
+ {
+ IntInputs = intInputs;
+ VecInputs = vecInputs;
+ IntOutputs = intOutputs;
+ VecOutputs = vecOutputs;
+ }
public override bool Equals(object obj)
{
@@ -74,6 +101,11 @@ namespace ChocolArm64.Translation
return false;
}
+ return Equals(other);
+ }
+
+ public bool Equals(BlockIo other)
+ {
return other.Block == Block &&
other.Entry == Entry &&
other.IntInputs == IntInputs &&
@@ -98,25 +130,13 @@ namespace ChocolArm64.Translation
}
}
- private const int MaxOptGraphLength = 40;
-
- public LocalAlloc(ILBlock[] graph, ILBlock entry)
+ public RegisterUsage()
{
_intPaths = new Dictionary();
_vecPaths = new Dictionary();
-
- if (graph.Length > 1 &&
- graph.Length < MaxOptGraphLength)
- {
- InitializeOptimal(graph, entry);
- }
- else
- {
- InitializeFast(graph);
- }
}
- private void InitializeOptimal(ILBlock[] graph, ILBlock entry)
+ public void BuildUses(ILBlock entry)
{
//This will go through all possible paths on the graph,
//and store all inputs/outputs for each block. A register
@@ -124,7 +144,7 @@ namespace ChocolArm64.Translation
//When a block can be reached by more than one path, then the
//output from all paths needs to be set for this block, and
//only outputs present in all of the parent blocks can be considered
- //when doing input elimination. Each block chain have a entry, that's where
+ //when doing input elimination. Each block chain has an entry, that's where
//the code starts executing. They are present on the subroutine start point,
//and on call return points too (address written to X30 by BL).
HashSet visited = new HashSet();
@@ -133,19 +153,13 @@ namespace ChocolArm64.Translation
void Enqueue(BlockIo block)
{
- if (!visited.Contains(block))
+ if (visited.Add(block))
{
unvisited.Enqueue(block);
-
- visited.Add(block);
}
}
- Enqueue(new BlockIo()
- {
- Block = entry,
- Entry = entry
- });
+ Enqueue(new BlockIo(entry, entry));
while (unvisited.Count > 0)
{
@@ -177,19 +191,21 @@ namespace ChocolArm64.Translation
void EnqueueFromCurrent(ILBlock block, bool retTarget)
{
- BlockIo blockIo = new BlockIo() { Block = block };
+ BlockIo blockIo;
if (retTarget)
{
- blockIo.Entry = block;
+ blockIo = new BlockIo(block, block);
}
else
{
- blockIo.Entry = current.Entry;
- blockIo.IntInputs = current.IntInputs;
- blockIo.VecInputs = current.VecInputs;
- blockIo.IntOutputs = current.IntOutputs;
- blockIo.VecOutputs = current.VecOutputs;
+ blockIo = new BlockIo(
+ block,
+ current.Entry,
+ current.IntInputs,
+ current.VecInputs,
+ current.IntOutputs,
+ current.VecOutputs);
}
Enqueue(blockIo);
@@ -207,54 +223,63 @@ namespace ChocolArm64.Translation
}
}
- private void InitializeFast(ILBlock[] graph)
- {
- //This is WAY faster than InitializeOptimal, but results in
- //unneeded loads and stores, so the resulting code will be slower.
- long intInputs = 0, intOutputs = 0;
- long vecInputs = 0, vecOutputs = 0;
+ public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values);
+ public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values);
- foreach (ILBlock block in graph)
- {
- intInputs |= block.IntInputs;
- intOutputs |= block.IntOutputs;
- vecInputs |= block.VecInputs;
- vecOutputs |= block.VecOutputs;
- }
-
- //It's possible that not all code paths writes to those output registers,
- //in those cases if we attempt to write an output registers that was
- //not written, we will be just writing zero and messing up the old register value.
- //So we just need to ensure that all outputs are loaded.
- if (graph.Length > 1)
- {
- intInputs |= intOutputs;
- vecInputs |= vecOutputs;
- }
-
- foreach (ILBlock block in graph)
- {
- _intPaths.Add(block, new PathIo(block, intInputs, intOutputs));
- _vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs));
- }
- }
-
- public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
- public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
-
- private long GetInputsImpl(ILBlock root, IEnumerable values)
+ private long GetInputsImpl(ILBlock entry, IEnumerable values)
{
long inputs = 0;
foreach (PathIo path in values)
{
- inputs |= path.GetInputs(root);
+ inputs |= path.GetInputs(entry);
}
return inputs;
}
+ public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values);
+ public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values);
+
+ private long GetNotInputsImpl(ILBlock entry, IEnumerable values)
+ {
+ //Returns a mask with registers that are written to
+ //before being read. Only those registers that are
+ //written in all paths, and are not read before being
+ //written to in those paths, should be set in the mask.
+ long mask = -1L;
+
+ foreach (PathIo path in values)
+ {
+ mask &= path.GetOutputs() & ~path.GetInputs(entry);
+ }
+
+ return mask;
+ }
+
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
+
+ public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
+ {
+ //TODO: ARM32 support.
+ if (isAarch64)
+ {
+ mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
+ }
+
+ return mask;
+ }
+
+ public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
+ {
+ //TODO: ARM32 support.
+ if (isAarch64)
+ {
+ mask &= ~CallerSavedVecRegistersMask;
+ }
+
+ return mask;
+ }
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/TranslatedSub.cs b/ChocolArm64/Translation/TranslatedSub.cs
index 65d7035107..8b599b7a93 100644
--- a/ChocolArm64/Translation/TranslatedSub.cs
+++ b/ChocolArm64/Translation/TranslatedSub.cs
@@ -10,21 +10,41 @@ namespace ChocolArm64.Translation
class TranslatedSub
{
+ //This is the minimum amount of calls needed for the method
+ //to be retranslated with higher quality code. It's only worth
+ //doing that for hot code.
+ private const int MinCallCountForOpt = 30;
+
public ArmSubroutine Delegate { get; private set; }
- public static int StateArgIdx { get; private set; }
- public static int MemoryArgIdx { get; private set; }
+ public static int StateArgIdx { get; }
+ public static int MemoryArgIdx { get; }
- public static Type[] FixedArgTypes { get; private set; }
+ public static Type[] FixedArgTypes { get; }
- public DynamicMethod Method { get; private set; }
+ public DynamicMethod Method { get; }
- public TranslationTier Tier { get; private set; }
+ public TranslationTier Tier { get; }
- public TranslatedSub(DynamicMethod method, TranslationTier tier)
+ public long IntNiRegsMask { get; }
+ public long VecNiRegsMask { get; }
+
+ private bool _isWorthOptimizing;
+
+ private int _callCount;
+
+ public TranslatedSub(
+ DynamicMethod method,
+ long intNiRegsMask,
+ long vecNiRegsMask,
+ TranslationTier tier,
+ bool isWorthOptimizing)
{
- Method = method ?? throw new ArgumentNullException(nameof(method));;
- Tier = tier;
+ Method = method ?? throw new ArgumentNullException(nameof(method));;
+ IntNiRegsMask = intNiRegsMask;
+ VecNiRegsMask = vecNiRegsMask;
+ _isWorthOptimizing = isWorthOptimizing;
+ Tier = tier;
}
static TranslatedSub()
@@ -61,5 +81,24 @@ namespace ChocolArm64.Translation
{
return Delegate(threadState, memory);
}
+
+ //Counts the calls made to this method and reports when the call
+ //count threshold has been reached. It returns false for the first
+ //MinCallCountForOpt calls, true on the next one, and false from
+ //then on. It always returns false when the instance was created
+ //with isWorthOptimizing set to false.
+ //NOTE(review): _callCount and _isWorthOptimizing are updated without
+ //synchronization; confirm whether concurrent callers can reach this.
+ public bool IsWorthOptimizing()
+ {
+ if (!_isWorthOptimizing)
+ {
+ return false;
+ }
+
+ //The counter is only incremented while the flag above is still set.
+ if (_callCount++ < MinCallCountForOpt)
+ {
+ return false;
+ }
+
+ //Only return true once, so that it is
+ //added to the queue only once.
+ _isWorthOptimizing = false;
+
+ return true;
+ }
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs
index 7f7df6e5b2..bda0bca09f 100644
--- a/ChocolArm64/Translation/Translator.cs
+++ b/ChocolArm64/Translation/Translator.cs
@@ -63,48 +63,36 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
}
- TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
+ if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
+ {
+ sub = TranslateLowCq(position, state.GetExecutionMode());
+ }
- position = subroutine.Execute(state, _memory);
+ position = sub.Execute(state, _memory);
}
while (position != 0 && state.Running);
state.CurrentTranslator = null;
}
- internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
- {
- if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
- {
- _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
- }
- }
-
- internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
+ internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
- if (sub.Tier == TranslationTier.Tier0)
+ if (sub.IsWorthOptimizing())
{
- _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
+ bool isComplete = cs == CallType.Call ||
+ cs == CallType.VirtualCall;
+
+ _queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
}
return sub.Delegate;
}
- internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
- {
- if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
- {
- subroutine = TranslateLowCq(position, state.GetExecutionMode());
- }
-
- return subroutine;
- }
-
private void TranslateQueuedSubs()
{
while (_threadCount != 0)
@@ -124,7 +112,7 @@ namespace ChocolArm64.Translation
}
else
{
- TranslateHighCq(item.Position, item.Mode);
+ TranslateHighCq(item.Position, item.Mode, item.IsComplete);
}
}
else
@@ -138,30 +126,36 @@ namespace ChocolArm64.Translation
{
Block block = Decoder.DecodeBasicBlock(_memory, position, mode);
- ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier0, block);
+ ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier0, block);
string subName = GetSubroutineName(position);
- ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
+ bool isAarch64 = mode == ExecutionMode.Aarch64;
- TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
+ ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
+
+ TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
}
- private void TranslateHighCq(long position, ExecutionMode mode)
+ private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
{
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
- ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier1, graph);
+ ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier1, graph);
ILBlock[] ilBlocks = context.GetILBlocks();
string subName = GetSubroutineName(position);
- ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
+ bool isAarch64 = mode == ExecutionMode.Aarch64;
- TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
+ isComplete &= !context.HasIndirectJump;
+
+ ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
+
+ TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0;
@@ -170,9 +164,11 @@ namespace ChocolArm64.Translation
ilOpCount += ilBlock.Count;
}
+ ForceAheadOfTimeCompilation(subroutine);
+
_cache.AddOrUpdate(position, subroutine, ilOpCount);
- ForceAheadOfTimeCompilation(subroutine);
+ return subroutine;
}
private string GetSubroutineName(long position)
diff --git a/ChocolArm64/Translation/TranslatorQueue.cs b/ChocolArm64/Translation/TranslatorQueue.cs
index 89d665bfbd..0f1d847470 100644
--- a/ChocolArm64/Translation/TranslatorQueue.cs
+++ b/ChocolArm64/Translation/TranslatorQueue.cs
@@ -1,3 +1,4 @@
+using ChocolArm64.State;
using System.Collections.Concurrent;
using System.Threading;
@@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
{
class TranslatorQueue
{
- //This is the maximum number of functions to be translated that the queue can hold.
- //The value may need some tuning to find the sweet spot.
- private const int MaxQueueSize = 1024;
-
private ConcurrentStack[] _translationQueue;
private ManualResetEvent _queueDataReceivedEvent;
@@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
_queueDataReceivedEvent = new ManualResetEvent(false);
}
- public void Enqueue(TranslatorQueueItem item)
+ public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
{
- ConcurrentStack queue = _translationQueue[(int)item.Tier];
+ TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
- if (queue.Count >= MaxQueueSize)
- {
- queue.TryPop(out _);
- }
+ ConcurrentStack queue = _translationQueue[(int)tier];
queue.Push(item);
diff --git a/ChocolArm64/Translation/TranslatorQueueItem.cs b/ChocolArm64/Translation/TranslatorQueueItem.cs
index 0988414a50..dde2706d98 100644
--- a/ChocolArm64/Translation/TranslatorQueueItem.cs
+++ b/ChocolArm64/Translation/TranslatorQueueItem.cs
@@ -10,11 +10,18 @@ namespace ChocolArm64.Translation
public TranslationTier Tier { get; }
- public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
+ public bool IsComplete { get; }
+
+ public TranslatorQueueItem(
+ long position,
+ ExecutionMode mode,
+ TranslationTier tier,
+ bool isComplete = false)
{
- Position = position;
- Mode = mode;
- Tier = tier;
+ Position = position;
+ Mode = mode;
+ Tier = tier;
+ IsComplete = isComplete;
}
}
}
\ No newline at end of file
diff --git a/ChocolArm64/Translation/IoType.cs b/ChocolArm64/Translation/VarType.cs
similarity index 85%
rename from ChocolArm64/Translation/IoType.cs
rename to ChocolArm64/Translation/VarType.cs
index c7710e0c67..d671575e98 100644
--- a/ChocolArm64/Translation/IoType.cs
+++ b/ChocolArm64/Translation/VarType.cs
@@ -1,6 +1,6 @@
namespace ChocolArm64.Translation
{
- enum IoType
+ enum VarType
{
Arg,
Flag,
diff --git a/Ryujinx.Common/Utilities/BitUtils.cs b/Ryujinx.Common/Utilities/BitUtils.cs
index 135b397d3d..5f70f742a0 100644
--- a/Ryujinx.Common/Utilities/BitUtils.cs
+++ b/Ryujinx.Common/Utilities/BitUtils.cs
@@ -34,6 +34,11 @@ namespace Ryujinx.Common
return value & -(long)size;
}
+ //Computes (value + dividend - 1) / dividend, which is value / dividend
+ //rounded up for a non-negative value and a positive dividend.
+ //Note that, despite its name, "dividend" is used as the divisor here.
+ public static int DivRoundUp(int value, int dividend)
+ {
+ return (value + dividend - 1) / dividend;
+ }
+
public static ulong DivRoundUp(ulong value, uint dividend)
{
return (value + dividend - 1) / dividend;
@@ -44,6 +49,24 @@ namespace Ryujinx.Common
return (value + dividend - 1) / dividend;
}
+ //Rounds value up to a power of two: the highest set bit of
+ //(value - 1) is copied into every lower bit, then one is added.
+ //A positive power of two is returned unchanged.
+ //NOTE(review): inputs <= 0 or above 2^30 do not yield a positive
+ //power of two - confirm callers never pass those.
+ public static int Pow2RoundUp(int value)
+ {
+ value--;
+
+ value |= (value >> 1);
+ value |= (value >> 2);
+ value |= (value >> 4);
+ value |= (value >> 8);
+ value |= (value >> 16);
+
+ return ++value;
+ }
+
+ //Rounds value down to a power of two: a value that IsPowerOfTwo32
+ //accepts is returned as is, anything else is rounded up with
+ //Pow2RoundUp and then shifted right by one.
+ public static int Pow2RoundDown(int value)
+ {
+ return IsPowerOfTwo32(value) ? value : Pow2RoundUp(value) >> 1;
+ }
+
public static bool IsPowerOfTwo32(int value)
{
return value != 0 && (value & (value - 1)) == 0;
@@ -77,7 +100,7 @@ namespace Ryujinx.Common
do
{
nibbleIdx -= 4;
- preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111];
+ preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
count += preCount;
}
while (preCount == 4);
@@ -85,6 +108,18 @@ namespace Ryujinx.Common
return (ulong)count;
}
+ //Counts the zero bits below the lowest set bit of value.
+ //Returns 32 when value is zero (no bit is set).
+ public static int CountTrailingZeros32(int value)
+ {
+ int count = 0;
+
+ //The count is bounded by the bit width: shifting zero always
+ //gives zero (and an int shift count only uses its low 5 bits),
+ //so without the bound a zero value would never leave the loop.
+ while (count < 32 && ((value >> count) & 1) == 0)
+ {
+ count++;
+ }
+
+ return count;
+ }
+
public static long ReverseBits64(long value)
{
return (long)ReverseBits64((ulong)value);
@@ -101,4 +136,4 @@ namespace Ryujinx.Common
return (value >> 32) | (value << 32);
}
}
-}
\ No newline at end of file
+}
diff --git a/Ryujinx.Graphics/CdmaProcessor.cs b/Ryujinx.Graphics/CdmaProcessor.cs
index 4ebf200751..4ff12fbf50 100644
--- a/Ryujinx.Graphics/CdmaProcessor.cs
+++ b/Ryujinx.Graphics/CdmaProcessor.cs
@@ -8,41 +8,41 @@ namespace Ryujinx.Graphics
private const int MethSetMethod = 0x10;
private const int MethSetData = 0x11;
- private NvGpu Gpu;
+ private NvGpu _gpu;
- public CdmaProcessor(NvGpu Gpu)
+ public CdmaProcessor(NvGpu gpu)
{
- this.Gpu = Gpu;
+ _gpu = gpu;
}
- public void PushCommands(NvGpuVmm Vmm, int[] CmdBuffer)
+ public void PushCommands(NvGpuVmm vmm, int[] cmdBuffer)
{
- List Commands = new List