Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt..
Remove Tmp stuff; remove Dup. Nits.
This commit is contained in:
parent
c4fe61103b
commit
69450534ac
1 changed files with 131 additions and 46 deletions
|
@ -5,6 +5,7 @@ using ChocolArm64.State;
|
|||
using ChocolArm64.Translation;
|
||||
using System;
|
||||
using System.Reflection.Emit;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
using static ChocolArm64.Instructions.InstEmitSimdHelper;
|
||||
|
@ -13,9 +14,65 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
static partial class InstEmit
|
||||
{
|
||||
#region "Masks"
|
||||
private static readonly long[] _masks_RshrnShrn = new long[]
|
||||
{
|
||||
14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
|
||||
13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
|
||||
11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
|
||||
};
|
||||
#endregion
|
||||
|
||||
public static void Rshrn_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorShrImmNarrowOpZx(context, round: true);
|
||||
if (Optimizations.UseSsse3)
|
||||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] };
|
||||
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
|
||||
Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
|
||||
Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size + 1] };
|
||||
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
|
||||
|
||||
string nameMov = op.RegisterSize == RegisterSize.Simd128
|
||||
? nameof(Sse.MoveLowToHigh)
|
||||
: nameof(Sse.MoveHighToLow);
|
||||
|
||||
int shift = GetImmShr(op);
|
||||
|
||||
long roundConst = 1L << (shift - 1);
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I8(roundConst);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value
|
||||
|
||||
context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask
|
||||
context.Emit(OpCodes.Dup); // mask
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
|
||||
|
||||
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameMov));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorShrImmNarrowOpZx(context, round: true);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Shl_S(ILEmitterCtx context)
|
||||
|
@ -80,12 +137,13 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -102,7 +160,45 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Shrn_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorShrImmNarrowOpZx(context, round: false);
|
||||
if (Optimizations.UseSsse3)
|
||||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
|
||||
Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
|
||||
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
|
||||
|
||||
string nameMov = op.RegisterSize == RegisterSize.Simd128
|
||||
? nameof(Sse.MoveLowToHigh)
|
||||
: nameof(Sse.MoveHighToLow);
|
||||
|
||||
int shift = GetImmShr(op);
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value
|
||||
|
||||
context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask
|
||||
context.Emit(OpCodes.Dup); // mask
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
|
||||
|
||||
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameMov));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorShrImmNarrowOpZx(context, round: false);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Sli_V(ILEmitterCtx context)
|
||||
|
@ -271,8 +367,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
@ -282,16 +377,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
|
||||
|
@ -320,8 +412,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
@ -332,16 +423,13 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
|
||||
|
@ -403,17 +491,21 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
if (shift != 0)
|
||||
{
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
}
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -432,8 +524,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
|
||||
|
@ -464,8 +555,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
@ -612,16 +702,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
@ -661,16 +748,13 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
@ -732,17 +816,21 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
if (shift != 0)
|
||||
{
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
}
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -899,12 +987,9 @@ namespace ChocolArm64.Instructions
|
|||
context.Emit(OpCodes.Add);
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
|
|
Loading…
Add table
Reference in a new issue