Add Mla/Mls/Mul_V SSE optimizations. Add "Part" opt.

Remove EmitLd/Stvectmp2(), remove Dup.
Nits.
This commit is contained in:
LDj3SNuD 2019-03-10 00:39:58 +01:00 committed by GitHub
commit 20823a9634
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,4 +1,5 @@
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
// https://www.agner.org/optimize/#vectorclass @ vectori128.h
using ChocolArm64.Decoders; using ChocolArm64.Decoders;
using ChocolArm64.State; using ChocolArm64.State;
@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@ -749,7 +750,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
@ -771,7 +772,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 1); context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
@ -864,7 +865,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
@ -886,7 +887,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 1); context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
@ -1001,7 +1002,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
@ -1021,7 +1022,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(op.Index | op.Index << 1); context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
@ -1771,6 +1772,12 @@ namespace ChocolArm64.Instructions
} }
public static void Mla_V(ILEmitterCtx context) public static void Mla_V(ILEmitterCtx context)
{
if (Optimizations.UseSse41)
{
EmitSse41Mul_AddSub(context, nameof(Sse2.Add));
}
else
{ {
EmitVectorTernaryOpZx(context, () => EmitVectorTernaryOpZx(context, () =>
{ {
@ -1778,6 +1785,7 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Add); context.Emit(OpCodes.Add);
}); });
} }
}
public static void Mla_Ve(ILEmitterCtx context) public static void Mla_Ve(ILEmitterCtx context)
{ {
@ -1789,6 +1797,12 @@ namespace ChocolArm64.Instructions
} }
public static void Mls_V(ILEmitterCtx context) public static void Mls_V(ILEmitterCtx context)
{
if (Optimizations.UseSse41)
{
EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract));
}
else
{ {
EmitVectorTernaryOpZx(context, () => EmitVectorTernaryOpZx(context, () =>
{ {
@ -1796,6 +1810,7 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Sub); context.Emit(OpCodes.Sub);
}); });
} }
}
public static void Mls_Ve(ILEmitterCtx context) public static void Mls_Ve(ILEmitterCtx context)
{ {
@ -1807,9 +1822,16 @@ namespace ChocolArm64.Instructions
} }
public static void Mul_V(ILEmitterCtx context) public static void Mul_V(ILEmitterCtx context)
{
if (Optimizations.UseSse41)
{
EmitSse41Mul_AddSub(context);
}
else
{ {
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
} }
}
public static void Mul_Ve(ILEmitterCtx context) public static void Mul_Ve(ILEmitterCtx context)
{ {
@ -1923,19 +1945,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -1969,13 +1995,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -1999,23 +2026,16 @@ namespace ChocolArm64.Instructions
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
context.EmitLdvectmp(); context.EmitLdvec(op.Rn);
context.EmitLdvectmp2(); context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
context.EmitLdc_I4(1); context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
@ -2185,20 +2205,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32); : nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -2244,20 +2268,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32); : nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -2441,19 +2469,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2482,13 +2514,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2594,19 +2627,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2659,13 +2696,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -2689,23 +2727,16 @@ namespace ChocolArm64.Instructions
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
context.EmitLdvectmp(); context.EmitLdvec(op.Rn);
context.EmitLdvectmp2(); context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
context.EmitLdc_I4(1); context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
@ -2737,8 +2768,7 @@ namespace ChocolArm64.Instructions
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
@ -2862,20 +2892,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32); : nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -2921,20 +2955,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32); : nameof(Sse41.ConvertToVector128Int32);
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@ -3063,19 +3101,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -3104,13 +3146,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) }; nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
context.EmitLdc_I4(numBytes); if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@ -3253,5 +3296,77 @@ namespace ChocolArm64.Instructions
EmitVectorZeroUpper(context, op.Rd); EmitVectorZeroUpper(context, op.Rd);
} }
} }
// Emits IL that multiplies the vectors Rn and Rm element-wise through SSE2/SSE4.1
// intrinsics and stores the result to Rd.
//
// nameAddSub: optional name of an Sse2 method taking two integer vectors (the callers
// in this file pass nameof(Sse2.Add) or nameof(Sse2.Subtract)). When supplied, Rd is
// loaded before the product is computed and the named method is then called on
// (Rd, product). When null, the bare product is stored.
private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    bool combineWithRd = nameAddSub != null;

    if (combineWithRd)
    {
        // Rd must sit below the product on the evaluation stack so that it becomes
        // the first argument of the add/subtract call emitted further down.
        context.EmitLdvec(op.Rd);
    }

    switch (op.Size)
    {
        case 0:
        {
            // 8-bit elements: the product is assembled from two 16-bit multiplies,
            // one over the odd-numbered bytes and one over the full 16-bit lanes,
            // which are then merged with a byte blend.
            Type[] blendArgs = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
            Type[] shortPair = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
            Type[] shiftArgs = new Type[] { typeof(Vector128<short>), typeof(byte) };
            Type[] setAllArgs = new Type[] { typeof(int) };

            // Odd bytes of Rn moved down into the low half of each 16-bit lane.
            context.EmitLdvec(op.Rn);
            context.Emit(OpCodes.Ldc_I4_8);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

            // Same for Rm.
            context.EmitLdvec(op.Rm);
            context.Emit(OpCodes.Ldc_I4_8);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

            // Multiply the odd bytes, then shift the result back up into the odd byte slots.
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), shortPair));
            context.Emit(OpCodes.Ldc_I4_8);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs));

            // 16-bit multiply of the unshifted operands; its low bytes are the even-byte products.
            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), shortPair));

            // Mask with 0xFF in every even byte, used as the selector of the blend:
            // BlendVariable(oddProducts, evenProducts, mask).
            context.EmitLdc_I4(0x00FF00FF);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), setAllArgs));

            context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), blendArgs));

            break;
        }

        case 1:
        {
            // 16-bit elements: a single Sse2.MultiplyLow on short vectors.
            Type[] shortPair = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };

            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), shortPair));

            break;
        }

        default: /* op.Size == 2 */
        {
            // Every remaining size takes the 32-bit path: Sse41.MultiplyLow on int vectors.
            Type[] intPair = new Type[] { typeof(Vector128<int>), typeof(Vector128<int>) };

            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);
            context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), intPair));

            break;
        }
    }

    if (combineWithRd)
    {
        // Stack at this point: Rd, product. Resolve the Sse2 overload for the element size.
        Type elemVector = VectorIntTypesPerSizeLog2[op.Size];

        Type[] addSubArgs = new Type[] { elemVector, elemVector };

        context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, addSubArgs));
    }

    context.EmitStvec(op.Rd);

    // For a 64-bit register operation the upper half of Rd is cleared after the store.
    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
} }
} }