Add Sse optimizations for Sabd_V, Sabdl_V, Uabd_V and Uabdl_V. Remove Dup (Srhadd_V).

This commit is contained in:
LDj3SNuD 2019-03-14 23:29:01 +01:00 committed by GitHub
parent 6c3fd1edb6
commit cdf293c751
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1915,20 +1915,124 @@ namespace ChocolArm64.Instructions
// Emits the translation for the Sabd_V handler.
// When Optimizations.UseSse2 is set, a sequence of Sse2 intrinsic calls is emitted that selects
// between (Rn - Rm) and (Rm - Rn) per lane using a compare mask; otherwise the generic
// EmitVectorBinaryOpSx path with a Sub + EmitAbs element emitter is used.
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved and no +/- markers, so the block does not compile as shown — confirm against the real file.
public static void Sabd_V(ILEmitterCtx context)
{
// NOTE(review): the next line, and the "Sub / EmitAbs / });" lines just inside the if-body below,
// look like the pre-change implementation; the same call appears intact in the else branch.
EmitVectorBinaryOpSx(context, () =>
if (Optimizations.UseSse2)
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
// Compare/Subtract overloads are resolved by the element type looked up from op.Size;
// the bitwise And/AndNot/Or overloads are always resolved on Vector128<long>.
Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
// mask = CompareGreaterThan(Rn, Rm), kept in the vector temporary for reuse.
// Assumes EmitCall consumes operands in the order they were loaded — TODO confirm.
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub));
context.EmitStvectmp(); // Cmp mask
// First term: And(mask, Subtract(Rn, Rm)).
context.EmitLdvectmp(); // Cmp mask
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
// Second term: AndNot(mask, Subtract(Rm, Rn)); per the .NET Sse2 API, AndNot computes (~left & right).
context.EmitLdvectmp(); // Cmp mask
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
// Combine both terms and write the result to Rd.
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
context.EmitStvec(op.Rd);
// For the 64-bit register form, EmitVectorZeroUpper is invoked on Rd
// (presumably clears the upper half — helper is not visible here).
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
// Fallback path: per-element subtract followed by EmitAbs.
EmitVectorBinaryOpSx(context, () =>
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
}
}
// Emits the translation for the Sabdl_V handler.
// When Optimizations.UseSse41 is set and op.Size < 2, both sources are converted to the next
// wider element type and then (Rn - Rm) or (Rm - Rn) is selected per lane through a compare mask;
// otherwise the generic EmitVectorWidenRnRmBinaryOpSx path with Sub + EmitAbs is used.
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved and no +/- markers, so the block does not compile as shown — confirm against the real file.
public static void Sabdl_V(ILEmitterCtx context)
{
// NOTE(review): the next line, and the "Sub / EmitAbs / });" lines just inside the if-body below,
// look like the pre-change implementation; the same call appears intact in the else branch.
EmitVectorWidenRnRmBinaryOpSx(context, () =>
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
if (Optimizations.UseSse41 && op.Size < 2)
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
// Compare/Subtract operate on the widened element type (index op.Size + 1);
// the byte-lane shift and the conversion take the source element type (index op.Size).
Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] };
Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
// Size 0 converts to 16-bit lanes, size 1 to 32-bit lanes.
string nameCvt = op.Size == 0
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
// Widen Rn. For the 128-bit register form the vector is first shifted right by 8 bytes,
// so the conversion reads what was the upper 64 bits.
context.EmitLdvec(op.Rn);
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
// Widen Rm the same way.
context.EmitLdvec(op.Rm);
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
// Park both widened operands in temporaries (Rm was produced last, so it is stored first).
context.EmitStvectmp2(); // Long Rm
context.EmitStvectmp(); // Long Rn
// mask = CompareGreaterThan(long Rn, long Rm), kept in the third temporary.
// Assumes EmitCall consumes operands in the order they were loaded — TODO confirm.
context.EmitLdvectmp(); // Long Rn
context.EmitLdvectmp2(); // Long Rm
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub));
context.EmitStvectmp3(); // Cmp mask
// First term: And(mask, Subtract(Rn, Rm)).
context.EmitLdvectmp3(); // Cmp mask
context.EmitLdvectmp(); // Long Rn
context.EmitLdvectmp2(); // Long Rm
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
// Second term: AndNot(mask, Subtract(Rm, Rn)); per the .NET Sse2 API, AndNot computes (~left & right).
context.EmitLdvectmp3(); // Cmp mask
context.EmitLdvectmp2(); // Long Rm
context.EmitLdvectmp(); // Long Rn
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
// Combine both terms and write the full-width result to Rd (no upper-half zeroing on this path).
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
context.EmitStvec(op.Rd);
}
else
{
// Fallback path: widening per-element subtract followed by EmitAbs.
EmitVectorWidenRnRmBinaryOpSx(context, () =>
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
}
}
public static void Sadalp_V(ILEmitterCtx context)
@ -2424,8 +2528,8 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I4(op.Size == 0 ? sbyte.MinValue : short.MinValue);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
context.EmitLdvectmp();
context.EmitLdvec(op.Rn);
context.EmitLdvectmp();
@ -2598,20 +2702,150 @@ namespace ChocolArm64.Instructions
// Emits the translation for the Uabd_V handler.
// When Optimizations.UseSse41 is set, a compare mask is derived from Max + CompareEqual and used
// to select between (Rn - Rm) and (Rm - Rn) per lane; otherwise the generic EmitVectorBinaryOpZx
// path with a Sub + EmitAbs element emitter is used.
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved and no +/- markers, so the block does not compile as shown — confirm against the real file.
public static void Uabd_V(ILEmitterCtx context)
{
// NOTE(review): the next line, and the "Sub / EmitAbs / });" lines just inside the if-body below,
// look like the pre-change implementation; the same call appears intact in the else branch.
EmitVectorBinaryOpZx(context, () =>
if (Optimizations.UseSse41)
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
// Max is resolved on the VectorUIntTypesPerSizeLog2 element type, while CompareEqual/Subtract
// are resolved on the VectorIntTypesPerSizeLog2 element type of the same size.
Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
Type[] typesSav = new Type[] { typeof(long) };
// Max is looked up on Sse2 for size 0 and on Sse41 for larger sizes
// (presumably because the byte overload lives in Sse2 and the wider unsigned ones in Sse41 — verify).
Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
// Build the mask: CompareEqual(Max(Rm, Rn), Rm), then AndNot against an all-ones vector
// (SetAllVector128(-1L)) to invert it, leaving lanes set where Max(Rm, Rn) != Rm.
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rn);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub));
context.EmitLdc_I8(-1L);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
context.EmitStvectmp(); // Cmp mask
// First term: And(mask, Subtract(Rn, Rm)).
context.EmitLdvectmp(); // Cmp mask
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
// Second term: AndNot(mask, Subtract(Rm, Rn)); per the .NET Sse2 API, AndNot computes (~left & right).
context.EmitLdvectmp(); // Cmp mask
context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
// Combine both terms and write the result to Rd.
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
context.EmitStvec(op.Rd);
// For the 64-bit register form, EmitVectorZeroUpper is invoked on Rd
// (presumably clears the upper half — helper is not visible here).
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
// Fallback path: per-element subtract followed by EmitAbs.
EmitVectorBinaryOpZx(context, () =>
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
}
}
// Emits the translation for the Uabdl_V handler.
// When Optimizations.UseSse41 is set and op.Size < 2, both sources are converted to the next
// wider element type, a mask is derived from Max + CompareEqual, and (Rn - Rm) or (Rm - Rn) is
// selected per lane; otherwise the generic EmitVectorWidenRnRmBinaryOpZx path with Sub + EmitAbs is used.
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved and no +/- markers, so the block does not compile as shown — confirm against the real file.
public static void Uabdl_V(ILEmitterCtx context)
{
// NOTE(review): the next line, and the "Sub / EmitAbs / });" lines just inside the if-body below,
// look like the pre-change implementation; the same call appears intact in the else branch.
EmitVectorWidenRnRmBinaryOpZx(context, () =>
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
if (Optimizations.UseSse41 && op.Size < 2)
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
// Max/CompareEqual/Subtract operate on the widened element type (index op.Size + 1);
// the byte-lane shift and the conversion take the source element type (index op.Size).
Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] };
Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] };
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
Type[] typesSav = new Type[] { typeof(long) };
// Size 0 converts to 16-bit lanes, size 1 to 32-bit lanes; the overload is chosen by the
// VectorUIntTypesPerSizeLog2 source type (presumably the zero-extending form — verify).
string nameCvt = op.Size == 0
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
// Widen Rn. For the 128-bit register form the vector is first shifted right by 8 bytes,
// so the conversion reads what was the upper 64 bits.
context.EmitLdvec(op.Rn);
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
// Widen Rm the same way.
context.EmitLdvec(op.Rm);
if (op.RegisterSize == RegisterSize.Simd128)
{
context.Emit(OpCodes.Ldc_I4_8);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
}
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
// Park both widened operands in temporaries (Rm was produced last, so it is stored first).
context.EmitStvectmp2(); // Long Rm
context.EmitStvectmp(); // Long Rn
// Build the mask: CompareEqual(Max(Rm, Rn), Rm), then AndNot against an all-ones vector
// (SetAllVector128(-1L)) to invert it, leaving lanes set where Max(Rm, Rn) != Rm.
context.EmitLdvectmp2(); // Long Rm
context.EmitLdvectmp(); // Long Rn
context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.Max), typesMax));
context.EmitLdvectmp2(); // Long Rm
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub));
context.EmitLdc_I8(-1L);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
context.EmitStvectmp3(); // Cmp mask
// First term: And(mask, Subtract(Rn, Rm)).
context.EmitLdvectmp3(); // Cmp mask
context.EmitLdvectmp(); // Long Rn
context.EmitLdvectmp2(); // Long Rm
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
// Second term: AndNot(mask, Subtract(Rm, Rn)); per the .NET Sse2 API, AndNot computes (~left & right).
context.EmitLdvectmp3(); // Cmp mask
context.EmitLdvectmp2(); // Long Rm
context.EmitLdvectmp(); // Long Rn
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
// Combine both terms and write the full-width result to Rd (no upper-half zeroing on this path).
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
context.EmitStvec(op.Rd);
}
else
{
// Fallback path: widening per-element subtract followed by EmitAbs.
EmitVectorWidenRnRmBinaryOpZx(context, () =>
{
context.Emit(OpCodes.Sub);
EmitAbs(context);
});
}
}
public static void Uadalp_V(ILEmitterCtx context)