From cdf293c751b18c0071d2c5f5ceea74d60111b09c Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Thu, 14 Mar 2019 23:29:01 +0100
Subject: [PATCH] Add S/Uabd/l_V Sse opt.. Remove Dup (Srhadd_V).

---
Review notes (this section is not part of the commit message):

 * Sabd_V (when Optimizations.UseSse2) and Uabd_V (when
   Optimizations.UseSse41) gain an intrinsic path that builds a per-lane
   compare mask, computes both Rn - Rm and Rm - Rn, and merges the two with
   And / AndNot / Or. When the flag is off, the previous
   EmitVectorBinaryOpSx / EmitVectorBinaryOpZx emitters are still used.
 * Sabdl_V / Uabdl_V do the same when Optimizations.UseSse41 is set and
   op.Size < 2, after widening both operands with
   Sse41.ConvertToVector128Int16 / ConvertToVector128Int32. For
   RegisterSize.Simd128 the source is first passed through
   Sse2.ShiftRightLogical128BitLane with a count of 8.
 * Second hunk (Srhadd_V, per the subject line): OpCodes.Dup is replaced by
   EmitStvectmp() followed by EmitLdvectmp().
 * NOTE(review): this patch was rebuilt from a copy whose line breaks had
   been collapsed and whose generic type arguments had been stripped. Line
   counts match every hunk header and the diffstat, but leading indentation
   and the position of the blank context lines in the second hunk are
   inferred. Confirm against the target file, or apply with
   --ignore-whitespace.
 * NOTE(review): typeof(Vector128<long>) was restored to agree with the
   Vector128<long> returned by Sse2.SetAllVector128(long), which is passed
   to Sse2.AndNot in Uabd_V / Uabdl_V. Confirm against upstream.

 .../Instructions/InstEmitSimdArithmetic.cs    | 268 ++++++++++++++++--
 1 file changed, 251 insertions(+), 17 deletions(-)

diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
index 1e7e88f515..eca1fa4d2f 100644
--- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
@@ -1915,20 +1915,124 @@ namespace ChocolArm64.Instructions
 
         public static void Sabd_V(ILEmitterCtx context)
         {
-            EmitVectorBinaryOpSx(context, () =>
+            if (Optimizations.UseSse2)
             {
-                context.Emit(OpCodes.Sub);
-                EmitAbs(context);
-            });
+                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+                Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+                Type[] typesAndOr  = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
+
+                context.EmitLdvec(op.Rn);
+                context.EmitLdvec(op.Rm);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub));
+
+                context.EmitStvectmp(); // Cmp mask
+                context.EmitLdvectmp(); // Cmp mask
+
+                context.EmitLdvec(op.Rn);
+                context.EmitLdvec(op.Rm);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
+
+                context.EmitLdvectmp(); // Cmp mask
+
+                context.EmitLdvec(op.Rm);
+                context.EmitLdvec(op.Rn);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
+
+                context.EmitStvec(op.Rd);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    EmitVectorZeroUpper(context, op.Rd);
+                }
+            }
+            else
+            {
+                EmitVectorBinaryOpSx(context, () =>
+                {
+                    context.Emit(OpCodes.Sub);
+                    EmitAbs(context);
+                });
+            }
         }
 
         public static void Sabdl_V(ILEmitterCtx context)
         {
-            EmitVectorWidenRnRmBinaryOpSx(context, () =>
+            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+            if (Optimizations.UseSse41 && op.Size < 2)
             {
-                context.Emit(OpCodes.Sub);
-                EmitAbs(context);
-            });
+                Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] };
+                Type[] typesSrl    = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
+                Type[] typesAndOr  = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
+                Type[] typesCvt    = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
+
+                string nameCvt = op.Size == 0
+                    ? nameof(Sse41.ConvertToVector128Int16)
+                    : nameof(Sse41.ConvertToVector128Int32);
+
+                context.EmitLdvec(op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    context.Emit(OpCodes.Ldc_I4_8);
+                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+                }
+
+                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
+
+                context.EmitLdvec(op.Rm);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    context.Emit(OpCodes.Ldc_I4_8);
+                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+                }
+
+                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
+
+                context.EmitStvectmp2(); // Long Rm
+                context.EmitStvectmp();  // Long Rn
+
+                context.EmitLdvectmp();  // Long Rn
+                context.EmitLdvectmp2(); // Long Rm
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub));
+
+                context.EmitStvectmp3(); // Cmp mask
+                context.EmitLdvectmp3(); // Cmp mask
+
+                context.EmitLdvectmp();  // Long Rn
+                context.EmitLdvectmp2(); // Long Rm
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
+
+                context.EmitLdvectmp3(); // Cmp mask
+
+                context.EmitLdvectmp2(); // Long Rm
+                context.EmitLdvectmp();  // Long Rn
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
+
+                context.EmitStvec(op.Rd);
+            }
+            else
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, () =>
+                {
+                    context.Emit(OpCodes.Sub);
+                    EmitAbs(context);
+                });
+            }
         }
 
         public static void Sadalp_V(ILEmitterCtx context)
@@ -2424,8 +2528,8 @@ namespace ChocolArm64.Instructions
                 context.EmitLdc_I4(op.Size == 0 ? sbyte.MinValue : short.MinValue);
                 context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
 
-                context.Emit(OpCodes.Dup);
                 context.EmitStvectmp();
+                context.EmitLdvectmp();
 
                 context.EmitLdvec(op.Rn);
                 context.EmitLdvectmp();
@@ -2598,20 +2702,150 @@ namespace ChocolArm64.Instructions
 
         public static void Uabd_V(ILEmitterCtx context)
         {
-            EmitVectorBinaryOpZx(context, () =>
+            if (Optimizations.UseSse41)
             {
-                context.Emit(OpCodes.Sub);
-                EmitAbs(context);
-            });
+                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+                Type[] typesMax    = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
+                Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
+                Type[] typesAndOr  = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
+                Type[] typesSav    = new Type[] { typeof(long) };
+
+                Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
+
+                context.EmitLdvec(op.Rm);
+                context.EmitLdvec(op.Rn);
+
+                context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
+
+                context.EmitLdvec(op.Rm);
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub));
+
+                context.EmitLdc_I8(-1L);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+                context.EmitStvectmp(); // Cmp mask
+                context.EmitLdvectmp(); // Cmp mask
+
+                context.EmitLdvec(op.Rn);
+                context.EmitLdvec(op.Rm);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
+
+                context.EmitLdvectmp(); // Cmp mask
+
+                context.EmitLdvec(op.Rm);
+                context.EmitLdvec(op.Rn);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
+
+                context.EmitStvec(op.Rd);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    EmitVectorZeroUpper(context, op.Rd);
+                }
+            }
+            else
+            {
+                EmitVectorBinaryOpZx(context, () =>
+                {
+                    context.Emit(OpCodes.Sub);
+                    EmitAbs(context);
+                });
+            }
         }
 
         public static void Uabdl_V(ILEmitterCtx context)
         {
-            EmitVectorWidenRnRmBinaryOpZx(context, () =>
+            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+            if (Optimizations.UseSse41 && op.Size < 2)
             {
-                context.Emit(OpCodes.Sub);
-                EmitAbs(context);
-            });
+                Type[] typesMax    = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] };
+                Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] };
+                Type[] typesSrl    = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
+                Type[] typesAndOr  = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
+                Type[] typesCvt    = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
+                Type[] typesSav    = new Type[] { typeof(long) };
+
+                string nameCvt = op.Size == 0
+                    ? nameof(Sse41.ConvertToVector128Int16)
+                    : nameof(Sse41.ConvertToVector128Int32);
+
+                context.EmitLdvec(op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    context.Emit(OpCodes.Ldc_I4_8);
+                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+                }
+
+                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
+
+                context.EmitLdvec(op.Rm);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    context.Emit(OpCodes.Ldc_I4_8);
+                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+                }
+
+                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
+
+                context.EmitStvectmp2(); // Long Rm
+                context.EmitStvectmp();  // Long Rn
+
+                context.EmitLdvectmp2(); // Long Rm
+                context.EmitLdvectmp();  // Long Rn
+
+                context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.Max), typesMax));
+
+                context.EmitLdvectmp2(); // Long Rm
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub));
+
+                context.EmitLdc_I8(-1L);
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+                context.EmitStvectmp3(); // Cmp mask
+                context.EmitLdvectmp3(); // Cmp mask
+
+                context.EmitLdvectmp();  // Long Rn
+                context.EmitLdvectmp2(); // Long Rm
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr));
+
+                context.EmitLdvectmp3(); // Cmp mask
+
+                context.EmitLdvectmp2(); // Long Rm
+                context.EmitLdvectmp();  // Long Rn
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr));
+
+                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr));
+
+                context.EmitStvec(op.Rd);
+            }
+            else
+            {
+                EmitVectorWidenRnRmBinaryOpZx(context, () =>
+                {
+                    context.Emit(OpCodes.Sub);
+                    EmitAbs(context);
+                });
+            }
         }
 
         public static void Uadalp_V(ILEmitterCtx context)