diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
deleted file mode 100644
index 1bd483640c..0000000000
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ /dev/null
@@ -1,2387 +0,0 @@
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h

using ChocolArm64.Decoder;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

using static ChocolArm64.Instruction.AInstEmitSimdHelper;

namespace ChocolArm64.Instruction
{
    static partial class AInstEmit
    {
        // Applies EmitAbs to the operand through the scalar "Sx" unary helper.
        public static void Abs_S(AILEmitterCtx Context)
        {
            EmitScalarUnaryOpSx(Context, () => EmitAbs(Context));
        }

        // Applies EmitAbs to the operand through the vector "Sx" unary helper.
        public static void Abs_V(AILEmitterCtx Context)
        {
            EmitVectorUnaryOpSx(Context, () => EmitAbs(Context));
        }

        // Emits an IL Add through the scalar "Zx" binary helper.
        public static void Add_S(AILEmitterCtx Context)
        {
            EmitScalarBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
        }

        // Emits Sse2.Add when SSE2 use is enabled, otherwise an IL Add through the
        // vector "Zx" binary helper.
        public static void Add_V(AILEmitterCtx Context)
        {
            if (AOptimizations.UseSse2)
            {
                EmitSse2Op(Context, nameof(Sse2.Add));
            }
            else
            {
                EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
            }
        }

        // High-narrow add without rounding.
        public static void Addhn_V(AILEmitterCtx Context)
        {
            EmitHighNarrow(Context, () => Context.Emit(OpCodes.Add), Round: false);
        }

        // Adds elements 0 and 1 of Rn and stores the sum as the scalar result in Rd.
        public static void Addp_S(AILEmitterCtx Context)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
            EmitVectorExtractZx(Context, Op.Rn, 1, Op.Size);

            Context.Emit(OpCodes.Add);

            EmitScalarSet(Context, Op.Rd, Op.Size);
        }

        // Emits an IL Add through the vector "Zx" pairwise helper.
        public static void Addp_V(AILEmitterCtx Context)
        {
            EmitVectorPairwiseOpZx(Context, () => Context.Emit(OpCodes.Add));
        }

        // Sums every element of Rn (element 0 first, then an Add per further element)
        // and stores the total as the scalar result in Rd.
        public static void Addv_V(AILEmitterCtx Context)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> Op.Size;

            EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);

            for (int Index = 1; Index < Elems; Index++)
            {
                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);

                Context.Emit(OpCodes.Add);
            }

            EmitScalarSet(Context, Op.Rd, Op.Size);
        }

        // Per element: calls ASoftFallback.CountLeadingSigns with the element and its
        // size in bits (8 << Size), then inserts the result into Rd.
        public static void Cls_V(AILEmitterCtx Context)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> Op.Size;

            int ESize = 8 << Op.Size;

            for (int Index = 0; Index < Elems; Index++)
            {
                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);

                Context.EmitLdc_I4(ESize);

                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingSigns));

                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
            }

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        // Per element: uses Lzcnt.LeadingZeroCount(uint) for 32-bit elements when LZCNT
        // is supported, otherwise ASoftFallback.CountLeadingZeros with the element size.
        public static void Clz_V(AILEmitterCtx Context)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> Op.Size;

            int ESize = 8 << Op.Size;

            for (int Index = 0; Index < Elems; Index++)
            {
                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);

                if (Lzcnt.IsSupported && ESize == 32)
                {
                    // Narrow to uint for the intrinsic, then widen the count again.
                    Context.Emit(OpCodes.Conv_U4);

                    Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) }));

                    Context.Emit(OpCodes.Conv_U8);
                }
                else
                {
                    Context.EmitLdc_I4(ESize);

                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros));
                }

                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
            }

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        // Per byte (16 for SIMD128, 8 otherwise): Popcnt.PopCount(ulong) when POPCNT is
        // supported, otherwise ASoftFallback.CountSetBits8.
        public static void Cnt_V(AILEmitterCtx Context)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int Elems = Op.RegisterSize == ARegisterSize.SIMD128 ? 16 : 8;

            for (int Index = 0; Index < Elems; Index++)
            {
                EmitVectorExtractZx(Context, Op.Rn, Index, 0);

                if (Popcnt.IsSupported)
                {
                    Context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) }));
                }
                else
                {
                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
                }

                EmitVectorInsert(Context, Op.Rd, Index, 0);
            }

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        // Subtracts the operands and takes Math.Abs of the difference.
        public static void Fabd_S(AILEmitterCtx Context)
        {
            EmitScalarBinaryOpF(Context, () =>
            {
                Context.Emit(OpCodes.Sub);

                EmitUnaryMathCall(Context, nameof(Math.Abs));
            });
        }

        // Math.Abs through the scalar floating-point unary helper.
        public static void Fabs_S(AILEmitterCtx Context)
        {
            EmitScalarUnaryOpF(Context, () => EmitUnaryMathCall(Context, nameof(Math.Abs)));
        }

        // Math.Abs through the vector floating-point unary helper.
        public static void Fabs_V(AILEmitterCtx Context)
        {
            EmitVectorUnaryOpF(Context, () => EmitUnaryMathCall(Context, nameof(Math.Abs)));
        }

        // AddScalar intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
        // soft-float FPAdd routine.
        public static void Fadd_S(AILEmitterCtx Context)
        {
            if (AOptimizations.FastFP && AOptimizations.UseSse
                                      && AOptimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(Context, nameof(Sse.AddScalar));
            }
            else
            {
                EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPAdd)));
            }
        }

        // Add intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
        // soft-float FPAdd routine.
        public static void Fadd_V(AILEmitterCtx Context)
        {
            if (AOptimizations.FastFP && AOptimizations.UseSse
                                      && AOptimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(Context, nameof(Sse.Add));
            }
            else
            {
                EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPAdd)));
            }
        }

        // Adds floating-point elements 0 and 1 of Rn and stores the scalar result in Rd.
        public static void Faddp_S(AILEmitterCtx Context)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            EmitVectorExtractF(Context, Op.Rn, 0, SizeF);
            EmitVectorExtractF(Context, Op.Rn, 1, SizeF);

            Context.Emit(OpCodes.Add);

            EmitScalarSetF(Context, Op.Rd, SizeF);
        }

        // Emits an IL Add through the floating-point pairwise helper.
        public static void Faddp_V(AILEmitterCtx Context)
        {
            EmitVectorPairwiseOpF(Context, () => Context.Emit(OpCodes.Add));
        }

        // DivideScalar intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise
        // the soft-float FPDiv routine.
        public static void Fdiv_S(AILEmitterCtx Context)
        {
            if (AOptimizations.FastFP && AOptimizations.UseSse
                                      && AOptimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(Context, nameof(Sse.DivideScalar));
            }
            else
            {
                EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPDiv)));
            }
        }

        // Divide intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
        // soft-float FPDiv routine.
        public static void Fdiv_V(AILEmitterCtx Context)
        {
            if (AOptimizations.FastFP && AOptimizations.UseSse
                                      && AOptimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(Context, nameof(Sse.Divide));
            }
            else
            {
                EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPDiv)));
            }
        }

        // Computes Rd = Ra + Rn * Rm on the lowest element. With FastFP and SSE2 enabled
        // the product and the sum are emitted as scalar intrinsics (Sse for Size 0, Sse2
        // otherwise); in every other case the soft-float FPMulAdd routine is called.
        public static void Fmadd_S(AILEmitterCtx Context)
        {
            if (AOptimizations.FastFP && AOptimizations.UseSse2)
            {
                AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

                if (Op.Size == 0)
                {
                    // GetMethod needs the closed generic type: the Sse scalar overloads
                    // take Vector128<float>.
                    Type[] TypesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    // Stack order: Ra, Rn, Rm -> Ra, (Rn * Rm) -> Ra + (Rn * Rm).
                    Context.EmitLdvec(Op.Ra);
                    Context.EmitLdvec(Op.Rn);
                    Context.EmitLdvec(Op.Rm);

                    Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), TypesMulAdd));
                    Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AddScalar), TypesMulAdd));

                    Context.EmitStvec(Op.Rd);

                    EmitVectorZero32_128(Context, Op.Rd);
                }
                else /* if (Op.Size == 1) */
                {
                    // The Sse2 scalar overloads used here take Vector128<double>.
                    Type[] TypesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                    EmitLdvecWithCastToDouble(Context, Op.Ra);
                    EmitLdvecWithCastToDouble(Context, Op.Rn);
                    EmitLdvecWithCastToDouble(Context, Op.Rm);

                    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), TypesMulAdd));
                    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), TypesMulAdd));

                    EmitStvecWithCastFromDouble(Context, Op.Rd);

                    EmitVectorZeroUpper(Context, Op.Rd);
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulAdd)));
            }
        }

// Scalar maximum: MaxScalar intrinsic on the fast path (FastFP + SSE + SSE2 enabled),
// soft-float FPMax otherwise.
public static void Fmax_S(AILEmitterCtx Context)
{
    bool FastPath = AOptimizations.FastFP && AOptimizations.UseSse && AOptimizations.UseSse2;

    if (!FastPath)
    {
        EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMax)));

        return;
    }

    EmitScalarSseOrSse2OpF(Context, nameof(Sse.MaxScalar));
}

// Vector maximum: Max intrinsic on the fast path, soft-float FPMax otherwise.
public static void Fmax_V(AILEmitterCtx Context)
{
    bool FastPath = AOptimizations.FastFP && AOptimizations.UseSse && AOptimizations.UseSse2;

    if (!FastPath)
    {
        EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMax)));

        return;
    }

    EmitVectorSseOrSse2OpF(Context, nameof(Sse.Max));
}

// Always routed to the soft-float FPMaxNum routine (scalar form).
public static void Fmaxnm_S(AILEmitterCtx Context)
{
    Action EmitMaxNum = () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMaxNum));

    EmitScalarBinaryOpF(Context, EmitMaxNum);
}

// Always routed to the soft-float FPMaxNum routine (vector form).
public static void Fmaxnm_V(AILEmitterCtx Context)
{
    Action EmitMaxNum = () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMaxNum));

    EmitVectorBinaryOpF(Context, EmitMaxNum);
}

// Pairwise form: soft-float FPMax through the floating-point pairwise helper.
public static void Fmaxp_V(AILEmitterCtx Context)
{
    Action EmitMax = () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMax));

    EmitVectorPairwiseOpF(Context, EmitMax);
}

// Scalar minimum: MinScalar intrinsic on the fast path, soft-float FPMin otherwise.
public static void Fmin_S(AILEmitterCtx Context)
{
    bool FastPath = AOptimizations.FastFP && AOptimizations.UseSse && AOptimizations.UseSse2;

    if (!FastPath)
    {
        EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMin)));

        return;
    }

    EmitScalarSseOrSse2OpF(Context, nameof(Sse.MinScalar));
}

// Vector minimum: Min intrinsic on the fast path, soft-float FPMin otherwise.
public static void Fmin_V(AILEmitterCtx Context)
{
    bool FastPath = AOptimizations.FastFP && AOptimizations.UseSse && AOptimizations.UseSse2;

    if (!FastPath)
    {
        EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMin)));

        return;
    }

    EmitVectorSseOrSse2OpF(Context, nameof(Sse.Min));
}

// Always routed to the soft-float FPMinNum routine (scalar form).
public static void Fminnm_S(AILEmitterCtx Context)
{
    Action EmitMinNum = () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMinNum));

    EmitScalarBinaryOpF(Context, EmitMinNum);
}

// Always routed to the soft-float FPMinNum routine (vector form).
public static void Fminnm_V(AILEmitterCtx Context)
{
    EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMinNum)));
}

// Pairwise form: soft-float FPMin through the floating-point pairwise helper.
public static void Fminp_V(AILEmitterCtx Context)
{
    EmitVectorPairwiseOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMin)));
}

// Fmla_*: emit IL Mul followed by Add through the matching ternary helper.
public static void Fmla_Se(AILEmitterCtx Context)
{
    EmitScalarTernaryOpByElemF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Add);
    });
}

public static void Fmla_V(AILEmitterCtx Context)
{
    EmitVectorTernaryOpF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Add);
    });
}

public static void Fmla_Ve(AILEmitterCtx Context)
{
    EmitVectorTernaryOpByElemF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Add);
    });
}

// Fmls_*: emit IL Mul followed by Sub through the matching ternary helper.
public static void Fmls_Se(AILEmitterCtx Context)
{
    EmitScalarTernaryOpByElemF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Sub);
    });
}

public static void Fmls_V(AILEmitterCtx Context)
{
    EmitVectorTernaryOpF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Sub);
    });
}

public static void Fmls_Ve(AILEmitterCtx Context)
{
    EmitVectorTernaryOpByElemF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Sub);
    });
}

// Computes Rd = Ra - Rn * Rm on the lowest element. With FastFP and SSE2 enabled the
// product and difference are emitted as scalar intrinsics (Sse for Size 0, Sse2
// otherwise); in every other case the soft-float FPMulSub routine is called.
public static void Fmsub_S(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse2)
    {
        AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

        if (Op.Size == 0)
        {
            // GetMethod needs the closed generic type used by the Sse overloads.
            Type[] TypesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

            Context.EmitLdvec(Op.Ra);
            Context.EmitLdvec(Op.Rn);
            Context.EmitLdvec(Op.Rm);

            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), TypesMulSub));

            Context.EmitStvec(Op.Rd);

            EmitVectorZero32_128(Context, Op.Rd);
        }
        else /* if (Op.Size == 1) */
        {
            Type[] TypesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

            EmitLdvecWithCastToDouble(Context, Op.Ra);
            EmitLdvecWithCastToDouble(Context, Op.Rn);
            EmitLdvecWithCastToDouble(Context, Op.Rm);

            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), TypesMulSub));

            EmitStvecWithCastFromDouble(Context, Op.Rd);

            EmitVectorZeroUpper(Context, Op.Rd);
        }
    }
    else
    {
        EmitScalarTernaryRaOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulSub)));
    }
}

// MultiplyScalar intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
// soft-float FPMul routine.
public static void Fmul_S(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse
                              && AOptimizations.UseSse2)
    {
        EmitScalarSseOrSse2OpF(Context, nameof(Sse.MultiplyScalar));
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMul)));
    }
}

// By-element scalar multiply: plain IL Mul.
public static void Fmul_Se(AILEmitterCtx Context)
{
    EmitScalarBinaryOpByElemF(Context, () => Context.Emit(OpCodes.Mul));
}

// Multiply intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
// soft-float FPMul routine.
public static void Fmul_V(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse
                              && AOptimizations.UseSse2)
    {
        EmitVectorSseOrSse2OpF(Context, nameof(Sse.Multiply));
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMul)));
    }
}

// By-element vector multiply: plain IL Mul.
public static void Fmul_Ve(AILEmitterCtx Context)
{
    EmitVectorBinaryOpByElemF(Context, () => Context.Emit(OpCodes.Mul));
}

// Fmulx_*: always the soft-float FPMulX routine, through the matching binary helper.
public static void Fmulx_S(AILEmitterCtx Context)
{
    EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)));
}

public static void Fmulx_Se(AILEmitterCtx Context)
{
    EmitScalarBinaryOpByElemF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)));
}

public static void Fmulx_V(AILEmitterCtx Context)
{
    EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)));
}

public static void Fmulx_Ve(AILEmitterCtx Context)
{
    EmitVectorBinaryOpByElemF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)));
}

// Fneg_*: IL Neg through the floating-point unary helper.
public static void Fneg_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg));
}

public static void Fneg_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => Context.Emit(OpCodes.Neg));
}

// Computes Rd = (-Rn * Rm) - Ra on element 0 using plain IL arithmetic.
public static void Fnmadd_S(AILEmitterCtx Context)
{
    AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

    int SizeF = Op.Size & 1;

    EmitVectorExtractF(Context, Op.Rn, 0, SizeF);

    Context.Emit(OpCodes.Neg);

    EmitVectorExtractF(Context, Op.Rm, 0, SizeF);

    Context.Emit(OpCodes.Mul);

    EmitVectorExtractF(Context, Op.Ra, 0, SizeF);

    Context.Emit(OpCodes.Sub);

    EmitScalarSetF(Context, Op.Rd, SizeF);
}

// Computes Rd = (Rn * Rm) - Ra on element 0 using plain IL arithmetic.
public static void Fnmsub_S(AILEmitterCtx Context)
{
    AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

    int SizeF = Op.Size & 1;

    EmitVectorExtractF(Context, Op.Rn, 0, SizeF);
    EmitVectorExtractF(Context, Op.Rm, 0, SizeF);

    Context.Emit(OpCodes.Mul);

    EmitVectorExtractF(Context, Op.Ra, 0, SizeF);

    Context.Emit(OpCodes.Sub);

    EmitScalarSetF(Context, Op.Rd, SizeF);
}

// Multiplies the operands and negates the product.
public static void Fnmul_S(AILEmitterCtx Context)
{
    EmitScalarBinaryOpF(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Neg);
    });
}

// Frecpe_*: ASoftFloat.RecipEstimate through the floating-point unary helper.
public static void Frecpe_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpF(Context, () => EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)));
}

public static void Frecpe_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)));
}

// Computes Rd = 2 - Rn * Rm on the lowest element. Fast path uses scalar intrinsics
// (Sse for single precision, Sse2 for double); otherwise soft-float FPRecipStepFused.
public static void Frecps_S(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse2)
    {
        AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

        int SizeF = Op.Size & 1;

        if (SizeF == 0)
        {
            Type[] TypesSsv = new Type[] { typeof(float) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

            // Stack order: 2, Rn, Rm -> 2, (Rn * Rm) -> 2 - (Rn * Rm).
            Context.EmitLdc_R4(2f);
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), TypesSsv));

            Context.EmitLdvec(Op.Rn);
            Context.EmitLdvec(Op.Rm);

            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), TypesMulSub));

            Context.EmitStvec(Op.Rd);

            EmitVectorZero32_128(Context, Op.Rd);
        }
        else /* if (SizeF == 1) */
        {
            Type[] TypesSsv = new Type[] { typeof(double) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

            Context.EmitLdc_R8(2d);
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), TypesSsv));

            EmitLdvecWithCastToDouble(Context, Op.Rn);
            EmitLdvecWithCastToDouble(Context, Op.Rm);

            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), TypesMulSub));

            EmitStvecWithCastFromDouble(Context, Op.Rd);

            EmitVectorZeroUpper(Context, Op.Rd);
        }
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRecipStepFused)));
    }
}

// Vector form of Frecps_S: Rd = 2 - Rn * Rm per element, using packed intrinsics on
// the fast path and soft-float FPRecipStepFused otherwise.
public static void Frecps_V(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse2)
    {
        AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

        int SizeF = Op.Size & 1;

        if (SizeF == 0)
        {
            Type[] TypesSav = new Type[] { typeof(float) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

            Context.EmitLdc_R4(2f);
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), TypesSav));

            Context.EmitLdvec(Op.Rn);
            Context.EmitLdvec(Op.Rm);

            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), TypesMulSub));

            Context.EmitStvec(Op.Rd);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }
        else /* if (SizeF == 1) */
        {
            Type[] TypesSav = new Type[] { typeof(double) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

            Context.EmitLdc_R8(2d);
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), TypesSav));

            EmitLdvecWithCastToDouble(Context, Op.Rn);
            EmitLdvecWithCastToDouble(Context, Op.Rm);

            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesMulSub));

            EmitStvecWithCastFromDouble(Context, Op.Rd);
        }
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRecipStepFused)));
    }
}

// Soft-float FPRecpX through the scalar floating-point unary helper.
public static void Frecpx_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRecpX)));
}

// Rounds element 0 of Rn with MidpointRounding.AwayFromZero into Rd.
public static void Frinta_S(AILEmitterCtx Context)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    EmitVectorExtractF(Context, Op.Rn, 0, Op.Size);

    EmitRoundMathCall(Context, MidpointRounding.AwayFromZero);

    EmitScalarSetF(Context, Op.Rd, Op.Size);
}

// Rounds every element with MidpointRounding.AwayFromZero.
public static void Frinta_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => EmitRoundMathCall(Context, MidpointRounding.AwayFromZero));
}

// Passes the state argument to AVectorHelper.RoundF (Size 0) or AVectorHelper.Round
// (Size 1); any other size throws InvalidOperationException while emitting.
public static void Frinti_S(AILEmitterCtx Context)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    EmitScalarUnaryOpF(Context, () =>
    {
        Context.EmitLdarg(ATranslatedSub.StateArgIdx);

        if (Op.Size == 0)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
        }
        else if (Op.Size == 1)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
        }
        else
        {
            throw new InvalidOperationException();
        }
    });
}

// Vector form of Frinti_S; the helper is selected from the low bit of Op.Size.
public static void Frinti_V(AILEmitterCtx Context)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    int SizeF = Op.Size & 1;

    EmitVectorUnaryOpF(Context, () =>
    {
        Context.EmitLdarg(ATranslatedSub.StateArgIdx);

        if (SizeF == 0)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
        }
        else if (SizeF == 1)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
        }
        else
        {
            throw new InvalidOperationException();
        }
    });
}

// Frintm_*: Math.Floor through the floating-point unary helper.
public static void Frintm_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpF(Context, () => EmitUnaryMathCall(Context, nameof(Math.Floor)));
}

public static void Frintm_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => EmitUnaryMathCall(Context, nameof(Math.Floor)));
}

// Rounds element 0 of Rn with MidpointRounding.ToEven into Rd.
public static void Frintn_S(AILEmitterCtx Context)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    EmitVectorExtractF(Context, Op.Rn, 0, Op.Size);

    EmitRoundMathCall(Context, MidpointRounding.ToEven);

    EmitScalarSetF(Context, Op.Rd, Op.Size);
}

// Rounds every element with MidpointRounding.ToEven.
public static void Frintn_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => EmitRoundMathCall(Context, MidpointRounding.ToEven));
}

// Frintp_*: Math.Ceiling through the floating-point unary helper.
public static void Frintp_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpF(Context, () => EmitUnaryMathCall(Context, nameof(Math.Ceiling)));
}

public static void Frintp_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => EmitUnaryMathCall(Context, nameof(Math.Ceiling)));
}

// Same emission as Frinti_S: state argument plus AVectorHelper.RoundF / Round,
// chosen by Op.Size.
public static void Frintx_S(AILEmitterCtx Context)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    EmitScalarUnaryOpF(Context, () =>
    {
        Context.EmitLdarg(ATranslatedSub.StateArgIdx);

        if (Op.Size == 0)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
        }
        else if (Op.Size == 1)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
        }
        else
        {
            throw new InvalidOperationException();
        }
    });
}

// Vector form of Frintx_S; note it tests Op.Size directly rather than Op.Size & 1.
public static void Frintx_V(AILEmitterCtx Context)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    EmitVectorUnaryOpF(Context, () =>
    {
        Context.EmitLdarg(ATranslatedSub.StateArgIdx);

        if (Op.Size == 0)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
        }
        else if (Op.Size == 1)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
        }
        else
        {
            throw new InvalidOperationException();
        }
    });
}

// Frsqrte_*: ASoftFloat.InvSqrtEstimate through the floating-point unary helper.
public static void Frsqrte_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpF(Context, () => EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.InvSqrtEstimate)));
}

public static void Frsqrte_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpF(Context, () => EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.InvSqrtEstimate)));
}

// Computes Rd = 0.5 * (3 - Rn * Rm) on the lowest element. Fast path uses scalar
// intrinsics; otherwise soft-float FPRSqrtStepFused.
public static void Frsqrts_S(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse2)
    {
        AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

        int SizeF = Op.Size & 1;

        if (SizeF == 0)
        {
            Type[] TypesSsv = new Type[] { typeof(float) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

            // Stack order: 0.5, 3, Rn, Rm -> 0.5, 3, (Rn * Rm) -> 0.5, (3 - p) -> result.
            Context.EmitLdc_R4(0.5f);
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), TypesSsv));

            Context.EmitLdc_R4(3f);
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), TypesSsv));

            Context.EmitLdvec(Op.Rn);
            Context.EmitLdvec(Op.Rm);

            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), TypesMulSub));

            Context.EmitStvec(Op.Rd);

            EmitVectorZero32_128(Context, Op.Rd);
        }
        else /* if (SizeF == 1) */
        {
            Type[] TypesSsv = new Type[] { typeof(double) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

            Context.EmitLdc_R8(0.5d);
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), TypesSsv));

            Context.EmitLdc_R8(3d);
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), TypesSsv));

            EmitLdvecWithCastToDouble(Context, Op.Rn);
            EmitLdvecWithCastToDouble(Context, Op.Rm);

            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), TypesMulSub));

            EmitStvecWithCastFromDouble(Context, Op.Rd);

            EmitVectorZeroUpper(Context, Op.Rd);
        }
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRSqrtStepFused)));
    }
}

// Vector form of Frsqrts_S: Rd = 0.5 * (3 - Rn * Rm) per element.
public static void Frsqrts_V(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse2)
    {
        AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

        int SizeF = Op.Size & 1;

        if (SizeF == 0)
        {
            Type[] TypesSav = new Type[] { typeof(float) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

            Context.EmitLdc_R4(0.5f);
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), TypesSav));

            Context.EmitLdc_R4(3f);
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), TypesSav));

            Context.EmitLdvec(Op.Rn);
            Context.EmitLdvec(Op.Rm);

            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), TypesMulSub));
            Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), TypesMulSub));

            Context.EmitStvec(Op.Rd);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }
        else /* if (SizeF == 1) */
        {
            Type[] TypesSav = new Type[] { typeof(double) };
            Type[] TypesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

            Context.EmitLdc_R8(0.5d);
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), TypesSav));

            Context.EmitLdc_R8(3d);
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), TypesSav));

            EmitLdvecWithCastToDouble(Context, Op.Rn);
            EmitLdvecWithCastToDouble(Context, Op.Rm);

            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesMulSub));
            Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), TypesMulSub));

            EmitStvecWithCastFromDouble(Context, Op.Rd);
        }
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRSqrtStepFused)));
    }
}

// SqrtScalar intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
// soft-float FPSqrt routine.
public static void Fsqrt_S(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse
                              && AOptimizations.UseSse2)
    {
        EmitScalarSseOrSse2OpF(Context, nameof(Sse.SqrtScalar));
    }
    else
    {
        EmitScalarUnaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSqrt)));
    }
}

// Sqrt intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the soft-float
// FPSqrt routine.
public static void Fsqrt_V(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse
                              && AOptimizations.UseSse2)
    {
        EmitVectorSseOrSse2OpF(Context, nameof(Sse.Sqrt));
    }
    else
    {
        EmitVectorUnaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSqrt)));
    }
}

// SubtractScalar intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
// soft-float FPSub routine.
public static void Fsub_S(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse
                              && AOptimizations.UseSse2)
    {
        EmitScalarSseOrSse2OpF(Context, nameof(Sse.SubtractScalar));
    }
    else
    {
        EmitScalarBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSub)));
    }
}

// Subtract intrinsic when FastFP, SSE and SSE2 are all enabled; otherwise the
// soft-float FPSub routine.
public static void Fsub_V(AILEmitterCtx Context)
{
    if (AOptimizations.FastFP && AOptimizations.UseSse
                              && AOptimizations.UseSse2)
    {
        EmitVectorSseOrSse2OpF(Context, nameof(Sse.Subtract));
    }
    else
    {
        EmitVectorBinaryOpF(Context, () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSub)));
    }
}

// Mla_*: IL Mul followed by Add through the integer "Zx" ternary helpers.
public static void Mla_V(AILEmitterCtx Context)
{
    EmitVectorTernaryOpZx(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Add);
    });
}

public static void Mla_Ve(AILEmitterCtx Context)
{
    EmitVectorTernaryOpByElemZx(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Add);
    });
}

// Mls_*: IL Mul followed by Sub through the integer "Zx" ternary helpers.
public static void Mls_V(AILEmitterCtx Context)
{
    EmitVectorTernaryOpZx(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Sub);
    });
}

public static void Mls_Ve(AILEmitterCtx Context)
{
    EmitVectorTernaryOpByElemZx(Context, () =>
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Sub);
    });
}

// Mul_*: IL Mul through the integer "Zx" binary helpers.
public static void Mul_V(AILEmitterCtx Context)
{
    EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul));
}

public static void Mul_Ve(AILEmitterCtx Context)
{
    EmitVectorBinaryOpByElemZx(Context, () => Context.Emit(OpCodes.Mul));
}

// Neg_*: IL Neg through the "Sx" unary helpers.
public static void Neg_S(AILEmitterCtx Context)
{
    EmitScalarUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg));
}

public static void Neg_V(AILEmitterCtx Context)
{
    EmitVectorUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg));
}

// High-narrow add with rounding.
public static void Raddhn_V(AILEmitterCtx Context)
{
    EmitHighNarrow(Context, () => Context.Emit(OpCodes.Add), Round: true);
}

// High-narrow subtract with rounding.
public static void Rsubhn_V(AILEmitterCtx Context)
{
    EmitHighNarrow(Context, () => Context.Emit(OpCodes.Sub), Round: true);
}

// Subtract, EmitAbs, then Add (accumulate) through the "Sx" ternary helper.
public static void Saba_V(AILEmitterCtx Context)
{
    EmitVectorTernaryOpSx(Context, () =>
    {
        Context.Emit(OpCodes.Sub);
        EmitAbs(Context);

        Context.Emit(OpCodes.Add);
    });
}

// Same sequence as Saba_V through the widening Rn/Rm ternary helper.
public static void Sabal_V(AILEmitterCtx Context)
{
    EmitVectorWidenRnRmTernaryOpSx(Context, () =>
    {
        Context.Emit(OpCodes.Sub);
        EmitAbs(Context);

        Context.Emit(OpCodes.Add);
    });
}

// Subtract then EmitAbs through the "Sx" binary helper.
public static void Sabd_V(AILEmitterCtx Context)
{
    EmitVectorBinaryOpSx(Context, () =>
    {
        Context.Emit(OpCodes.Sub);
        EmitAbs(Context);
    });
}

// Same sequence as Sabd_V through the widening Rn/Rm binary helper.
public static void Sabdl_V(AILEmitterCtx Context)
{
    EmitVectorWidenRnRmBinaryOpSx(Context, () =>
    {
        Context.Emit(OpCodes.Sub);
        EmitAbs(Context);
    });
}

// Signed add-long-pairwise with accumulation.
public static void Sadalp_V(AILEmitterCtx Context)
{
    EmitAddLongPairwise(Context, Signed: true, Accumulate: true);
}

// Widens both operands and adds them. With SSE4.1 enabled each operand is loaded as a
// signed vector, shifted right by 8 bytes when the register size is SIMD128 (0 bytes
// otherwise), widened with Sse41.ConvertToVector128Int16/32/64 and summed with
// Sse2.Add. Without SSE4.1 an IL Add runs through the widening "Sx" helper.
public static void Saddl_V(AILEmitterCtx Context)
{
    if (AOptimizations.UseSse41)
    {
        AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

        Type[] TypesSrl = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
        Type[] TypesCvt = new Type[] { VectorIntTypesPerSizeLog2[Op.Size] };
        Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size + 1],
                                       VectorIntTypesPerSizeLog2[Op.Size + 1] };

        string[] NamesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
                                           nameof(Sse41.ConvertToVector128Int32),
                                           nameof(Sse41.ConvertToVector128Int64) };

        int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 8 : 0;

        EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);

        Context.EmitLdc_I4(NumBytes);
        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl));

        Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt));

        // Rm must be loaded with the signed cast as well: the shift and convert
        // overloads resolved above take the signed vector type, so an unsigned cast
        // would leave a mismatched vector type on the evaluation stack.
        EmitLdvecWithSignedCast(Context, Op.Rm, Op.Size);

        Context.EmitLdc_I4(NumBytes);
        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl));

        Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt));

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));

        EmitStvecWithSignedCast(Context, Op.Rd, Op.Size + 1);
    }
    else
    {
        EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Add));
    }
}

// Signed add-long-pairwise without accumulation.
public static void Saddlp_V(AILEmitterCtx Context)
{
    EmitAddLongPairwise(Context, Signed: true, Accumulate: false);
}

// IL Add through the widening-Rm "Sx" binary helper.
public static void Saddw_V(AILEmitterCtx Context)
{
    EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Add));
}

// Halving add. For Size > 0 with SSE2 enabled it emits (Rn & Rm) + ((Rn ^ Rm) >> 1)
// with an arithmetic shift; otherwise Add followed by a shift right by one.
public static void Shadd_V(AILEmitterCtx Context)
{
    AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

    if (AOptimizations.UseSse2 && Op.Size > 0)
    {
        Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
        Type[] TypesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] };

        EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);

        // Keep copies of both operands in the temporaries for the Xor below.
        Context.Emit(OpCodes.Dup);
        Context.EmitStvectmp();

        EmitLdvecWithSignedCast(Context, Op.Rm, Op.Size);

        Context.Emit(OpCodes.Dup);
        Context.EmitStvectmp2();

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), TypesAndXorAdd));

        Context.EmitLdvectmp();
        Context.EmitLdvectmp2();

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), TypesAndXorAdd));

        Context.EmitLdc_I4(1);
        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra));

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAndXorAdd));

        EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);

        if (Op.RegisterSize == ARegisterSize.SIMD64)
        {
            EmitVectorZeroUpper(Context, Op.Rd);
        }
    }
    else
    {
        EmitVectorBinaryOpSx(Context, () =>
        {
            Context.Emit(OpCodes.Add);

            Context.Emit(OpCodes.Ldc_I4_1);
            Context.Emit(OpCodes.Shr);
        });
    }
}

// Halving subtract. For Size < 2 with SSE2 enabled both operands are biased by the
// element type's MinValue and the result is n' - Average(n', m'); otherwise Sub
// followed by a shift right by one.
public static void Shsub_V(AILEmitterCtx Context)
{
    AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

    if (AOptimizations.UseSse2 && Op.Size < 2)
    {
        Type[] TypesSav = new Type[] { IntTypesPerSizeLog2[Op.Size] };
        Type[] TypesAddSub = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] };
        Type[] TypesAvg = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] };

        Context.EmitLdc_I4(Op.Size == 0 ? sbyte.MinValue : short.MinValue);
        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), TypesSav));

        Context.EmitStvectmp();

        EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
        Context.EmitLdvectmp();

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAddSub));

        // The biased Rn is needed twice: as Average input and as the minuend.
        Context.Emit(OpCodes.Dup);

        EmitLdvecWithSignedCast(Context, Op.Rm, Op.Size);
        Context.EmitLdvectmp();

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAddSub));

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), TypesAvg));

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesAddSub));

        EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);

        if (Op.RegisterSize == ARegisterSize.SIMD64)
        {
            EmitVectorZeroUpper(Context, Op.Rd);
        }
    }
    else
    {
        EmitVectorBinaryOpSx(Context, () =>
        {
            Context.Emit(OpCodes.Sub);

            Context.Emit(OpCodes.Ldc_I4_1);
            Context.Emit(OpCodes.Shr);
        });
    }
}

// Smax/Smin family: calls Math.Max or Math.Min (long, long) per element, through the
// "Sx" binary helper (_V) or the "Sx" pairwise helper (p_V).
public static void Smax_V(AILEmitterCtx Context)
{
    Type[] Types = new Type[] { typeof(long), typeof(long) };

    MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Max), Types);

    EmitVectorBinaryOpSx(Context, () => Context.EmitCall(MthdInfo));
}

public static void Smaxp_V(AILEmitterCtx Context)
{
    Type[] Types = new Type[] { typeof(long), typeof(long) };

    MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Max), Types);

    EmitVectorPairwiseOpSx(Context, () => Context.EmitCall(MthdInfo));
}

public static void Smin_V(AILEmitterCtx Context)
{
    Type[] Types = new Type[] { typeof(long), typeof(long) };

    MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types);

    EmitVectorBinaryOpSx(Context, () => Context.EmitCall(MthdInfo));
}

public static void Sminp_V(AILEmitterCtx Context)
{
    Type[] Types = new Type[] { typeof(long), typeof(long) };

    MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types);

    EmitVectorPairwiseOpSx(Context, () => Context.EmitCall(MthdInfo));
}

public static void Smlal_V(AILEmitterCtx Context)
{
    AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

    if (AOptimizations.UseSse41 && Op.Size < 2)
    {
        Type[] TypesSrl = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
        Type[] TypesCvt = new Type[] { VectorIntTypesPerSizeLog2[Op.Size] };
        Type[] TypesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size + 1],
                                          VectorIntTypesPerSizeLog2[Op.Size + 1] };

        Type TypeMul = Op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

        string NameCvt = Op.Size == 0
            ? nameof(Sse41.ConvertToVector128Int16)
            : nameof(Sse41.ConvertToVector128Int32);

        int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ?
8 : 0; - - EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size + 1); - - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - EmitLdvecWithSignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - Context.EmitCall(TypeMul.GetMethod(nameof(Sse2.MultiplyLow), TypesMulAdd)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesMulAdd)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmTernaryOpSx(Context, () => - { - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Add); - }); - } - } - - public static void Smlsl_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse41 && Op.Size < 2) - { - Type[] TypesSrl = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesCvt = new Type[] { VectorIntTypesPerSizeLog2[Op.Size] }; - Type[] TypesMulSub = new Type[] { VectorIntTypesPerSizeLog2[Op.Size + 1], - VectorIntTypesPerSizeLog2[Op.Size + 1] }; - - Type TypeMul = Op.Size == 0 ? typeof(Sse2) : typeof(Sse41); - - string NameCvt = Op.Size == 0 - ? nameof(Sse41.ConvertToVector128Int16) - : nameof(Sse41.ConvertToVector128Int32); - - int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 
8 : 0; - - EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size + 1); - - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - EmitLdvecWithSignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - Context.EmitCall(TypeMul.GetMethod(nameof(Sse2.MultiplyLow), TypesMulSub)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesMulSub)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmTernaryOpSx(Context, () => - { - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - }); - } - } - - public static void Smull_V(AILEmitterCtx Context) - { - EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul)); - } - - public static void Sqabs_S(AILEmitterCtx Context) - { - EmitScalarSaturatingUnaryOpSx(Context, () => EmitAbs(Context)); - } - - public static void Sqabs_V(AILEmitterCtx Context) - { - EmitVectorSaturatingUnaryOpSx(Context, () => EmitAbs(Context)); - } - - public static void Sqadd_S(AILEmitterCtx Context) - { - EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Add); - } - - public static void Sqadd_V(AILEmitterCtx Context) - { - EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Add); - } - - public static void Sqdmulh_S(AILEmitterCtx Context) - { - EmitSaturatingBinaryOp(Context, () => EmitDoublingMultiplyHighHalf(Context, Round: false), SaturatingFlags.ScalarSx); - } - - public static void Sqdmulh_V(AILEmitterCtx Context) - { - EmitSaturatingBinaryOp(Context, () => EmitDoublingMultiplyHighHalf(Context, Round: false), SaturatingFlags.VectorSx); - } - - public static void Sqneg_S(AILEmitterCtx Context) - { - 
EmitScalarSaturatingUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg)); - } - - public static void Sqneg_V(AILEmitterCtx Context) - { - EmitVectorSaturatingUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg)); - } - - public static void Sqrdmulh_S(AILEmitterCtx Context) - { - EmitSaturatingBinaryOp(Context, () => EmitDoublingMultiplyHighHalf(Context, Round: true), SaturatingFlags.ScalarSx); - } - - public static void Sqrdmulh_V(AILEmitterCtx Context) - { - EmitSaturatingBinaryOp(Context, () => EmitDoublingMultiplyHighHalf(Context, Round: true), SaturatingFlags.VectorSx); - } - - public static void Sqsub_S(AILEmitterCtx Context) - { - EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Sub); - } - - public static void Sqsub_V(AILEmitterCtx Context) - { - EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Sub); - } - - public static void Sqxtn_S(AILEmitterCtx Context) - { - EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.ScalarSxSx); - } - - public static void Sqxtn_V(AILEmitterCtx Context) - { - EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.VectorSxSx); - } - - public static void Sqxtun_S(AILEmitterCtx Context) - { - EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.ScalarSxZx); - } - - public static void Sqxtun_V(AILEmitterCtx Context) - { - EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.VectorSxZx); - } - - public static void Srhadd_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size < 2) - { - Type[] TypesSav = new Type[] { IntTypesPerSizeLog2[Op.Size] }; - Type[] TypesSubAdd = new Type[] { VectorIntTypesPerSizeLog2 [Op.Size], VectorIntTypesPerSizeLog2 [Op.Size] }; - Type[] TypesAvg = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - Context.EmitLdc_I4(Op.Size == 0 ? 
sbyte.MinValue : short.MinValue); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), TypesSav)); - - Context.Emit(OpCodes.Dup); - Context.EmitStvectmp(); - - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - Context.EmitLdvectmp(); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesSubAdd)); - - EmitLdvecWithSignedCast(Context, Op.Rm, Op.Size); - Context.EmitLdvectmp(); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesSubAdd)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), TypesAvg)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesSubAdd)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorBinaryOpSx(Context, () => - { - Context.Emit(OpCodes.Add); - - Context.Emit(OpCodes.Ldc_I4_1); - Context.Emit(OpCodes.Add); - - Context.Emit(OpCodes.Ldc_I4_1); - Context.Emit(OpCodes.Shr); - }); - } - } - - public static void Ssubl_V(AILEmitterCtx Context) - { - if (AOptimizations.UseSse41) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - Type[] TypesSrl = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesCvt = new Type[] { VectorIntTypesPerSizeLog2[Op.Size] }; - Type[] TypesSub = new Type[] { VectorIntTypesPerSizeLog2[Op.Size + 1], - VectorIntTypesPerSizeLog2[Op.Size + 1] }; - - string[] NamesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), - nameof(Sse41.ConvertToVector128Int32), - nameof(Sse41.ConvertToVector128Int64) }; - - int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 
8 : 0; - - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt)); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesSub)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Sub)); - } - } - - public static void Ssubw_V(AILEmitterCtx Context) - { - EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Sub)); - } - - public static void Sub_S(AILEmitterCtx Context) - { - EmitScalarBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); - } - - public static void Sub_V(AILEmitterCtx Context) - { - if (AOptimizations.UseSse2) - { - EmitSse2Op(Context, nameof(Sse2.Subtract)); - } - else - { - EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); - } - } - - public static void Subhn_V(AILEmitterCtx Context) - { - EmitHighNarrow(Context, () => Context.Emit(OpCodes.Sub), Round: false); - } - - public static void Suqadd_S(AILEmitterCtx Context) - { - EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Accumulate); - } - - public static void Suqadd_V(AILEmitterCtx Context) - { - EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Accumulate); - } - - public static void Uaba_V(AILEmitterCtx Context) - { - EmitVectorTernaryOpZx(Context, () => - { - Context.Emit(OpCodes.Sub); - EmitAbs(Context); - - Context.Emit(OpCodes.Add); - }); - } - - public static void Uabal_V(AILEmitterCtx Context) - { - EmitVectorWidenRnRmTernaryOpZx(Context, () => - { - Context.Emit(OpCodes.Sub); - 
EmitAbs(Context); - - Context.Emit(OpCodes.Add); - }); - } - - public static void Uabd_V(AILEmitterCtx Context) - { - EmitVectorBinaryOpZx(Context, () => - { - Context.Emit(OpCodes.Sub); - EmitAbs(Context); - }); - } - - public static void Uabdl_V(AILEmitterCtx Context) - { - EmitVectorWidenRnRmBinaryOpZx(Context, () => - { - Context.Emit(OpCodes.Sub); - EmitAbs(Context); - }); - } - - public static void Uadalp_V(AILEmitterCtx Context) - { - EmitAddLongPairwise(Context, Signed: false, Accumulate: true); - } - - public static void Uaddl_V(AILEmitterCtx Context) - { - if (AOptimizations.UseSse41) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesCvt = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size] }; - Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size + 1], - VectorUIntTypesPerSizeLog2[Op.Size + 1] }; - - string[] NamesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), - nameof(Sse41.ConvertToVector128Int32), - nameof(Sse41.ConvertToVector128Int64) }; - - int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 
8 : 0; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt)); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Add)); - } - } - - public static void Uaddlp_V(AILEmitterCtx Context) - { - EmitAddLongPairwise(Context, Signed: false, Accumulate: false); - } - - public static void Uaddlv_V(AILEmitterCtx Context) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Bytes = Op.GetBitsCount() >> 3; - int Elems = Bytes >> Op.Size; - - EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - - for (int Index = 1; Index < Elems; Index++) - { - EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); - - Context.Emit(OpCodes.Add); - } - - EmitScalarSet(Context, Op.Rd, Op.Size + 1); - } - - public static void Uaddw_V(AILEmitterCtx Context) - { - EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Add)); - } - - public static void Uhadd_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0) - { - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.Emit(OpCodes.Dup); - Context.EmitStvectmp(); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - 
Context.Emit(OpCodes.Dup); - Context.EmitStvectmp2(); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), TypesAndXorAdd)); - - Context.EmitLdvectmp(); - Context.EmitLdvectmp2(); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), TypesAndXorAdd)); - - Context.EmitLdc_I4(1); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAndXorAdd)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorBinaryOpZx(Context, () => - { - Context.Emit(OpCodes.Add); - - Context.Emit(OpCodes.Ldc_I4_1); - Context.Emit(OpCodes.Shr_Un); - }); - } - } - - public static void Uhsub_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size < 2) - { - Type[] TypesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - Context.Emit(OpCodes.Dup); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), TypesAvgSub)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesAvgSub)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorBinaryOpZx(Context, () => - { - Context.Emit(OpCodes.Sub); - - Context.Emit(OpCodes.Ldc_I4_1); - Context.Emit(OpCodes.Shr_Un); - }); - } - } - - public static void Umax_V(AILEmitterCtx Context) - { - Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; - - MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Max), Types); - - EmitVectorBinaryOpZx(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Umaxp_V(AILEmitterCtx 
Context) - { - Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; - - MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Max), Types); - - EmitVectorPairwiseOpZx(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Umin_V(AILEmitterCtx Context) - { - Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; - - MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types); - - EmitVectorBinaryOpZx(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Uminp_V(AILEmitterCtx Context) - { - Type[] Types = new Type[] { typeof(ulong), typeof(ulong) }; - - MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types); - - EmitVectorPairwiseOpZx(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Umlal_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse41 && Op.Size < 2) - { - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesCvt = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size] }; - Type[] TypesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [Op.Size + 1], - VectorIntTypesPerSizeLog2 [Op.Size + 1] }; - - Type TypeMul = Op.Size == 0 ? typeof(Sse2) : typeof(Sse41); - - string NameCvt = Op.Size == 0 - ? nameof(Sse41.ConvertToVector128Int16) - : nameof(Sse41.ConvertToVector128Int32); - - int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 
8 : 0; - - EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size + 1); - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - Context.EmitCall(TypeMul.GetMethod(nameof(Sse2.MultiplyLow), TypesMulAdd)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesMulAdd)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmTernaryOpZx(Context, () => - { - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Add); - }); - } - } - - public static void Umlsl_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse41 && Op.Size < 2) - { - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesCvt = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size] }; - Type[] TypesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [Op.Size + 1], - VectorIntTypesPerSizeLog2 [Op.Size + 1] }; - - Type TypeMul = Op.Size == 0 ? typeof(Sse2) : typeof(Sse41); - - string NameCvt = Op.Size == 0 - ? nameof(Sse41.ConvertToVector128Int16) - : nameof(Sse41.ConvertToVector128Int32); - - int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 
8 : 0; - - EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size + 1); - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NameCvt, TypesCvt)); - - Context.EmitCall(TypeMul.GetMethod(nameof(Sse2.MultiplyLow), TypesMulSub)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesMulSub)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmTernaryOpZx(Context, () => - { - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - }); - } - } - - public static void Umull_V(AILEmitterCtx Context) - { - EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul)); - } - - public static void Uqadd_S(AILEmitterCtx Context) - { - EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Add); - } - - public static void Uqadd_V(AILEmitterCtx Context) - { - EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Add); - } - - public static void Uqsub_S(AILEmitterCtx Context) - { - EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Sub); - } - - public static void Uqsub_V(AILEmitterCtx Context) - { - EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Sub); - } - - public static void Uqxtn_S(AILEmitterCtx Context) - { - EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.ScalarZxZx); - } - - public static void Uqxtn_V(AILEmitterCtx Context) - { - EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.VectorZxZx); - } - - public static void Urhadd_V(AILEmitterCtx Context) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size < 2) - { 
- Type[] TypesAvg = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), TypesAvg)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorBinaryOpZx(Context, () => - { - Context.Emit(OpCodes.Add); - - Context.Emit(OpCodes.Ldc_I4_1); - Context.Emit(OpCodes.Add); - - Context.Emit(OpCodes.Ldc_I4_1); - Context.Emit(OpCodes.Shr_Un); - }); - } - } - - public static void Usqadd_S(AILEmitterCtx Context) - { - EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate); - } - - public static void Usqadd_V(AILEmitterCtx Context) - { - EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate); - } - - public static void Usubl_V(AILEmitterCtx Context) - { - if (AOptimizations.UseSse41) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesCvt = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size] }; - Type[] TypesSub = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size + 1], - VectorUIntTypesPerSizeLog2[Op.Size + 1] }; - - string[] NamesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), - nameof(Sse41.ConvertToVector128Int32), - nameof(Sse41.ConvertToVector128Int64) }; - - int NumBytes = Op.RegisterSize == ARegisterSize.SIMD128 ? 
8 : 0; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt)); - - EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size); - - Context.EmitLdc_I4(NumBytes); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), TypesSrl)); - - Context.EmitCall(typeof(Sse41).GetMethod(NamesCvt[Op.Size], TypesCvt)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), TypesSub)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size + 1); - } - else - { - EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); - } - } - - public static void Usubw_V(AILEmitterCtx Context) - { - EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); - } - - private static void EmitAbs(AILEmitterCtx Context) - { - AILLabel LblTrue = new AILLabel(); - - Context.Emit(OpCodes.Dup); - Context.Emit(OpCodes.Ldc_I4_0); - Context.Emit(OpCodes.Bge_S, LblTrue); - - Context.Emit(OpCodes.Neg); - - Context.MarkLabel(LblTrue); - } - - private static void EmitAddLongPairwise(AILEmitterCtx Context, bool Signed, bool Accumulate) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Words = Op.GetBitsCount() >> 4; - int Pairs = Words >> Op.Size; - - for (int Index = 0; Index < Pairs; Index++) - { - int Idx = Index << 1; - - EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed); - EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed); - - Context.Emit(OpCodes.Add); - - if (Accumulate) - { - EmitVectorExtract(Context, Op.Rd, Index, Op.Size + 1, Signed); - - Context.Emit(OpCodes.Add); - } - - EmitVectorInsertTmp(Context, Index, Op.Size + 1); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void 
EmitDoublingMultiplyHighHalf(AILEmitterCtx Context, bool Round) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int ESize = 8 << Op.Size; - - Context.Emit(OpCodes.Mul); - - if (!Round) - { - Context.EmitAsr(ESize - 1); - } - else - { - long RoundConst = 1L << (ESize - 1); - - AILLabel LblTrue = new AILLabel(); - - Context.EmitLsl(1); - - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - - Context.EmitAsr(ESize); - - Context.Emit(OpCodes.Dup); - Context.EmitLdc_I8((long)int.MinValue); - Context.Emit(OpCodes.Bne_Un_S, LblTrue); - - Context.Emit(OpCodes.Neg); - - Context.MarkLabel(LblTrue); - } - } - - private static void EmitHighNarrow(AILEmitterCtx Context, Action Emit, bool Round) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int Elems = 8 >> Op.Size; - - int ESize = 8 << Op.Size; - - int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; - - long RoundConst = 1L << (ESize - 1); - - if (Part != 0) - { - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - } - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); - EmitVectorExtractZx(Context, Op.Rm, Index, Op.Size + 1); - - Emit(); - - if (Round) - { - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - } - - Context.EmitLsr(ESize); - - EmitVectorInsertTmp(Context, Part + Index, Op.Size); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - } -} diff --git a/ChocolArm64/Instruction/InstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/InstEmitSimdArithmetic.cs new file mode 100644 index 0000000000..5668bb6442 --- /dev/null +++ b/ChocolArm64/Instruction/InstEmitSimdArithmetic.cs @@ -0,0 +1,2387 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection; +using 
System.Reflection.Emit; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Abs_S(ILEmitterCtx context) /* Scalar form: passes EmitAbs as the operation given to EmitScalarUnaryOpSx. */ + { + EmitScalarUnaryOpSx(context, () => EmitAbs(context)); + } + + public static void Abs_V(ILEmitterCtx context) /* Vector form: passes EmitAbs as the operation given to EmitVectorUnaryOpSx. */ + { + EmitVectorUnaryOpSx(context, () => EmitAbs(context)); + } + + public static void Add_S(ILEmitterCtx context) /* Scalar add: the operation given to EmitScalarBinaryOpZx is a single IL Add. */ + { + EmitScalarBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + + public static void Add_V(ILEmitterCtx context) /* Vector add: takes the EmitSse2Op(Sse2.Add) path when Optimizations.UseSse2 is set, else an IL Add via EmitVectorBinaryOpZx. */ + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.Add)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + } + + public static void Addhn_V(ILEmitterCtx context) /* Delegates to EmitHighNarrow with an IL Add and round: false. */ + { + EmitHighNarrow(context, () => context.Emit(OpCodes.Add), round: false); + } + + public static void Addp_S(ILEmitterCtx context) /* Extracts elements 0 and 1 of Rn, adds them, and stores the sum through EmitScalarSet into Rd. */ + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size); + EmitVectorExtractZx(context, op.Rn, 1, op.Size); + + context.Emit(OpCodes.Add); + + EmitScalarSet(context, op.Rd, op.Size); + } + + public static void Addp_V(ILEmitterCtx context) /* Pairwise form: the operation given to EmitVectorPairwiseOpZx is a single IL Add. */ + { + EmitVectorPairwiseOpZx(context, () => context.Emit(OpCodes.Add)); + } + + public static void Addv_V(ILEmitterCtx context) /* Emits a running sum over every element of Rn (elems = register bytes >> op.Size) and stores it through EmitScalarSet into Rd. */ + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size); /* element 0 seeds the sum, so the loop below starts at index 1 */ + + for (int index = 1; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.Emit(OpCodes.Add); + } + + EmitScalarSet(context, op.Rd, op.Size); + } + + public static void Cls_V(ILEmitterCtx context) /* Per element of Rn: calls SoftFallback.CountLeadingSigns with the element and eSize (8 << op.Size), then inserts the result into Rd. */ + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int eSize = 8 << op.Size; + + for (int 
index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.EmitLdc_I4(eSize); /* second argument of the fallback call: the element width in bits */ + + SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingSigns)); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Clz_V(ILEmitterCtx context) /* Per element of Rn: Lzcnt.LeadingZeroCount(uint) when Lzcnt is supported and eSize is 32, else SoftFallback.CountLeadingZeros with eSize; the result is inserted into Rd. */ + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + if (Lzcnt.IsSupported && eSize == 32) + { + context.Emit(OpCodes.Conv_U4); /* convert to the uint operand of the LeadingZeroCount overload looked up below */ + + context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) })); + + context.Emit(OpCodes.Conv_U8); /* convert the count back before EmitVectorInsert */ + } + else + { + context.EmitLdc_I4(eSize); + + SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingZeros)); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Cnt_V(ILEmitterCtx context) /* Per byte-sized element of Rn (16 for Simd128, otherwise 8): Popcnt.PopCount when supported, else SoftFallback.CountSetBits8; the result is inserted into Rd. */ + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, 0); /* size argument 0: elements are extracted and inserted with size 0 throughout this method */ + + if (Popcnt.IsSupported) + { + context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) })); + } + else + { + SoftFallback.EmitCall(context, nameof(SoftFallback.CountSetBits8)); + } + + EmitVectorInsert(context, op.Rd, index, 0); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Fabd_S(ILEmitterCtx context) /* Scalar: the operation is an IL Sub followed by a Math.Abs call (via EmitUnaryMathCall) on the difference. */ + { + EmitScalarBinaryOpF(context, () => + { + context.Emit(OpCodes.Sub); + + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } + + public static void Fabs_S(ILEmitterCtx context) /* Scalar: the operation is a Math.Abs call via EmitUnaryMathCall. */ + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } + + public static void Fabs_V(ILEmitterCtx context) /* Vector: the operation is a Math.Abs call via EmitUnaryMathCall. */ + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } + + public static void Fadd_S(ILEmitterCtx context) /* Uses the Sse.AddScalar path only when FastFP, UseSse and UseSse2 are all set; otherwise an EmitSoftFloatCall for FPAdd. */ + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitScalarSseOrSse2OpF(context, nameof(Sse.AddScalar)); + } + else + { + EmitScalarBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd)); + }); + } + } + + public static void Fadd_V(ILEmitterCtx context) /* Uses the Sse.Add path only when FastFP, UseSse and UseSse2 are all set; otherwise an EmitSoftFloatCall for FPAdd. */ + { + if (Optimizations.FastFP && Optimizations.UseSse + && Optimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(context, nameof(Sse.Add)); + } + else + { + EmitVectorBinaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd)); + }); + } + } + + public static void Faddp_S(ILEmitterCtx context) /* Adds FP elements 0 and 1 of Rn with a raw IL Add (no soft-float call on this path) and stores the sum through EmitScalarSetF into Rd. */ + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; /* only the low bit of op.Size is passed to the FP extract/set helpers */ + + EmitVectorExtractF(context, op.Rn, 0, sizeF); + EmitVectorExtractF(context, op.Rn, 1, sizeF); + + context.Emit(OpCodes.Add); + + EmitScalarSetF(context, op.Rd, sizeF); + } + + public static void Faddp_V(ILEmitterCtx context) /* Pairwise form: the operation given to EmitVectorPairwiseOpF is a single IL Add. */ + { + 
EmitVectorPairwiseOpF(context, () => context.Emit(OpCodes.Add));
        }

        // Scalar FP divide: SSE/SSE2 DivideScalar on the fast path, otherwise SoftFloat FPDiv.
        public static void Fdiv_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(context, nameof(Sse.DivideScalar));
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv));
                });
            }
        }

        // Vector FP divide: SSE/SSE2 Divide on the fast path, otherwise SoftFloat FPDiv per element.
        public static void Fdiv_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(context, nameof(Sse.Divide));
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv));
                });
            }
        }

        // Scalar FP multiply-add: Rd = Ra + Rn * Rm.
        // The fast path emits a separate scalar multiply followed by a scalar add
        // (Sse for op.Size == 0, Sse2 otherwise); the slow path calls SoftFloat FPMulAdd.
        public static void Fmadd_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                if (op.Size == 0)
                {
                    // The Sse scalar overloads take Vector128<float> operands.
                    Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    context.EmitLdvec(op.Ra);
                    context.EmitLdvec(op.Rn);
                    context.EmitLdvec(op.Rm);

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulAdd));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AddScalar),      typesMulAdd));

                    context.EmitStvec(op.Rd);

                    EmitVectorZero32_128(context, op.Rd);
                }
                else /* if (op.Size == 1) */
                {
                    // The Sse2 scalar overloads take Vector128<double> operands.
                    Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                    EmitLdvecWithCastToDouble(context, op.Ra);
                    EmitLdvecWithCastToDouble(context, op.Rn);
                    EmitLdvecWithCastToDouble(context, op.Rm);

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulAdd));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar),      typesMulAdd));

                    EmitStvecWithCastFromDouble(context, op.Rd);

                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd));
                });
            }
        }

        // Scalar FP maximum: SSE/SSE2 MaxScalar on the fast path, otherwise SoftFloat FPMax.
        public static void
Fmax_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(context, nameof(Sse.MaxScalar));
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
                });
            }
        }

        // Vector FP maximum: SSE/SSE2 Max on the fast path, otherwise SoftFloat FPMax per element.
        public static void Fmax_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(context, nameof(Sse.Max));
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
                });
            }
        }

        // Scalar FP "max number": always goes through SoftFloat FPMaxNum (no SSE path here).
        public static void Fmaxnm_S(ILEmitterCtx context)
        {
            EmitScalarBinaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum));
            });
        }

        // Vector FP "max number": SoftFloat FPMaxNum per element.
        public static void Fmaxnm_V(ILEmitterCtx context)
        {
            EmitVectorBinaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum));
            });
        }

        // Vector FP pairwise maximum using SoftFloat FPMax.
        public static void Fmaxp_V(ILEmitterCtx context)
        {
            EmitVectorPairwiseOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
            });
        }

        // Scalar FP minimum: SSE/SSE2 MinScalar on the fast path, otherwise SoftFloat FPMin.
        public static void Fmin_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(context, nameof(Sse.MinScalar));
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
                });
            }
        }

        // Vector FP minimum: SSE/SSE2 Min on the fast path, otherwise SoftFloat FPMin per element.
        public static void Fmin_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(context, nameof(Sse.Min));
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
                });
            }
        }

        // Scalar FP "min number": always goes through SoftFloat FPMinNum.
        public static void Fminnm_S(ILEmitterCtx context)
        {
            EmitScalarBinaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum));
            });
        }

        // Vector FP "min number": SoftFloat FPMinNum per element.
        public static void Fminnm_V(ILEmitterCtx context)
        {
EmitVectorBinaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum));
            });
        }

        // Vector FP pairwise minimum using SoftFloat FPMin.
        public static void Fminp_V(ILEmitterCtx context)
        {
            EmitVectorPairwiseOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
            });
        }

        // Scalar-by-element FP multiply-accumulate: emits IL Mul followed by IL Add.
        public static void Fmla_Se(ILEmitterCtx context)
        {
            EmitScalarTernaryOpByElemF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Add);
            });
        }

        // Vector FP multiply-accumulate: emits IL Mul followed by IL Add per element.
        public static void Fmla_V(ILEmitterCtx context)
        {
            EmitVectorTernaryOpF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Add);
            });
        }

        // Vector-by-element FP multiply-accumulate: emits IL Mul followed by IL Add.
        public static void Fmla_Ve(ILEmitterCtx context)
        {
            EmitVectorTernaryOpByElemF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Add);
            });
        }

        // Scalar-by-element FP multiply-subtract: emits IL Mul followed by IL Sub.
        public static void Fmls_Se(ILEmitterCtx context)
        {
            EmitScalarTernaryOpByElemF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Sub);
            });
        }

        // Vector FP multiply-subtract: emits IL Mul followed by IL Sub per element.
        public static void Fmls_V(ILEmitterCtx context)
        {
            EmitVectorTernaryOpF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Sub);
            });
        }

        // Vector-by-element FP multiply-subtract: emits IL Mul followed by IL Sub.
        public static void Fmls_Ve(ILEmitterCtx context)
        {
            EmitVectorTernaryOpByElemF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Sub);
            });
        }

        // Scalar FP multiply-subtract: Rd = Ra - Rn * Rm.
        // The fast path emits a scalar multiply followed by a scalar subtract
        // (Sse for op.Size == 0, Sse2 otherwise); the slow path calls SoftFloat FPMulSub.
        public static void Fmsub_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                if (op.Size == 0)
                {
                    // The Sse scalar overloads take Vector128<float> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    context.EmitLdvec(op.Ra);
                    context.EmitLdvec(op.Rn);
                    context.EmitLdvec(op.Rm);

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesMulSub));

                    context.EmitStvec(op.Rd);

                    EmitVectorZero32_128(context, op.Rd);
                }
                else /* if (op.Size == 1) */
                {
                    // The Sse2 scalar overloads take Vector128<double> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<double>),
typeof(Vector128<double>) };

                    EmitLdvecWithCastToDouble(context, op.Ra);
                    EmitLdvecWithCastToDouble(context, op.Rn);
                    EmitLdvecWithCastToDouble(context, op.Rm);

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));

                    EmitStvecWithCastFromDouble(context, op.Rd);

                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitScalarTernaryRaOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub));
                });
            }
        }

        // Scalar FP multiply: SSE/SSE2 MultiplyScalar on the fast path, otherwise SoftFloat FPMul.
        public static void Fmul_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(context, nameof(Sse.MultiplyScalar));
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul));
                });
            }
        }

        // Scalar-by-element FP multiply using the IL Mul opcode.
        public static void Fmul_Se(ILEmitterCtx context)
        {
            EmitScalarBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul));
        }

        // Vector FP multiply: SSE/SSE2 Multiply on the fast path, otherwise SoftFloat FPMul per element.
        public static void Fmul_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(context, nameof(Sse.Multiply));
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul));
                });
            }
        }

        // Vector-by-element FP multiply using the IL Mul opcode.
        public static void Fmul_Ve(ILEmitterCtx context)
        {
            EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul));
        }

        // Scalar FP "multiply extended": always goes through SoftFloat FPMulX.
        public static void Fmulx_S(ILEmitterCtx context)
        {
            EmitScalarBinaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX));
            });
        }

        // Scalar-by-element variant of Fmulx, via SoftFloat FPMulX.
        public static void Fmulx_Se(ILEmitterCtx context)
        {
            EmitScalarBinaryOpByElemF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX));
            });
        }

        // Vector variant of Fmulx, via SoftFloat FPMulX per element.
        public static void Fmulx_V(ILEmitterCtx context)
        {
            EmitVectorBinaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX));
            });
        }

        // Vector-by-element variant of Fmulx, via SoftFloat FPMulX.
        public static
void Fmulx_Ve(ILEmitterCtx context)
        {
            EmitVectorBinaryOpByElemF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX));
            });
        }

        // Scalar FP negate using the IL Neg opcode.
        public static void Fneg_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg));
        }

        // Vector FP negate using the IL Neg opcode per element.
        public static void Fneg_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg));
        }

        // Scalar FP negated multiply-add: Rd = (-Rn * Rm) - Ra, computed with plain IL arithmetic.
        public static void Fnmadd_S(ILEmitterCtx context)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int sizeF = op.Size & 1;

            EmitVectorExtractF(context, op.Rn, 0, sizeF);

            context.Emit(OpCodes.Neg);

            EmitVectorExtractF(context, op.Rm, 0, sizeF);

            context.Emit(OpCodes.Mul);

            EmitVectorExtractF(context, op.Ra, 0, sizeF);

            context.Emit(OpCodes.Sub);

            EmitScalarSetF(context, op.Rd, sizeF);
        }

        // Scalar FP negated multiply-subtract: Rd = (Rn * Rm) - Ra, computed with plain IL arithmetic.
        public static void Fnmsub_S(ILEmitterCtx context)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int sizeF = op.Size & 1;

            EmitVectorExtractF(context, op.Rn, 0, sizeF);
            EmitVectorExtractF(context, op.Rm, 0, sizeF);

            context.Emit(OpCodes.Mul);

            EmitVectorExtractF(context, op.Ra, 0, sizeF);

            context.Emit(OpCodes.Sub);

            EmitScalarSetF(context, op.Rd, sizeF);
        }

        // Scalar FP negated multiply: IL Mul followed by IL Neg.
        public static void Fnmul_S(ILEmitterCtx context)
        {
            EmitScalarBinaryOpF(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Neg);
            });
        }

        // Scalar FP reciprocal estimate via SoftFloat.RecipEstimate.
        public static void Frecpe_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpF(context, () =>
            {
                EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate));
            });
        }

        // Vector FP reciprocal estimate via SoftFloat.RecipEstimate per element.
        public static void Frecpe_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () =>
            {
                EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate));
            });
        }

        // Scalar FP reciprocal step: the fast path computes 2 - Rn * Rm with scalar SSE/SSE2
        // multiply and subtract; the slow path calls SoftFloat FPRecipStepFused.
        public static void Frecps_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Type[] typesSsv =
new Type[] { typeof(float) };
                    // The Sse overloads take Vector128<float> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    // Stack order: 2, Rn, Rm -> 2, Rn * Rm -> 2 - Rn * Rm.
                    context.EmitLdc_R4(2f);
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));

                    context.EmitLdvec(op.Rn);
                    context.EmitLdvec(op.Rm);

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesMulSub));

                    context.EmitStvec(op.Rd);

                    EmitVectorZero32_128(context, op.Rd);
                }
                else /* if (sizeF == 1) */
                {
                    Type[] typesSsv    = new Type[] { typeof(double) };
                    // The Sse2 overloads take Vector128<double> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                    context.EmitLdc_R8(2d);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));

                    EmitLdvecWithCastToDouble(context, op.Rn);
                    EmitLdvecWithCastToDouble(context, op.Rm);

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));

                    EmitStvecWithCastFromDouble(context, op.Rd);

                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused));
                });
            }
        }

        // Vector FP reciprocal step: the fast path computes 2 - Rn * Rm across all lanes with
        // SSE/SSE2 Multiply and Subtract; the slow path calls SoftFloat FPRecipStepFused per element.
        public static void Frecps_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Type[] typesSav    = new Type[] { typeof(float) };
                    // The Sse overloads take Vector128<float> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    context.EmitLdc_R4(2f);
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));

                    context.EmitLdvec(op.Rn);
                    context.EmitLdvec(op.Rm);

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));

context.EmitStvec(op.Rd);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        EmitVectorZeroUpper(context, op.Rd);
                    }
                }
                else /* if (sizeF == 1) */
                {
                    Type[] typesSav    = new Type[] { typeof(double) };
                    // The Sse2 overloads take Vector128<double> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                    context.EmitLdc_R8(2d);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                    EmitLdvecWithCastToDouble(context, op.Rn);
                    EmitLdvecWithCastToDouble(context, op.Rm);

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));

                    EmitStvecWithCastFromDouble(context, op.Rd);
                }
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused));
                });
            }
        }

        // Scalar FP reciprocal exponent via SoftFloat FPRecpX.
        public static void Frecpx_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpF(context, () =>
            {
                EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX));
            });
        }

        // Scalar FP round with midpoints rounded away from zero.
        public static void Frinta_S(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            EmitVectorExtractF(context, op.Rn, 0, op.Size);

            EmitRoundMathCall(context, MidpointRounding.AwayFromZero);

            EmitScalarSetF(context, op.Rd, op.Size);
        }

        // Vector FP round with midpoints rounded away from zero, per element.
        public static void Frinta_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () =>
            {
                EmitRoundMathCall(context, MidpointRounding.AwayFromZero);
            });
        }

        // Scalar FP round through VectorHelper.RoundF (op.Size == 0) or VectorHelper.Round
        // (op.Size == 1); the translated sub's state argument is pushed before the call.
        // Any other size throws InvalidOperationException when the lambda runs.
        public static void Frinti_S(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            EmitScalarUnaryOpF(context, () =>
            {
                context.EmitLdarg(TranslatedSub.StateArgIdx);

                if (op.Size == 0)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF));
                }
                else if (op.Size == 1)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.Round));
                }
                else
                {
                    throw new InvalidOperationException();
                }
            });
        }

        // Vector counterpart of Frinti_S; the helper is selected from bit 0 of op.Size.
        public static void Frinti_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op =
(OpCodeSimd64)context.CurrOp;

            int sizeF = op.Size & 1;

            EmitVectorUnaryOpF(context, () =>
            {
                context.EmitLdarg(TranslatedSub.StateArgIdx);

                if (sizeF == 0)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF));
                }
                else if (sizeF == 1)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.Round));
                }
                else
                {
                    throw new InvalidOperationException();
                }
            });
        }

        // Scalar FP round toward negative infinity via Math.Floor.
        public static void Frintm_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpF(context, () =>
            {
                EmitUnaryMathCall(context, nameof(Math.Floor));
            });
        }

        // Vector FP round toward negative infinity via Math.Floor per element.
        public static void Frintm_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () =>
            {
                EmitUnaryMathCall(context, nameof(Math.Floor));
            });
        }

        // Scalar FP round with midpoints rounded to even.
        public static void Frintn_S(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            EmitVectorExtractF(context, op.Rn, 0, op.Size);

            EmitRoundMathCall(context, MidpointRounding.ToEven);

            EmitScalarSetF(context, op.Rd, op.Size);
        }

        // Vector FP round with midpoints rounded to even, per element.
        public static void Frintn_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () =>
            {
                EmitRoundMathCall(context, MidpointRounding.ToEven);
            });
        }

        // Scalar FP round toward positive infinity via Math.Ceiling.
        public static void Frintp_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpF(context, () =>
            {
                EmitUnaryMathCall(context, nameof(Math.Ceiling));
            });
        }

        // Vector FP round toward positive infinity via Math.Ceiling per element.
        public static void Frintp_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () =>
            {
                EmitUnaryMathCall(context, nameof(Math.Ceiling));
            });
        }

        // Scalar FP round through VectorHelper.RoundF (op.Size == 0) or VectorHelper.Round
        // (op.Size == 1); the translated sub's state argument is pushed before the call.
        public static void Frintx_S(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            EmitScalarUnaryOpF(context, () =>
            {
                context.EmitLdarg(TranslatedSub.StateArgIdx);

                if (op.Size == 0)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF));
                }
                else if (op.Size == 1)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.Round));
                }
                else
                {
                    throw new InvalidOperationException();
                }
            });
        }

        // Vector counterpart of Frintx_S; the helper is selected directly from op.Size.
        public static void Frintx_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op =
(OpCodeSimd64)context.CurrOp;

            EmitVectorUnaryOpF(context, () =>
            {
                context.EmitLdarg(TranslatedSub.StateArgIdx);

                if (op.Size == 0)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF));
                }
                else if (op.Size == 1)
                {
                    VectorHelper.EmitCall(context, nameof(VectorHelper.Round));
                }
                else
                {
                    throw new InvalidOperationException();
                }
            });
        }

        // Scalar FP reciprocal square-root estimate via SoftFloat.InvSqrtEstimate.
        public static void Frsqrte_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpF(context, () =>
            {
                EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate));
            });
        }

        // Vector FP reciprocal square-root estimate via SoftFloat.InvSqrtEstimate per element.
        public static void Frsqrte_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpF(context, () =>
            {
                EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate));
            });
        }

        // Scalar FP reciprocal square-root step: the fast path computes 0.5 * (3 - Rn * Rm)
        // with scalar SSE/SSE2 operations; the slow path calls SoftFloat FPRSqrtStepFused.
        public static void Frsqrts_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Type[] typesSsv    = new Type[] { typeof(float) };
                    // The Sse overloads take Vector128<float> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    // Stack order: 0.5, 3, Rn, Rm -> 0.5, 3, Rn * Rm -> 0.5, 3 - Rn * Rm -> product.
                    context.EmitLdc_R4(0.5f);
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));

                    context.EmitLdc_R4(3f);
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));

                    context.EmitLdvec(op.Rn);
                    context.EmitLdvec(op.Rm);

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), typesMulSub));

                    context.EmitStvec(op.Rd);

                    EmitVectorZero32_128(context, op.Rd);
                }
                else /* if (sizeF == 1) */
                {
                    Type[] typesSsv    = new Type[] { typeof(double) };
                    // The Sse2 overloads take Vector128<double> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                    context.EmitLdc_R8(0.5d);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));

context.EmitLdc_R8(3d);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));

                    EmitLdvecWithCastToDouble(context, op.Rn);
                    EmitLdvecWithCastToDouble(context, op.Rm);

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub));

                    EmitStvecWithCastFromDouble(context, op.Rd);

                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused));
                });
            }
        }

        // Vector FP reciprocal square-root step: the fast path computes 0.5 * (3 - Rn * Rm)
        // across all lanes with SSE/SSE2; the slow path calls SoftFloat FPRSqrtStepFused per element.
        public static void Frsqrts_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Type[] typesSav    = new Type[] { typeof(float) };
                    // The Sse overloads take Vector128<float> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

                    context.EmitLdc_R4(0.5f);
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));

                    context.EmitLdc_R4(3f);
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));

                    context.EmitLdvec(op.Rn);
                    context.EmitLdvec(op.Rm);

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));

                    context.EmitStvec(op.Rd);

                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        EmitVectorZeroUpper(context, op.Rd);
                    }
                }
                else /* if (sizeF == 1) */
                {
                    Type[] typesSav    = new Type[] { typeof(double) };
                    // The Sse2 overloads take Vector128<double> operands.
                    Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                    context.EmitLdc_R8(0.5d);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                    context.EmitLdc_R8(3d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                    EmitLdvecWithCastToDouble(context, op.Rn);
                    EmitLdvecWithCastToDouble(context, op.Rm);

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));

                    EmitStvecWithCastFromDouble(context, op.Rd);
                }
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused));
                });
            }
        }

        // Scalar FP square root: SSE/SSE2 SqrtScalar on the fast path, otherwise SoftFloat FPSqrt.
        public static void Fsqrt_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(context, nameof(Sse.SqrtScalar));
            }
            else
            {
                EmitScalarUnaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt));
                });
            }
        }

        // Vector FP square root: SSE/SSE2 Sqrt on the fast path, otherwise SoftFloat FPSqrt per element.
        public static void Fsqrt_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(context, nameof(Sse.Sqrt));
            }
            else
            {
                EmitVectorUnaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt));
                });
            }
        }

        // Scalar FP subtract: SSE/SSE2 SubtractScalar on the fast path, otherwise SoftFloat FPSub.
        public static void Fsub_S(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitScalarSseOrSse2OpF(context, nameof(Sse.SubtractScalar));
            }
            else
            {
                EmitScalarBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
                });
            }
        }

        // Vector FP subtract: SSE/SSE2 Subtract on the fast path, otherwise SoftFloat FPSub per element.
        public static void Fsub_V(ILEmitterCtx context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse
                                     && Optimizations.UseSse2)
            {
                EmitVectorSseOrSse2OpF(context, nameof(Sse.Subtract));
            }
            else
            {
                EmitVectorBinaryOpF(context, () =>
                {
                    EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
                });
            }
        }

        // Vector integer multiply-accumulate: IL Mul followed by IL Add per element.
        public static void Mla_V(ILEmitterCtx context)
        {
            EmitVectorTernaryOpZx(context, () =>
{
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Add);
            });
        }

        // Vector-by-element integer multiply-accumulate: IL Mul followed by IL Add.
        public static void Mla_Ve(ILEmitterCtx context)
        {
            EmitVectorTernaryOpByElemZx(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Add);
            });
        }

        // Vector integer multiply-subtract: IL Mul followed by IL Sub per element.
        public static void Mls_V(ILEmitterCtx context)
        {
            EmitVectorTernaryOpZx(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Sub);
            });
        }

        // Vector-by-element integer multiply-subtract: IL Mul followed by IL Sub.
        public static void Mls_Ve(ILEmitterCtx context)
        {
            EmitVectorTernaryOpByElemZx(context, () =>
            {
                context.Emit(OpCodes.Mul);
                context.Emit(OpCodes.Sub);
            });
        }

        // Vector integer multiply using the IL Mul opcode per element.
        public static void Mul_V(ILEmitterCtx context)
        {
            EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
        }

        // Vector-by-element integer multiply using the IL Mul opcode.
        public static void Mul_Ve(ILEmitterCtx context)
        {
            EmitVectorBinaryOpByElemZx(context, () => context.Emit(OpCodes.Mul));
        }

        // Scalar integer negate (signed-extension helper) using the IL Neg opcode.
        public static void Neg_S(ILEmitterCtx context)
        {
            EmitScalarUnaryOpSx(context, () => context.Emit(OpCodes.Neg));
        }

        // Vector integer negate (signed-extension helper) using the IL Neg opcode per element.
        public static void Neg_V(ILEmitterCtx context)
        {
            EmitVectorUnaryOpSx(context, () => context.Emit(OpCodes.Neg));
        }

        // High-half narrowing add with rounding enabled.
        public static void Raddhn_V(ILEmitterCtx context)
        {
            EmitHighNarrow(context, () => context.Emit(OpCodes.Add), round: true);
        }

        // High-half narrowing subtract with rounding enabled.
        public static void Rsubhn_V(ILEmitterCtx context)
        {
            EmitHighNarrow(context, () => context.Emit(OpCodes.Sub), round: true);
        }

        // Signed absolute-difference and accumulate: Sub, absolute value, then Add.
        public static void Saba_V(ILEmitterCtx context)
        {
            EmitVectorTernaryOpSx(context, () =>
            {
                context.Emit(OpCodes.Sub);
                EmitAbs(context);

                context.Emit(OpCodes.Add);
            });
        }

        // Widening form of Saba_V (Rn and Rm go through the widening ternary helper).
        public static void Sabal_V(ILEmitterCtx context)
        {
            EmitVectorWidenRnRmTernaryOpSx(context, () =>
            {
                context.Emit(OpCodes.Sub);
                EmitAbs(context);

                context.Emit(OpCodes.Add);
            });
        }

        // Signed absolute difference: Sub followed by absolute value.
        public static void Sabd_V(ILEmitterCtx context)
        {
            EmitVectorBinaryOpSx(context, () =>
            {
                context.Emit(OpCodes.Sub);
                EmitAbs(context);
            });
        }

        // Widening form of Sabd_V (Rn and Rm go through the widening binary helper).
        public static void Sabdl_V(ILEmitterCtx context)
        {
EmitVectorWidenRnRmBinaryOpSx(context, () =>
            {
                context.Emit(OpCodes.Sub);
                EmitAbs(context);
            });
        }

        // Signed add-long-pairwise with accumulation.
        public static void Sadalp_V(ILEmitterCtx context)
        {
            EmitAddLongPairwise(context, signed: true, accumulate: true);
        }

        // Signed widening add. The SSE4.1 path optionally shifts each source right by 8 bytes
        // (128-bit register form) to select the upper half, sign-extends it with the matching
        // ConvertToVector128IntNN call and adds the widened vectors with Sse2.Add.
        // Without SSE4.1 the generic widening helper is used with IL Add.
        public static void Saddl_V(ILEmitterCtx context)
        {
            if (Optimizations.UseSse41)
            {
                OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

                Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
                Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
                                               VectorIntTypesPerSizeLog2[op.Size + 1] };

                string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
                                                   nameof(Sse41.ConvertToVector128Int32),
                                                   nameof(Sse41.ConvertToVector128Int64) };

                int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(numBytes);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));

                context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));

                // Rm must be loaded with the signed cast as well: the shift and convert methods
                // below are resolved with the signed vector types (typesSrl / typesCvt).
                EmitLdvecWithSignedCast(context, op.Rm, op.Size);

                context.EmitLdc_I4(numBytes);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));

                context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
            }
            else
            {
                EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Add));
            }
        }

        // Signed add-long-pairwise without accumulation.
        public static void Saddlp_V(ILEmitterCtx context)
        {
            EmitAddLongPairwise(context, signed: true, accumulate: false);
        }

        // Signed add where only Rm goes through the widening helper.
        public static void Saddw_V(ILEmitterCtx context)
        {
            EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add));
        }

        // Signed halving add. The SSE2 path (element size > 0) computes
        // (Rn & Rm) + ((Rn ^ Rm) >> 1) with an arithmetic shift; otherwise the generic
        // helper emits Add followed by a shift right by one.
        public static void Shadd_V(ILEmitterCtx context)
        {
            OpCodeSimdReg64 op =
(OpCodeSimdReg64)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Type[] typesSra       = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

                // Keep copies of both operands in the temporaries so they can be reused for the Xor.
                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.Emit(OpCodes.Dup);
                context.EmitStvectmp();

                EmitLdvecWithSignedCast(context, op.Rm, op.Size);

                context.Emit(OpCodes.Dup);
                context.EmitStvectmp2();

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));

                context.EmitLdvectmp();
                context.EmitLdvectmp2();

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));

                context.EmitLdc_I4(1);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));

                EmitStvecWithSignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorBinaryOpSx(context, () =>
                {
                    context.Emit(OpCodes.Add);

                    context.Emit(OpCodes.Ldc_I4_1);
                    context.Emit(OpCodes.Shr);
                });
            }
        }

        // Signed halving subtract. The SSE2 path (element size < 2) adds the element type's
        // MinValue to both operands, then emits n' - Average(n', m') using the unsigned Average;
        // otherwise the generic helper emits Sub followed by a shift right by one.
        public static void Shsub_V(ILEmitterCtx context)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size < 2)
            {
                Type[] typesSav    = new Type[] { IntTypesPerSizeLog2[op.Size] };
                Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
                Type[] typesAvg    = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };

                context.EmitLdc_I4(op.Size == 0 ?
sbyte.MinValue : short.MinValue);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                context.EmitStvectmp();

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);
                context.EmitLdvectmp();

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));

                // Duplicate the biased Rn: one copy feeds Average, the other is the minuend.
                context.Emit(OpCodes.Dup);

                EmitLdvecWithSignedCast(context, op.Rm, op.Size);
                context.EmitLdvectmp();

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAddSub));

                EmitStvecWithSignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorBinaryOpSx(context, () =>
                {
                    context.Emit(OpCodes.Sub);

                    context.Emit(OpCodes.Ldc_I4_1);
                    context.Emit(OpCodes.Shr);
                });
            }
        }

        // Signed vector maximum: calls Math.Max(long, long) per element.
        public static void Smax_V(ILEmitterCtx context)
        {
            Type[] types = new Type[] { typeof(long), typeof(long) };

            MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);

            EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
        }

        // Signed pairwise maximum: calls Math.Max(long, long) on each pair.
        public static void Smaxp_V(ILEmitterCtx context)
        {
            Type[] types = new Type[] { typeof(long), typeof(long) };

            MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);

            EmitVectorPairwiseOpSx(context, () => context.EmitCall(mthdInfo));
        }

        // Signed vector minimum: calls Math.Min(long, long) per element.
        public static void Smin_V(ILEmitterCtx context)
        {
            Type[] types = new Type[] { typeof(long), typeof(long) };

            MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);

            EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
        }

        // Signed pairwise minimum: calls Math.Min(long, long) on each pair.
        public static void Sminp_V(ILEmitterCtx context)
        {
            Type[] types = new Type[] { typeof(long), typeof(long) };

            MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);

            EmitVectorPairwiseOpSx(context, () =>
context.EmitCall(mthdInfo));
        }

        // Signed widening multiply-accumulate. The SSE4.1 path (element size < 2) sign-extends
        // the selected halves of Rn and Rm, multiplies them with MultiplyLow (taken from Sse2 for
        // size 0 and Sse41 for size 1) and adds the product to Rd. Otherwise the generic
        // widening helper emits IL Mul followed by IL Add.
        public static void Smlal_V(ILEmitterCtx context)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Type[] typesSrl    = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesCvt    = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
                Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
                                                  VectorIntTypesPerSizeLog2[op.Size + 1] };

                Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

                string nameCvt = op.Size == 0
                    ? nameof(Sse41.ConvertToVector128Int16)
                    : nameof(Sse41.ConvertToVector128Int32);

                // A byte shift of 8 selects the upper half for the 128-bit register form.
                int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;

                EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1);

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(numBytes);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));

                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));

                EmitLdvecWithSignedCast(context, op.Rm, op.Size);

                context.EmitLdc_I4(numBytes);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));

                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));

                context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));

                EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpSx(context, () =>
                {
                    context.Emit(OpCodes.Mul);
                    context.Emit(OpCodes.Add);
                });
            }
        }

        // Signed widening multiply-subtract: same structure as Smlal_V, with the product
        // subtracted from Rd instead of added.
        public static void Smlsl_V(ILEmitterCtx context)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 2)
            {
                Type[] typesSrl    = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesCvt    = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
                Type[] typesMulSub = new Type[] {
VectorIntTypesPerSizeLog2[op.Size + 1],
                                                  VectorIntTypesPerSizeLog2[op.Size + 1] };

                Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

                string nameCvt = op.Size == 0
                    ? nameof(Sse41.ConvertToVector128Int16)
                    : nameof(Sse41.ConvertToVector128Int32);

                // A byte shift of 8 selects the upper half for the 128-bit register form.
                int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;

                EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1);

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(numBytes);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));

                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));

                EmitLdvecWithSignedCast(context, op.Rm, op.Size);

                context.EmitLdc_I4(numBytes);
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));

                context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));

                context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));

                EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
            }
            else
            {
                EmitVectorWidenRnRmTernaryOpSx(context, () =>
                {
                    context.Emit(OpCodes.Mul);
                    context.Emit(OpCodes.Sub);
                });
            }
        }

        // Signed widening multiply via the generic widening helper and IL Mul.
        public static void Smull_V(ILEmitterCtx context)
        {
            EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Mul));
        }

        // Scalar signed saturating absolute value.
        public static void Sqabs_S(ILEmitterCtx context)
        {
            EmitScalarSaturatingUnaryOpSx(context, () => EmitAbs(context));
        }

        // Vector signed saturating absolute value.
        public static void Sqabs_V(ILEmitterCtx context)
        {
            EmitVectorSaturatingUnaryOpSx(context, () => EmitAbs(context));
        }

        // Scalar signed saturating add.
        public static void Sqadd_S(ILEmitterCtx context)
        {
            EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add);
        }

        // Vector signed saturating add.
        public static void Sqadd_V(ILEmitterCtx context)
        {
            EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add);
        }

        // Scalar signed saturating doubling multiply, high half, without rounding.
        public static void Sqdmulh_S(ILEmitterCtx context)
        {
            EmitSaturatingBinaryOp(context, () =>
EmitDoublingMultiplyHighHalf(context, round: false), SaturatingFlags.ScalarSx); + } + + public static void Sqdmulh_V(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => EmitDoublingMultiplyHighHalf(context, round: false), SaturatingFlags.VectorSx); + } + + public static void Sqneg_S(ILEmitterCtx context) + { + EmitScalarSaturatingUnaryOpSx(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Sqneg_V(ILEmitterCtx context) + { + EmitVectorSaturatingUnaryOpSx(context, () => context.Emit(OpCodes.Neg)); + } + + public static void Sqrdmulh_S(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => EmitDoublingMultiplyHighHalf(context, round: true), SaturatingFlags.ScalarSx); + } + + public static void Sqrdmulh_V(ILEmitterCtx context) + { + EmitSaturatingBinaryOp(context, () => EmitDoublingMultiplyHighHalf(context, round: true), SaturatingFlags.VectorSx); + } + + public static void Sqsub_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqsub_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqxtn_S(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqxtn_V(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqxtun_S(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqxtun_V(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } + + public static void Srhadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Type[] typesSav = new Type[] { IntTypesPerSizeLog2[op.Size] }; + Type[] typesSubAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], 
VectorIntTypesPerSizeLog2 [op.Size] }; + Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + context.EmitLdc_I4(op.Size == 0 ? sbyte.MinValue : short.MinValue); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd)); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpSx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + }); + } + } + + public static void Ssubl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } + } + + public static void Ssubw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } + + public static void Sub_S(ILEmitterCtx context) + { + EmitScalarBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + + public static void Sub_V(ILEmitterCtx context) + { + if (Optimizations.UseSse2) + { + EmitSse2Op(context, nameof(Sse2.Subtract)); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + } + + public static void Subhn_V(ILEmitterCtx context) + { + EmitHighNarrow(context, () => context.Emit(OpCodes.Sub), round: false); + } + + public static void Suqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Suqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Uaba_V(ILEmitterCtx context) + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + + context.Emit(OpCodes.Add); + }); + } + + public static void Uabal_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmTernaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + + 
context.Emit(OpCodes.Add); + }); + } + + public static void Uabd_V(ILEmitterCtx context) + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } + + public static void Uabdl_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } + + public static void Uadalp_V(ILEmitterCtx context) + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } + + public static void Uaddl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + } + + public static void Uaddlp_V(ILEmitterCtx context) + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } + + public static void Uaddlv_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + for (int index = 1; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.Emit(OpCodes.Add); + } + + EmitScalarSet(context, op.Rd, op.Size + 1); + } + + public static void Uaddw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } + + public static void Uhadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + 
context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); + + context.EmitLdc_I4(1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr_Un); + }); + } + } + + public static void Uhsub_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.Emit(OpCodes.Dup); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAvgSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr_Un); + }); + } + } + + public static void Umax_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Umaxp_V(ILEmitterCtx context) + 
{ + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorPairwiseOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Umin_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Uminp_V(ILEmitterCtx context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorPairwiseOpZx(context, () => context.EmitCall(mthdInfo)); + } + + public static void Umlal_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], + VectorIntTypesPerSizeLog2 [op.Size + 1] }; + + Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + string nameCvt = op.Size == 0 + ? nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } + } + + public static void Umlsl_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], + VectorIntTypesPerSizeLog2 [op.Size + 1] }; + + Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + string nameCvt = op.Size == 0 + ? nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } + } + + public static void Umull_V(ILEmitterCtx context) + { + EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + } + + public static void Uqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqsub_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqsub_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqxtn_S(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqxtn_V(ILEmitterCtx context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } + + public static void Urhadd_V(ILEmitterCtx context) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + 
Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Add); + + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr_Un); + }); + } + } + + public static void Usqadd_S(ILEmitterCtx context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usqadd_V(ILEmitterCtx context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usubl_V(ILEmitterCtx context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + } + + public static void Usubw_V(ILEmitterCtx context) + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } + + private static void EmitAbs(ILEmitterCtx context) + { + ILLabel lblTrue = new ILLabel(); + + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_0); + context.Emit(OpCodes.Bge_S, lblTrue); + + context.Emit(OpCodes.Neg); + + context.MarkLabel(lblTrue); + } + + private static void EmitAddLongPairwise(ILEmitterCtx context, bool signed, bool accumulate) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtract(context, op.Rn, idx, op.Size, signed); + EmitVectorExtract(context, op.Rn, idx + 1, op.Size, signed); + + context.Emit(OpCodes.Add); + + if (accumulate) + { + EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + + context.Emit(OpCodes.Add); + } + + EmitVectorInsertTmp(context, index, op.Size + 1); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + private static void 
EmitDoublingMultiplyHighHalf(ILEmitterCtx context, bool round) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int eSize = 8 << op.Size; + + context.Emit(OpCodes.Mul); + + if (!round) + { + context.EmitAsr(eSize - 1); + } + else + { + long roundConst = 1L << (eSize - 1); + + ILLabel lblTrue = new ILLabel(); + + context.EmitLsl(1); + + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + + context.EmitAsr(eSize); + + context.Emit(OpCodes.Dup); + context.EmitLdc_I8((long)int.MinValue); + context.Emit(OpCodes.Bne_Un_S, lblTrue); + + context.Emit(OpCodes.Neg); + + context.MarkLabel(lblTrue); + } + } + + private static void EmitHighNarrow(ILEmitterCtx context, Action emit, bool round) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int elems = 8 >> op.Size; + + int eSize = 8 << op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + long roundConst = 1L << (eSize - 1); + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); + + emit(); + + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLsr(eSize); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } +}