From 03d579243e8b7a73a8a812a5d3d8891c5036f976 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Wed, 31 Oct 2018 16:39:46 +0100 Subject: [PATCH] Update and rename AInstEmitSimdShift.cs to InstEmitSimdShift.cs --- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 865 ------------------ ChocolArm64/Instruction/InstEmitSimdShift.cs | 865 ++++++++++++++++++ 2 files changed, 865 insertions(+), 865 deletions(-) delete mode 100644 ChocolArm64/Instruction/AInstEmitSimdShift.cs create mode 100644 ChocolArm64/Instruction/InstEmitSimdShift.cs diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs deleted file mode 100644 index 4f828cf8ad..0000000000 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ /dev/null @@ -1,865 +0,0 @@ -// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h - -using ChocolArm64.Decoder; -using ChocolArm64.State; -using ChocolArm64.Translation; -using System; -using System.Reflection.Emit; -using System.Runtime.Intrinsics.X86; - -using static ChocolArm64.Instruction.AInstEmitSimdHelper; - -namespace ChocolArm64.Instruction -{ - static partial class AInstEmit - { - public static void Rshrn_V(AILEmitterCtx Context) - { - EmitVectorShrImmNarrowOpZx(Context, Round: true); - } - - public static void Shl_S(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - EmitScalarUnaryOpZx(Context, () => - { - Context.EmitLdc_I4(GetImmShl(Op)); - - Context.Emit(OpCodes.Shl); - }); - } - - public static void Shl_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0) - { - Type[] TypesSll = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(GetImmShl(Op)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesSll)); - - 
EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorUnaryOpZx(Context, () => - { - Context.EmitLdc_I4(GetImmShl(Op)); - - Context.Emit(OpCodes.Shl); - }); - } - } - - public static void Shll_V(AILEmitterCtx Context) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Shift = 8 << Op.Size; - - EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); - } - - public static void Shrn_V(AILEmitterCtx Context) - { - EmitVectorShrImmNarrowOpZx(Context, Round: false); - } - - public static void Sli_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - int Bytes = Op.GetBitsCount() >> 3; - int Elems = Bytes >> Op.Size; - - int Shift = GetImmShl(Op); - - ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0; - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); - - Context.EmitLdc_I4(Shift); - - Context.Emit(OpCodes.Shl); - - EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size); - - Context.EmitLdc_I8((long)Mask); - - Context.Emit(OpCodes.And); - Context.Emit(OpCodes.Or); - - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - public static void Sqrshrn_S(AILEmitterCtx Context) - { - EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxSx); - } - - public static void Sqrshrn_V(AILEmitterCtx Context) - { - EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxSx); - } - - public static void Sqrshrun_S(AILEmitterCtx Context) - { - EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxZx); - } - - public static void Sqrshrun_V(AILEmitterCtx Context) - { - EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxZx); - } - - public 
static void Sqshrn_S(AILEmitterCtx Context) - { - EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxSx); - } - - public static void Sqshrn_V(AILEmitterCtx Context) - { - EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxSx); - } - - public static void Sqshrun_S(AILEmitterCtx Context) - { - EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxZx); - } - - public static void Sqshrun_V(AILEmitterCtx Context) - { - EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxZx); - } - - public static void Srshr_S(AILEmitterCtx Context) - { - EmitScalarShrImmOpSx(Context, ShrImmFlags.Round); - } - - public static void Srshr_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0 - && Op.Size < 3) - { - Type[] TypesShs = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; - - int Shift = GetImmShr(Op); - int ESize = 8 << Op.Size; - - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.Emit(OpCodes.Dup); - Context.EmitStvectmp(); - - Context.EmitLdc_I4(ESize - Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs)); - - Context.EmitLdc_I4(ESize - 1); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs)); - - Context.EmitLdvectmp(); - - Context.EmitLdc_I4(Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesShs)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorShrImmOpSx(Context, ShrImmFlags.Round); - } - } - - public static void Srsra_S(AILEmitterCtx Context) - { - 
EmitScalarShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); - } - - public static void Srsra_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0 - && Op.Size < 3) - { - Type[] TypesShs = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; - - int Shift = GetImmShr(Op); - int ESize = 8 << Op.Size; - - EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size); - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.Emit(OpCodes.Dup); - Context.EmitStvectmp(); - - Context.EmitLdc_I4(ESize - Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs)); - - Context.EmitLdc_I4(ESize - 1); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs)); - - Context.EmitLdvectmp(); - - Context.EmitLdc_I4(Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesShs)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); - } - } - - public static void Sshl_V(AILEmitterCtx Context) - { - EmitVectorShl(Context, Signed: true); - } - - public static void Sshll_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); - } - - public static void Sshr_S(AILEmitterCtx Context) - { - EmitShrImmOp(Context, ShrImmFlags.ScalarSx); - } - - public static void Sshr_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = 
(AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0 - && Op.Size < 3) - { - Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(GetImmShr(Op)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitShrImmOp(Context, ShrImmFlags.VectorSx); - } - } - - public static void Ssra_S(AILEmitterCtx Context) - { - EmitScalarShrImmOpSx(Context, ShrImmFlags.Accumulate); - } - - public static void Ssra_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0 - && Op.Size < 3) - { - Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; - - EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size); - EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(GetImmShr(Op)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); - } - } - - public static void Uqrshrn_S(AILEmitterCtx Context) - { - EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarZxZx); - } - - public static void Uqrshrn_V(AILEmitterCtx Context) - { - EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorZxZx); - } - - public static void Uqshrn_S(AILEmitterCtx Context) - { - 
EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarZxZx); - } - - public static void Uqshrn_V(AILEmitterCtx Context) - { - EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorZxZx); - } - - public static void Urshr_S(AILEmitterCtx Context) - { - EmitScalarShrImmOpZx(Context, ShrImmFlags.Round); - } - - public static void Urshr_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0) - { - Type[] TypesShs = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - int Shift = GetImmShr(Op); - int ESize = 8 << Op.Size; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.Emit(OpCodes.Dup); - Context.EmitStvectmp(); - - Context.EmitLdc_I4(ESize - Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs)); - - Context.EmitLdc_I4(ESize - 1); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs)); - - Context.EmitLdvectmp(); - - Context.EmitLdc_I4(Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorShrImmOpZx(Context, ShrImmFlags.Round); - } - } - - public static void Ursra_S(AILEmitterCtx Context) - { - EmitScalarShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); - } - - public static void Ursra_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0) - { - Type[] TypesShs = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAdd = new Type[] { 
VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - int Shift = GetImmShr(Op); - int ESize = 8 << Op.Size; - - EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size); - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.Emit(OpCodes.Dup); - Context.EmitStvectmp(); - - Context.EmitLdc_I4(ESize - Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs)); - - Context.EmitLdc_I4(ESize - 1); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs)); - - Context.EmitLdvectmp(); - - Context.EmitLdc_I4(Shift); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); - } - } - - public static void Ushl_V(AILEmitterCtx Context) - { - EmitVectorShl(Context, Signed: false); - } - - public static void Ushll_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); - } - - public static void Ushr_S(AILEmitterCtx Context) - { - EmitShrImmOp(Context, ShrImmFlags.ScalarZx); - } - - public static void Ushr_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0) - { - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(GetImmShr(Op)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); 
- - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitShrImmOp(Context, ShrImmFlags.VectorZx); - } - } - - public static void Usra_S(AILEmitterCtx Context) - { - EmitScalarShrImmOpZx(Context, ShrImmFlags.Accumulate); - } - - public static void Usra_V(AILEmitterCtx Context) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - if (AOptimizations.UseSse2 && Op.Size > 0) - { - Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; - Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; - - EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size); - EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); - - Context.EmitLdc_I4(GetImmShr(Op)); - - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl)); - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); - - EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - else - { - EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); - } - } - - private static void EmitVectorShl(AILEmitterCtx Context, bool Signed) - { - //This instruction shifts the value on vector A by the number of bits - //specified on the signed, lower 8 bits of vector B. If the shift value - //is greater or equal to the data size of each lane, then the result is zero. - //Additionally, negative shifts produces right shifts by the negated shift value. 
- AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int MaxShift = 8 << Op.Size; - - Action Emit = () => - { - AILLabel LblShl = new AILLabel(); - AILLabel LblZero = new AILLabel(); - AILLabel LblEnd = new AILLabel(); - - void EmitShift(OpCode ILOp) - { - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(MaxShift); - - Context.Emit(OpCodes.Bge_S, LblZero); - Context.Emit(ILOp); - Context.Emit(OpCodes.Br_S, LblEnd); - } - - Context.Emit(OpCodes.Conv_I1); - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(0); - - Context.Emit(OpCodes.Bge_S, LblShl); - Context.Emit(OpCodes.Neg); - - EmitShift(Signed - ? OpCodes.Shr - : OpCodes.Shr_Un); - - Context.MarkLabel(LblShl); - - EmitShift(OpCodes.Shl); - - Context.MarkLabel(LblZero); - - Context.Emit(OpCodes.Pop); - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I8(0); - - Context.MarkLabel(LblEnd); - }; - - if (Signed) - { - EmitVectorBinaryOpSx(Context, Emit); - } - else - { - EmitVectorBinaryOpZx(Context, Emit); - } - } - - [Flags] - private enum ShrImmFlags - { - Scalar = 1 << 0, - Signed = 1 << 1, - - Round = 1 << 2, - Accumulate = 1 << 3, - - ScalarSx = Scalar | Signed, - ScalarZx = Scalar, - - VectorSx = Signed, - VectorZx = 0 - } - - private static void EmitScalarShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags) - { - EmitShrImmOp(Context, ShrImmFlags.ScalarSx | Flags); - } - - private static void EmitScalarShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags) - { - EmitShrImmOp(Context, ShrImmFlags.ScalarZx | Flags); - } - - private static void EmitVectorShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags) - { - EmitShrImmOp(Context, ShrImmFlags.VectorSx | Flags); - } - - private static void EmitVectorShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags) - { - EmitShrImmOp(Context, ShrImmFlags.VectorZx | Flags); - } - - private static void EmitShrImmOp(AILEmitterCtx Context, ShrImmFlags Flags) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - bool Scalar = (Flags & ShrImmFlags.Scalar) != 0; - bool 
Signed = (Flags & ShrImmFlags.Signed) != 0; - bool Round = (Flags & ShrImmFlags.Round) != 0; - bool Accumulate = (Flags & ShrImmFlags.Accumulate) != 0; - - int Shift = GetImmShr(Op); - - long RoundConst = 1L << (Shift - 1); - - int Bytes = Op.GetBitsCount() >> 3; - int Elems = !Scalar ? Bytes >> Op.Size : 1; - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); - - if (Op.Size <= 2) - { - if (Round) - { - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - } - - Context.EmitLdc_I4(Shift); - - Context.Emit(Signed ? OpCodes.Shr : OpCodes.Shr_Un); - } - else /* if (Op.Size == 3) */ - { - EmitShrImm_64(Context, Signed, Round ? RoundConst : 0L, Shift); - } - - if (Accumulate) - { - EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed); - - Context.Emit(OpCodes.Add); - } - - EmitVectorInsertTmp(Context, Index, Op.Size); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitVectorShrImmNarrowOpZx(AILEmitterCtx Context, bool Round) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - int Shift = GetImmShr(Op); - - long RoundConst = 1L << (Shift - 1); - - int Elems = 8 >> Op.Size; - - int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? 
Elems : 0; - - if (Part != 0) - { - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - } - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); - - if (Round) - { - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - } - - Context.EmitLdc_I4(Shift); - - Context.Emit(OpCodes.Shr_Un); - - EmitVectorInsertTmp(Context, Part + Index, Op.Size); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - [Flags] - private enum ShrImmSaturatingNarrowFlags - { - Scalar = 1 << 0, - SignedSrc = 1 << 1, - SignedDst = 1 << 2, - - Round = 1 << 3, - - ScalarSxSx = Scalar | SignedSrc | SignedDst, - ScalarSxZx = Scalar | SignedSrc, - ScalarZxZx = Scalar, - - VectorSxSx = SignedSrc | SignedDst, - VectorSxZx = SignedSrc, - VectorZxZx = 0 - } - - private static void EmitRoundShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags) - { - EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.Round | Flags); - } - - private static void EmitShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags) - { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - bool Scalar = (Flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; - bool SignedSrc = (Flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; - bool SignedDst = (Flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; - bool Round = (Flags & ShrImmSaturatingNarrowFlags.Round) != 0; - - int Shift = GetImmShr(Op); - - long RoundConst = 1L << (Shift - 1); - - int Elems = !Scalar ? 8 >> Op.Size : 1; - - int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? 
Elems : 0; - - if (Scalar) - { - EmitVectorZeroLowerTmp(Context); - } - - if (Part != 0) - { - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - } - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); - - if (Op.Size <= 1 || !Round) - { - if (Round) - { - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - } - - Context.EmitLdc_I4(Shift); - - Context.Emit(SignedSrc ? OpCodes.Shr : OpCodes.Shr_Un); - } - else /* if (Op.Size == 2 && Round) */ - { - EmitShrImm_64(Context, SignedSrc, RoundConst, Shift); // Shift <= 32 - } - - EmitSatQ(Context, Op.Size, SignedSrc, SignedDst); - - EmitVectorInsertTmp(Context, Part + Index, Op.Size); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - // Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift; - private static void EmitShrImm_64( - AILEmitterCtx Context, - bool Signed, - long RoundConst, - int Shift) - { - Context.EmitLdc_I8(RoundConst); - Context.EmitLdc_I4(Shift); - - ASoftFallback.EmitCall(Context, Signed - ? nameof(ASoftFallback.SignedShrImm_64) - : nameof(ASoftFallback.UnsignedShrImm_64)); - } - - private static void EmitVectorShImmWidenBinarySx(AILEmitterCtx Context, Action Emit, int Imm) - { - EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, true); - } - - private static void EmitVectorShImmWidenBinaryZx(AILEmitterCtx Context, Action Emit, int Imm) - { - EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, false); - } - - private static void EmitVectorShImmWidenBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Elems = 8 >> Op.Size; - - int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? 
Elems : 0; - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtract(Context, Op.Rn, Part + Index, Op.Size, Signed); - - Context.EmitLdc_I4(Imm); - - Emit(); - - EmitVectorInsertTmp(Context, Index, Op.Size + 1); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - } - } -} diff --git a/ChocolArm64/Instruction/InstEmitSimdShift.cs b/ChocolArm64/Instruction/InstEmitSimdShift.cs new file mode 100644 index 0000000000..b183e8aa66 --- /dev/null +++ b/ChocolArm64/Instruction/InstEmitSimdShift.cs @@ -0,0 +1,865 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ChocolArm64.Decoders; +using ChocolArm64.State; +using ChocolArm64.Translation; +using System; +using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; + +using static ChocolArm64.Instructions.InstEmitSimdHelper; + +namespace ChocolArm64.Instructions +{ + static partial class InstEmit + { + public static void Rshrn_V(ILEmitterCtx context) + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } + + public static void Shl_S(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + EmitScalarUnaryOpZx(context, () => + { + context.EmitLdc_I4(GetImmShl(op)); + + context.Emit(OpCodes.Shl); + }); + } + + public static void Shl_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShl(op)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpZx(context, () => + { + context.EmitLdc_I4(GetImmShl(op)); + + context.Emit(OpCodes.Shl); + }); + } + } + + public 
static void Shll_V(ILEmitterCtx context) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int shift = 8 << op.Size; + + EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift); + } + + public static void Shrn_V(ILEmitterCtx context) + { + EmitVectorShrImmNarrowOpZx(context, round: false); + } + + public static void Sli_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + int shift = GetImmShl(op); + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size); + + context.EmitLdc_I4(shift); + + context.Emit(OpCodes.Shl); + + EmitVectorExtractZx(context, op.Rd, index, op.Size); + + context.EmitLdc_I8((long)mask); + + context.Emit(OpCodes.And); + context.Emit(OpCodes.Or); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + public static void Sqrshrn_S(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqrshrn_V(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqrshrun_S(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqrshrun_V(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Sqshrn_S(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqshrn_V(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void 
Sqshrun_S(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqshrun_V(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Srshr_S(ILEmitterCtx context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } + + public static void Srshr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round); + } + } + + public static void Srsra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Srsra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesShs = new Type[] { 
VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithSignedCast(context, op.Rd, op.Size); + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + /* Sshl_V: delegates to EmitVectorShl with signed: true. */ public static void Sshl_V(ILEmitterCtx context) + { + EmitVectorShl(context, signed: true); + } + + /* Sshll_V: widening left shift by the immediate GetImmShl(op); uses the signed (Sx) widening helper with an OpCodes.Shl callback. */ public static void Sshll_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op)); + } + + /* Sshr_S: shift right by immediate through EmitShrImmOp with ShrImmFlags.ScalarSx (Scalar | Signed); no Round or Accumulate flag. */ public static void Sshr_S(ILEmitterCtx context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx); + } + + /* Sshr_V: when Optimizations.UseSse2 is set and op.Size is 1 or 2, emits a call to Sse2.ShiftRightArithmetic on Rn by GetImmShr(op), stores to Rd and calls EmitVectorZeroUpper for Simd64; every other case goes through EmitShrImmOp(VectorSx). NOTE(review): Size 3 is presumably excluded because SSE2 offers no 64-bit arithmetic right shift - confirm. */ public static void Sshr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); +
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorSx); + } + } + + /* Ssra_S: scalar signed shift right by immediate with the Accumulate flag. */ public static void Ssra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + } + + /* Ssra_V: the SSE2 path (op.Size 1 or 2) loads Rd and Rn, arithmetic-shifts Rn right by GetImmShr(op), adds the result to Rd and stores it back to Rd; the fallback is EmitVectorShrImmOpSx(Accumulate). */ public static void Ssra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 + && op.Size < 3) + { + Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithSignedCast(context, op.Rd, op.Size); + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate); + } + } + + /* Uqrshrn_S: saturating narrowing shift right with the Round flag added, ScalarZxZx flags (scalar, neither SignedSrc nor SignedDst). */ public static void Uqrshrn_S(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + /* Uqrshrn_V: as Uqrshrn_S but with VectorZxZx flags (no Scalar bit). */ public static void Uqrshrn_V(ILEmitterCtx context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + /* Uqshrn_S: saturating narrowing shift right without rounding, ScalarZxZx flags. */ public static void Uqshrn_S(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + /* Uqshrn_V: saturating narrowing shift right without rounding, VectorZxZx flags. */ public static void Uqshrn_V(ILEmitterCtx context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + /* Urshr_S: scalar unsigned shift right by immediate with the Round flag. */ public static void Urshr_S(ILEmitterCtx context) + { +
EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + } + + /* Urshr_V: the SSE2 path (op.Size > 0) adds a rounding bit to the logical right shift of Rn. The rounding bit is bit (shift - 1) of each element, isolated by shifting left by (eSize - shift) and then logically right by (eSize - 1); a copy of Rn is kept in the temp vector for the main shift. The fallback is EmitVectorShrImmOpZx(Round). */ public static void Urshr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round); + } + } + + /* Ursra_S: scalar unsigned shift right by immediate with both Round and Accumulate flags. */ public static void Ursra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + /* Ursra_V: same SSE2 rounding sequence as Urshr_V, but Rd is loaded first and the second Sse2.Add accumulates the rounded shift into it; the fallback is EmitVectorShrImmOpZx(Round | Accumulate). */ public static void Ursra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.Emit(OpCodes.Dup); +
context.EmitStvectmp(); + + context.EmitLdc_I4(eSize - shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdc_I4(eSize - 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvectmp(); + + context.EmitLdc_I4(shift); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + /* Ushl_V: delegates to EmitVectorShl with signed: false. */ public static void Ushl_V(ILEmitterCtx context) + { + EmitVectorShl(context, signed: false); + } + + /* Ushll_V: widening left shift by the immediate GetImmShl(op); uses the unsigned (Zx) widening helper with an OpCodes.Shl callback. */ public static void Ushll_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op)); + } + + /* Ushr_S: shift right by immediate through EmitShrImmOp with ShrImmFlags.ScalarZx (Scalar only); no Round or Accumulate flag. */ public static void Ushr_S(ILEmitterCtx context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx); + } + + /* Ushr_V: when Optimizations.UseSse2 is set and op.Size > 0, emits a call to Sse2.ShiftRightLogical on Rn by GetImmShr(op), stores to Rd and calls EmitVectorZeroUpper for Simd64; otherwise falls back to EmitShrImmOp(VectorZx). */ public static void Ushr_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorZx); + } + } + + /* Usra_S: scalar unsigned shift right by immediate with the Accumulate flag. */ public static void Usra_S(ILEmitterCtx context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); +
} + + /* Usra_V: the SSE2 path (op.Size > 0) loads Rd and Rn, logically shifts Rn right by GetImmShr(op), adds the result to Rd and stores it back to Rd; the fallback is EmitVectorShrImmOpZx(Accumulate). */ public static void Usra_V(ILEmitterCtx context) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + + context.EmitLdc_I4(GetImmShr(op)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate); + } + } + + /* EmitVectorShl: shared emitter for Sshl_V and Ushl_V. Builds a per-element callback and runs it through EmitVectorBinaryOpSx (signed) or EmitVectorBinaryOpZx (unsigned). The callback narrows the shift operand with Conv_I1, branches on its sign, and compares the magnitude against maxShift = 8 << op.Size before shifting. */ private static void EmitVectorShl(ILEmitterCtx context, bool signed) + { + //This instruction shifts the value on vector A by the number of bits + //specified on the signed, lower 8 bits of vector B. If the shift value + //is greater or equal to the data size of each lane, then the result is zero. + //Additionally, negative shifts produce right shifts by the negated shift value. + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int maxShift = 8 << op.Size; + + Action emit = () => + { + ILLabel lblShl = new ILLabel(); + ILLabel lblZero = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + void EmitShift(OpCode ilOp) + { + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(maxShift); + + context.Emit(OpCodes.Bge_S, lblZero); + context.Emit(ilOp); + context.Emit(OpCodes.Br_S, lblEnd); + } + + context.Emit(OpCodes.Conv_I1); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(0); + + context.Emit(OpCodes.Bge_S, lblShl); + context.Emit(OpCodes.Neg); + + EmitShift(signed + ?
OpCodes.Shr + : OpCodes.Shr_Un); + + /* Non-negative shift amounts land here and shift left. */ context.MarkLabel(lblShl); + + EmitShift(OpCodes.Shl); + + /* Shift amount >= maxShift: drop both the shift amount and the value, then push 0 as the result. */ context.MarkLabel(lblZero); + + context.Emit(OpCodes.Pop); + context.Emit(OpCodes.Pop); + + context.EmitLdc_I8(0); + + context.MarkLabel(lblEnd); + }; + + if (signed) + { + EmitVectorBinaryOpSx(context, emit); + } + else + { + EmitVectorBinaryOpZx(context, emit); + } + } + + /* ShrImmFlags: option bits read by EmitShrImmOp. Scalar processes a single element instead of every lane; Signed selects signed extraction and OpCodes.Shr; Round adds 1 << (shift - 1) before shifting; Accumulate adds the matching destination element. The Sx and Zx members are the scalar or vector, signed or unsigned combinations. */ [Flags] + private enum ShrImmFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Round = 1 << 2, + Accumulate = 1 << 3, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + /* EmitScalarShrImmOpSx: EmitShrImmOp with ScalarSx ORed into the caller flags. */ private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags); + } + + /* EmitScalarShrImmOpZx: EmitShrImmOp with ScalarZx ORed into the caller flags. */ private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags); + } + + /* EmitVectorShrImmOpSx: EmitShrImmOp with VectorSx ORed into the caller flags. */ private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorSx | flags); + } + + /* EmitVectorShrImmOpZx: EmitShrImmOp with VectorZx (0) ORed into the caller flags. */ private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorZx | flags); + } + + /* EmitShrImmOp: shared shift-right-by-immediate emitter. For each element of Rn (only element 0 when Scalar) it optionally adds roundConst, shifts right by GetImmShr(op) (OpCodes.Shr when Signed, OpCodes.Shr_Un otherwise), optionally adds the matching Rd element, and inserts the result into the temp vector, which is finally stored to Rd. Elements with op.Size of 3 are routed through EmitShrImm64 instead of inline IL. EmitVectorZeroUpper is called on Rd for Simd64 or scalar operations. */ private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + bool scalar = (flags & ShrImmFlags.Scalar) != 0; + bool signed = (flags & ShrImmFlags.Signed) != 0; + bool round = (flags & ShrImmFlags.Round) != 0; + bool accumulate = (flags & ShrImmFlags.Accumulate) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int bytes = op.GetBitsCount() >> 3; + int elems = !scalar ? bytes >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (op.Size <= 2) + { + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLdc_I4(shift); + + context.Emit(signed ?
OpCodes.Shr : OpCodes.Shr_Un); + } + else /* if (op.Size == 3) */ + { + EmitShrImm64(context, signed, round ? roundConst : 0L, shift); + } + + if (accumulate) + { + EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + context.Emit(OpCodes.Add); + } + + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + /* EmitVectorShrImmNarrowOpZx: narrowing logical shift right by immediate. Reads 8 >> op.Size elements of size op.Size + 1 from Rn with the Zx extract, optionally adds roundConst, shifts right with OpCodes.Shr_Un and inserts each result as an op.Size element of the temp vector. For Simd128 the temp vector is first seeded from Rd and results are written at indices starting at elems (part = elems); otherwise results start at index 0 and EmitVectorZeroUpper is called on Rd. */ private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round) + { + OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLdc_I4(shift); + + context.Emit(OpCodes.Shr_Un); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + /* ShrImmSaturatingNarrowFlags: option bits read by EmitShrImmSaturatingNarrowOp. Scalar processes a single element; SignedSrc selects signed extraction and OpCodes.Shr; SignedDst is forwarded to EmitSatQ; Round adds 1 << (shift - 1) before shifting. The SxSx, SxZx and ZxZx members name the source and destination signedness combinations. */ [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + /* EmitRoundShrImmSaturatingNarrowOp: EmitShrImmSaturatingNarrowOp with the Round flag ORed into the caller flags. */ private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + /* EmitShrImmSaturatingNarrowOp: saturating narrowing shift right by immediate. Each source element of size op.Size + 1 is extracted from Rn (signed when SignedSrc), optionally rounded, shifted right by GetImmShr(op), passed to EmitSatQ and inserted as an op.Size element of the temp vector. The scalar form calls EmitVectorZeroLowerTmp first and handles one element; the Simd128 vector form seeds the temp vector from Rd and writes at indices starting at elems. With op.Size == 2 and rounding the shift goes through EmitShrImm64. EmitVectorZeroUpper is called on Rd when part == 0. */ private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags) + { +
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + if (scalar) + { + EmitVectorZeroLowerTmp(context); + } + + if (part != 0) + { + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); + } + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + if (op.Size <= 1 || !round) + { + if (round) + { + context.EmitLdc_I8(roundConst); + + context.Emit(OpCodes.Add); + } + + context.EmitLdc_I4(shift); + + context.Emit(signedSrc ? OpCodes.Shr : OpCodes.Shr_Un); + } + else /* if (op.Size == 2 && round) */ + { + EmitShrImm64(context, signedSrc, roundConst, shift); // shift <= 32 + } + + EmitSatQ(context, op.Size, signedSrc, signedDst); + + EmitVectorInsertTmp(context, part + index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + + // dst64 = (Int(src64, signed) + roundConst) >> shift; + private static void EmitShrImm64( + ILEmitterCtx context, + bool signed, + long roundConst, + int shift) + { + context.EmitLdc_I8(roundConst); + context.EmitLdc_I4(shift); + + SoftFallback.EmitCall(context, signed + ?
nameof(SoftFallback.SignedShrImm64) + : nameof(SoftFallback.UnsignedShrImm64)); + } + + /* EmitVectorShImmWidenBinarySx: EmitVectorShImmWidenBinaryOp with signed = true. */ private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, true); + } + + /* EmitVectorShImmWidenBinaryZx: EmitVectorShImmWidenBinaryOp with signed = false. */ private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, false); + } + + /* EmitVectorShImmWidenBinaryOp: widening emitter. Extracts 8 >> op.Size elements of size op.Size from Rn, starting at index elems for Simd128 and at index 0 otherwise; for each one it pushes imm, invokes emit() and inserts the result as an element of size op.Size + 1 into the temp vector, which is then stored to Rd. */ private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + context.EmitLdc_I4(imm); + + emit(); + + EmitVectorInsertTmp(context, index, op.Size + 1); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } + } +}