diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 34ea2e1848..d89212e80c 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -203,12 +203,12 @@ namespace ARMeilleure.Decoders SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, typeof(OpCodeSimd)); SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, typeof(OpCodeSimd)); SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, typeof(OpCodeSimd)); - SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, null, typeof(OpCodeSimdReg)); - SetA64("0x10111100000xxx<001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, typeof(OpCodeSimdIns)); SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, typeof(OpCodeSimdIns)); SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, typeof(OpCodeSimdIns)); - SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, null, typeof(OpCodeSimdReg)); + SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, InstEmit.Eor_V, typeof(OpCodeSimdReg)); SetA64("0>101110000xxxxx01011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg)); @@ -388,15 +388,15 @@ namespace ARMeilleure.Decoders SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, typeof(OpCodeSimd)); SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, typeof(OpCodeSimd)); - SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, null, typeof(OpCodeSimd)); - SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, null, typeof(OpCodeSimdReg)); -
SetA64("0x00111100000xxx<>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, null, typeof(OpCodeSimdShImm)); SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, typeof(OpCodeSimdReg)); @@ -540,7 +540,7 @@ namespace ARMeilleure.Decoders SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, typeof(OpCodeSimdReg)); SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, typeof(OpCodeSimdRegElem)); - SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, null, typeof(OpCodeSimdIns)); + SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, InstEmit.Umov_S, typeof(OpCodeSimdIns)); SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, typeof(OpCodeSimdReg)); SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, typeof(OpCodeSimdRegElem)); SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, typeof(OpCodeSimdReg)); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index e0a24e514d..8045726fb6 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -1711,7 +1711,7 @@ namespace ARMeilleure.Instructions Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); - EmitSse41Sabd(context, op, n, m, op.Size); + EmitSse41Sabd(context, op, n, m, isLong: false); } else { @@ -1744,7 +1744,7 @@ namespace ARMeilleure.Instructions n = context.AddIntrinsic(movInst, n); m = context.AddIntrinsic(movInst, m); - EmitSse41Sabd(context, op, n, m, op.Size + 1); + EmitSse41Sabd(context, op, n, m, isLong: true); } else { @@ -2362,7 +2362,7 @@ namespace ARMeilleure.Instructions Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm); - EmitSse41Uabd(context, op, n, m, op.Size); + EmitSse41Uabd(context, op, n, m, isLong: false); } else { @@ -2395,7 +2395,7 @@ namespace ARMeilleure.Instructions n = context.AddIntrinsic(movInst, n); m = context.AddIntrinsic(movInst, m); - EmitSse41Uabd(context, op, n, m, op.Size + 1); + EmitSse41Uabd(context, op, n, m, isLong: true); } else { @@ -2805,7 +2805,7 @@ namespace ARMeilleure.Instructions } else { - EmitVectorBinaryOpSx(context, (op1, op2) => + EmitVectorBinaryOpZx(context, (op1, op2) => { Operand res = context.Add(op1, op2); @@ -3106,8 +3106,10 @@ namespace ARMeilleure.Instructions OpCodeSimdReg op, Operand n, Operand m, - int size) + bool isLong) { + /* isLong picks the next wider element size (op.Size + 1) for the instruction-table lookup below */ int size = isLong ? op.Size + 1 : op.Size; + Instruction cmpgtInst = X86PcmpgtInstruction[size]; Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); @@ -3124,7 +3126,7 @@ namespace ARMeilleure.Instructions res = context.AddIntrinsic(Instruction.X86Por, res, res2); - if (op.RegisterSize == RegisterSize.Simd64) + /* long variants skip the upper-64 clear, even when the op reports Simd64 */ if (!isLong && op.RegisterSize == RegisterSize.Simd64) { res = context.VectorZeroUpper64(res); } @@ -3137,8 +3139,10 @@ namespace ARMeilleure.Instructions OpCodeSimdReg op, Operand n, Operand m, - int size) + bool isLong) { + int size = isLong ?
op.Size + 1 : op.Size; + + Instruction maxInst = X86PmaxuInstruction[size]; Operand max = context.AddIntrinsic(maxInst, m, n); @@ -3161,7 +3165,7 @@ namespace ARMeilleure.Instructions res = context.AddIntrinsic(Instruction.X86Por, res, res2); - if (op.RegisterSize == RegisterSize.Simd64) + /* long variants skip the upper-64 clear, even when the op reports Simd64 */ if (!isLong && op.RegisterSize == RegisterSize.Simd64) { res = context.VectorZeroUpper64(res); } diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs new file mode 100644 index 0000000000..b37335e6f8 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -0,0 +1,459 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + /* And_V: SSE2 path emits PAND on Rn and Rm; otherwise falls back to EmitVectorBinaryOpZx with BitwiseAnd. Simd64 results go through VectorZeroUpper64 before being copied to Rd. */ public static void And_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pand, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + } + + /* Bic_V: the fallback computes op1 & ~op2. The SSE2 path emits PANDN with m first, since PANDN complements its first source operand. */ public static void Bic_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pandn, m, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { +
return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + } + + /* Bic_Vi: immediate form; op1 & ~op2 through EmitVectorImmBinaryOp, with no SSE-specific path. */ public static void Bic_Vi(EmitterContext context) + { + EmitVectorImmBinaryOp(context, (op1, op2) => + { + return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + + /* Bif_V and Bit_V share EmitBifBit and differ only in notRm (true here). */ public static void Bif_V(EmitterContext context) + { + EmitBifBit(context, notRm: true); + } + + /* Bit_V: EmitBifBit with notRm: false. */ public static void Bit_V(EmitterContext context) + { + EmitBifBit(context, notRm: false); + } + + /* EmitBifBit: computes d ^ ((n ^ d) & sel), where sel is m, or ~m when notRm is true, and writes the result to Rd. */ private static void EmitBifBit(EmitterContext context, bool notRm) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pxor, n, d); + + if (notRm) + { + res = context.AddIntrinsic(Instruction.X86Pandn, m, res); + } + else + { + res = context.AddIntrinsic(Instruction.X86Pand, m, res); + } + + res = context.AddIntrinsic(Instruction.X86Pxor, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + /* fallback walks the register with element-size argument 3: two elements for Simd128, one otherwise */ Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ?
2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand d = EmitVectorExtractZx(context, op.Rd, index, 3); + Operand n = EmitVectorExtractZx(context, op.Rn, index, 3); + Operand m = EmitVectorExtractZx(context, op.Rm, index, 3); + + if (notRm) + { + m = context.BitwiseNot(m); + } + + Operand e = context.BitwiseExclusiveOr(d, n); + + e = context.BitwiseAnd(e, m); + e = context.BitwiseExclusiveOr(e, d); + + res = EmitVectorInsert(context, res, e, index, 3); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* Bsl_V: ((n ^ m) & d) ^ m, which yields the bit of n where d is 1 and the bit of m where d is 0. */ public static void Bsl_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pxor, n, m); + + res = context.AddIntrinsic(Instruction.X86Pand, res, d); + res = context.AddIntrinsic(Instruction.X86Pxor, res, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + } + + /* Eor_V: PXOR of Rn and Rm on SSE2; BitwiseExclusiveOr per element otherwise. */ public static void Eor_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pxor, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2)); + } + } + + /* Not_V: SSE2 path emits PANDN of n against the X86GetAllElements(-1L) mask; the fallback applies BitwiseNot per element. */ public static void Not_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask = X86GetAllElements(context, -1L);
+ + Operand res = context.AddIntrinsic(Instruction.X86Pandn, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1)); + } + } + + /* Orn_V: fallback computes op1 | ~op2; SSE2 path emits PANDN of m against the -1L mask and then POR with n. */ public static void Orn_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, -1L); + + Operand res = context.AddIntrinsic(Instruction.X86Pandn, m, mask); + + res = context.AddIntrinsic(Instruction.X86Por, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseOr(op1, context.BitwiseNot(op2)); + }); + } + } + + /* Orr_V: POR of Rn and Rm on SSE2; BitwiseOr per element otherwise. */ public static void Orr_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Por, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + } + + /* Orr_Vi: immediate form; BitwiseOr through EmitVectorImmBinaryOp. */ public static void Orr_Vi(EmitterContext context) + { + EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + + /* Rbit_V: no SSE path. Each size-0 element (16 for Simd128, 8 otherwise) is extracted, passed to SoftFallback.ReverseBits8 through a call, and inserted at the same index. */ public static void Rbit_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ?
16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + ne = context.Copy(Local(OperandType.I32), ne); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.ReverseBits8)); + + Operand de = context.Call(info, ne); + + de = context.Copy(Local(OperandType.I64), de); + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + + /* Rev16_V: SSSE3 path uses PSHUFB with a constant byte-index mask (01,00,03,02,... from the low byte up) that swaps the two bytes of every 16-bit group; otherwise EmitRev_V with containerSize 1. */ public static void Rev16_V(EmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0; + const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + Operand res = context.AddIntrinsic(Instruction.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 1); + } + } + + /* Rev32_V: PSHUFB mask depends on op.Size: size 0 reverses the four byte indices of each 32-bit group, the other branch swaps its two byte pairs; otherwise EmitRev_V with containerSize 2. */ public static void Rev32_V(EmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0; + const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 1) */ + { + const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0; + const long maskE1 = 13L << 56 | 12L << 48 | 15L <<
40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Instruction.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 2); + } + } + + /* Rev64_V: PSHUFB mask per op.Size reverses bytes (0), byte pairs (1) or four-byte groups (else branch) inside each group of eight; otherwise EmitRev_V with containerSize 3. */ public static void Rev64_V(EmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0; + const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else if (op.Size == 1) + { + const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0; + const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 2) */ + { + const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0; + const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Instruction.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize:
3); + } + } + + /* EmitRev_V: generic fallback. XORing the element index with containerMask reverses element order inside each aligned run of 1 << (containerSize - op.Size) elements. */ private static void EmitRev_V(EmitterContext context, int containerSize) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int containerMask = (1 << (containerSize - op.Size)) - 1; + + for (int index = 0; index < elems; index++) + { + int revIndex = index ^ containerMask; + + Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } +}