diff --git a/ChocolArm64/Instructions/InstEmitSimdLogical.cs b/ChocolArm64/Instructions/InstEmitSimdLogical.cs index f51568ebbe..3473fc5d98 100644 --- a/ChocolArm64/Instructions/InstEmitSimdLogical.cs +++ b/ChocolArm64/Instructions/InstEmitSimdLogical.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instructions.InstEmitSimdHelper; @@ -29,18 +30,14 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; - Type[] types = new Type[] - { - VectorUIntTypesPerSizeLog2[op.Size], - VectorUIntTypesPerSizeLog2[op.Size] - }; + EmitLdvecWithUnsignedCast(context, op.Rm, 0); + EmitLdvecWithUnsignedCast(context, op.Rn, 0); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), types)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + EmitStvecWithUnsignedCast(context, op.Rd, 0); if (op.RegisterSize == RegisterSize.Simd64) { @@ -68,41 +65,34 @@ namespace ChocolArm64.Instructions public static void Bif_V(ILEmitterCtx context) { - EmitBitBif(context, true); + EmitBifBit(context, notRm: true); } public static void Bit_V(ILEmitterCtx context) { - EmitBitBif(context, false); + EmitBifBit(context, notRm: false); } - private static void EmitBitBif(ILEmitterCtx context, bool notRm) + private static void EmitBifBit(ILEmitterCtx context, bool notRm) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; if (Optimizations.UseSse2) { - Type[] types = new Type[] - { - VectorUIntTypesPerSizeLog2[op.Size], - VectorUIntTypesPerSizeLog2[op.Size] - }; + Type[] typesXorAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + string nameAndNot = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + EmitLdvecWithUnsignedCast(context, op.Rd, 0); + EmitLdvecWithUnsignedCast(context, op.Rm, 0); + EmitLdvecWithUnsignedCast(context, op.Rn, 0); + EmitLdvecWithUnsignedCast(context, op.Rd, 0); - string name = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(name, types)); - - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); - - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + EmitStvecWithUnsignedCast(context, op.Rd, 0); if (op.RegisterSize == RegisterSize.Simd64) { @@ -111,17 +101,18 @@ namespace ChocolArm64.Instructions } else { - int bytes = op.GetBitsCount() >> 3; - int elems = bytes >> op.Size; + int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1; for (int index = 0; index < elems; index++) { - EmitVectorExtractZx(context, op.Rd, index, op.Size); - EmitVectorExtractZx(context, op.Rn, index, op.Size); + EmitVectorExtractZx(context, op.Rd, index, 3); + context.Emit(OpCodes.Dup); + + EmitVectorExtractZx(context, op.Rn, index, 3); context.Emit(OpCodes.Xor); - EmitVectorExtractZx(context, op.Rm, index, op.Size); + EmitVectorExtractZx(context, op.Rm, index, 3); if (notRm) { @@ -130,11 +121,9 @@ namespace ChocolArm64.Instructions context.Emit(OpCodes.And); - EmitVectorExtractZx(context, op.Rd, index, op.Size); - context.Emit(OpCodes.Xor); - EmitVectorInsert(context, op.Rd, index, op.Size); + EmitVectorInsert(context, op.Rd, index, 3); } if (op.RegisterSize == RegisterSize.Simd64) @@ -150,26 +139,22 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - Type[] types = new Type[] - { - VectorUIntTypesPerSizeLog2[op.Size], - VectorUIntTypesPerSizeLog2[op.Size] - }; + Type[] typesXorAnd = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, 0); + context.Emit(OpCodes.Dup); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + EmitLdvecWithUnsignedCast(context, op.Rn, 0); - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types)); + EmitLdvecWithUnsignedCast(context, op.Rd, 0); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + EmitStvecWithUnsignedCast(context, op.Rd, 0); if (op.RegisterSize == RegisterSize.Simd64) { @@ -207,16 +192,66 @@ namespace ChocolArm64.Instructions public static void Not_V(ILEmitterCtx context) { - EmitVectorUnaryOpZx(context, () => context.Emit(OpCodes.Not)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesSav = new Type[] { typeof(byte) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithUnsignedCast(context, op.Rn, 0); + + context.EmitLdc_I4(byte.MaxValue); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithUnsignedCast(context, op.Rd, 0); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpZx(context, () => context.Emit(OpCodes.Not)); + } } public static void Orn_V(ILEmitterCtx context) { - EmitVectorBinaryOpZx(context, () => + if (Optimizations.UseSse2) { - context.Emit(OpCodes.Not); - context.Emit(OpCodes.Or); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSav = new Type[] { typeof(byte) }; + Type[] typesAndNotOr = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithUnsignedCast(context, op.Rn, 0); + EmitLdvecWithUnsignedCast(context, op.Rm, 0); + + context.EmitLdc_I4(byte.MaxValue); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndNotOr)); + + EmitStvecWithUnsignedCast(context, op.Rd, 0); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Not); + context.Emit(OpCodes.Or); + }); + } } public static void Orr_V(ILEmitterCtx context) @@ -263,28 +298,122 @@ namespace ChocolArm64.Instructions public static void Rev16_V(ILEmitterCtx context) { - EmitRev_V(context, containerSize: 1); + if (Optimizations.UseSsse3) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithSignedCast(context, op.Rn, 0); // value + + context.EmitLdc_I8(14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0); // maskE1 + context.EmitLdc_I8(06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); + + EmitStvecWithSignedCast(context, op.Rd, 0); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitRev_V(context, containerSize: 1); + } } public static void Rev32_V(ILEmitterCtx context) { - EmitRev_V(context, containerSize: 2); + if (Optimizations.UseSsse3) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + + if (op.Size == 0) + { + context.EmitLdc_I8(12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0); // maskE1 + context.EmitLdc_I8(04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0); // maskE0 + } + else /* if (op.Size == 1) */ + { + context.EmitLdc_I8(13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0); // maskE1 + context.EmitLdc_I8(05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0); // maskE0 + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitRev_V(context, containerSize: 2); + } } public static void Rev64_V(ILEmitterCtx context) { - EmitRev_V(context, containerSize: 3); + if (Optimizations.UseSsse3) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + + if (op.Size == 0) + { + context.EmitLdc_I8(08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0); // maskE1 + context.EmitLdc_I8(00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0); // maskE0 + } + else if (op.Size == 1) + { + context.EmitLdc_I8(09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0); // maskE1 + context.EmitLdc_I8(01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0); // maskE0 + } + else /* if (op.Size == 2) */ + { + context.EmitLdc_I8(11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0); // maskE1 + context.EmitLdc_I8(03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0); // maskE0 + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitRev_V(context, containerSize: 3); + } } private static void EmitRev_V(ILEmitterCtx context, int containerSize) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - if (op.Size >= containerSize) - { - throw new InvalidOperationException(); - } - int bytes = op.GetBitsCount() >> 3; int elems = bytes >> op.Size;