From 908200b907e4d457787232a669afa41d3d01408d Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Mon, 28 Jan 2019 01:48:30 +0100 Subject: [PATCH] Update InstEmitSimdMove.cs --- ChocolArm64/Instructions/InstEmitSimdMove.cs | 241 +++++++++++++++---- 1 file changed, 190 insertions(+), 51 deletions(-) diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs index d40ccff915..7f28625c42 100644 --- a/ChocolArm64/Instructions/InstEmitSimdMove.cs +++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs @@ -386,9 +386,8 @@ namespace ChocolArm64.Instructions 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 }; - Type[] typesMov = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; - Type[] typesSve = new Type[] { typeof(long), typeof(long) }; string nameMov = op.RegisterSize == RegisterSize.Simd128 ? nameof(Sse.MoveLowToHigh) @@ -397,7 +396,7 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); EmitLdvecWithSignedCast(context, op.Rn, 0); @@ -408,7 +407,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); - context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); + context.EmitCall(typeof(Sse).GetMethod(nameMov)); context.EmitStvec(op.Rd); } @@ -465,22 +464,77 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - int words = op.GetBitsCount() >> 4; - int pairs = words >> op.Size; - - for (int index = 0; index < pairs; index++) + if (Optimizations.UseSsse3) { - int idx = index << 1; + Type[] GetTypesUpkSfl(int size) => new Type[] { VectorIntTypesPerSizeLog2[size], VectorIntTypesPerSizeLog2[size] }; - EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); - EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + long[] masksE1 = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 + }; - EmitVectorInsertTmp(context, idx + 1, op.Size); - EmitVectorInsertTmp(context, idx, op.Size); + long[] masksE0 = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; + + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + + string nameUpk = part == 0 + ? nameof(Sse2.UnpackLow) + : nameof(Sse2.UnpackHigh); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(masksE1[op.Size]); // maskE1 + context.EmitLdc_I8(masksE0[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesUpkSfl(0))); + } + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(masksE1[op.Size]); // maskE1 + context.EmitLdc_I8(masksE0[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesUpkSfl(0))); + } + + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesUpkSfl(op.Size))); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); } + else + { + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + EmitVectorInsertTmp(context, idx + 1, op.Size); + EmitVectorInsertTmp(context, idx, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } if (op.RegisterSize == RegisterSize.Simd64) { @@ -492,26 +546,119 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - int words = op.GetBitsCount() >> 4; - int pairs = words >> op.Size; - - for (int index = 0; index < pairs; index++) + if (Optimizations.UseSsse3) { - int idx = index << 1; + Type[] GetTypesUpkSfl(int size) => new Type[] { VectorIntTypesPerSizeLog2[size], VectorIntTypesPerSizeLog2[size] }; - EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); - EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; - EmitVectorInsertTmp(context, pairs + index, op.Size); - EmitVectorInsertTmp(context, index, op.Size); + string nameUpk = part == 0 + ? nameof(Sse2.UnpackLow) + : nameof(Sse2.UnpackHigh); + + if (op.RegisterSize == RegisterSize.Simd128) + { + long[] masksE1 = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 + }; + + long[] masksE0 = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(masksE1[op.Size]); // maskE1 + context.EmitLdc_I8(masksE0[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesUpkSfl(0))); + } + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(masksE1[op.Size]); // maskE1 + context.EmitLdc_I8(masksE0[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesUpkSfl(0))); + } + + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesUpkSfl(3))); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + } + else + { + long[] masksE1 = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; + + long[] masksE0 = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesUpkSfl(op.Size))); // value + + if (op.Size < 2) + { + context.EmitLdc_I8(masksE1[op.Size]); // maskE1 + context.EmitLdc_I8(masksE0[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesUpkSfl(0))); + } + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero)); + + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesUpkSfl(3))); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + } } - - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - - if (op.RegisterSize == RegisterSize.Simd64) + else { - EmitVectorZeroUpper(context, op.Rd); + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + EmitVectorInsertTmp(context, pairs + index, op.Size); + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } } } @@ -521,36 +668,28 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse2) { - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + Type[] GetTypesUpk(int size) => new Type[] { VectorIntTypesPerSizeLog2[size], VectorIntTypesPerSizeLog2[size] }; - Type[] types = new Type[] - { - VectorUIntTypesPerSizeLog2[op.Size], - VectorUIntTypesPerSizeLog2[op.Size] - }; - - string name = part == 0 || (part != 0 && op.RegisterSize == RegisterSize.Simd64) + string nameUpk = part == 0 ? nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); - context.EmitCall(typeof(Sse2).GetMethod(name, types)); + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); - if (op.RegisterSize == RegisterSize.Simd64 && part != 0) + if (op.RegisterSize == RegisterSize.Simd128) { - context.EmitLdc_I4(8); + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesUpk(op.Size))); + } + else + { + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesUpk(op.Size))); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero)); - Type[] shTypes = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes)); + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesUpk(3))); } - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); - - if (op.RegisterSize == RegisterSize.Simd64 && part == 0) - { - EmitVectorZeroUpper(context, op.Rd); - } + EmitStvecWithSignedCast(context, op.Rd, op.Size); } else {