diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 086ad90469..89c302f98e 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -1292,11 +1292,17 @@ namespace ARMeilleure.CodeGen.X86 int mask1 = 0b11_10_01_00; mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + //mask1 = BitUtils.RotateLeft (mask1, index * 2, 8); mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); - context.Assembler.Pshufd(src1, src1, (byte)mask0); - context.Assembler.Movss (dest, src1, src2); - context.Assembler.Pshufd(src1, src1, (byte)mask1); + context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0. + context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0] + context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position. + + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1. + } } } }