From c7e78bcb45d62e6fd06bb994572011f5266cb4ff Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 29 Jul 2019 01:46:59 -0300 Subject: [PATCH] Another fix on VectorInsert FP32 (thanks to LDj3SNuD) --- ARMeilleure/CodeGen/X86/CodeGenerator.cs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 086ad90469..89c302f98e 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -1292,11 +1292,17 @@ namespace ARMeilleure.CodeGen.X86 int mask1 = 0b11_10_01_00; mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + //mask1 = BitUtils.RotateLeft (mask1, index * 2, 8); mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); - context.Assembler.Pshufd(src1, src1, (byte)mask0); - context.Assembler.Movss (dest, src1, src2); - context.Assembler.Pshufd(src1, src1, (byte)mask1); + context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0. + context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0] + context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position. + + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1. + } } } }