diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index fd3b691d4f..a8d62a01fc 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -52,6 +52,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary)); Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary)); Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary)); + Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm)); Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary)); Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary)); Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary)); diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index 1f4f10c710..9da0c41ebb 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -548,11 +548,7 @@ namespace ARMeilleure.Instructions } else { - long low = (index < 2) ? (1L << (index * 32 + 31)) : 0; - long high = (index > 1) ? (1L << (index * 32 - 33)) : 0; - Operand mask = X86GetElements(context, high, low); - value = EmitSwapScalar(context, value, reg, doubleWidth); - return context.AddIntrinsic(Intrinsic.X86Blendvps, target, value, mask); + return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4)); } } @@ -565,14 +561,14 @@ namespace ARMeilleure.Instructions Operand m = GetVecA32(op.Qm); Operand d = GetVecA32(op.Qd); - if (!op.Q) //register swap: move relevant doubleword to destination side + if (!op.Q) // Register swap: move relevant doubleword to destination side. { m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd); } Operand res = vectorFunc(m); - if (!op.Q) //register insert + if (!op.Q) // Register insert. { res = EmitDoubleWordInsert(context, d, res, op.Vd); } @@ -599,7 +595,7 @@ namespace ARMeilleure.Instructions if (side == -1) side = op.Vd; - if (!op.Q) //register swap: move relevant doubleword to destination side + if (!op.Q) // Register swap: move relevant doubleword to destination side. { n = EmitSwapDoubleWordToSide(context, n, op.Vn, side); m = EmitSwapDoubleWordToSide(context, m, op.Vm, side); @@ -607,7 +603,7 @@ namespace ARMeilleure.Instructions Operand res = vectorFunc(n, m); - if (!op.Q) //register insert + if (!op.Q) // Register insert. { if (side != op.Vd) EmitSwapDoubleWordToSide(context, m, side, op.Vd); res = EmitDoubleWordInsert(context, d, res, op.Vd); @@ -633,7 +629,7 @@ namespace ARMeilleure.Instructions Operand d = GetVecA32(op.Qd); Operand initialD = d; - if (!op.Q) //register swap: move relevant doubleword to destination side + if (!op.Q) // Register swap: move relevant doubleword to destination side. { n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd); m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd); @@ -676,15 +672,8 @@ namespace ARMeilleure.Instructions Operand res = scalarFunc(m); - if (false) // op.Vd == op.Vm) //small optimisation: can just swap it back for the result - { - res = EmitSwapScalar(context, res, op.Vd, doubleSize); - } - else - { - // insert scalar into vector - res = EmitInsertScalar(context, d, res, op.Vd, doubleSize); - } + // Insert scalar into vector. + res = EmitInsertScalar(context, d, res, op.Vd, doubleSize); context.Copy(d, res); } @@ -713,15 +702,8 @@ namespace ARMeilleure.Instructions Operand res = scalarFunc(n, m); - if (false) // //small optimisation: can just swap it back for the result - { - res = EmitSwapScalar(context, res, op.Vd, doubleSize); - } - else - { - // insert scalar into vector - res = EmitInsertScalar(context, d, res, op.Vd, doubleSize); - } + // Insert scalar into vector. + res = EmitInsertScalar(context, d, res, op.Vd, doubleSize); context.Copy(d, res); } @@ -752,7 +734,7 @@ namespace ARMeilleure.Instructions Operand res = scalarFunc(d, n, m); - // insert scalar into vector + // Insert scalar into vector. res = EmitInsertScalar(context, initialD, res, op.Vd, doubleSize); context.Copy(initialD, res); @@ -788,14 +770,14 @@ namespace ARMeilleure.Instructions Operand m = GetVecA32(op.Vm >> 2); m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask)); - if (!op.Q) //register swap: move relevant doubleword to destination side + if (!op.Q) // Register swap: move relevant doubleword to destination side. { n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd); } Operand res = vectorFunc(n, m); - if (!op.Q) //register insert + if (!op.Q) // Register insert. { res = EmitDoubleWordInsert(context, d, res, op.Vd); } @@ -824,14 +806,14 @@ namespace ARMeilleure.Instructions Operand m = GetVecA32(op.Vm >> 2); m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask)); - if (!op.Q) //register swap: move relevant doubleword to destination side + if (!op.Q) // Register swap: move relevant doubleword to destination side. { n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd); } Operand res = vectorFunc(d, n, m); - if (!op.Q) //register insert + if (!op.Q) // Register insert. { res = EmitDoubleWordInsert(context, initialD, res, op.Vd); } @@ -911,7 +893,7 @@ namespace ARMeilleure.Instructions Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n - Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n + Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n return context.AddIntrinsic(inst[op.Size], left, right); } diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs index c3f375c4c2..e2e11e9f40 100644 --- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs +++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs @@ -41,6 +41,7 @@ namespace ARMeilleure.IntermediateRepresentation X86Divss, X86Haddpd, X86Haddps, + X86Insertps, X86Maxpd, X86Maxps, X86Maxsd,