More instructions, some cleanup.

This commit is contained in:
riperiperi 2020-01-20 22:56:23 +00:00
parent b5578b0d47
commit fd08ff0805
5 changed files with 547 additions and 85 deletions

View file

@ -3121,7 +3121,7 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

View file

@ -15,7 +15,28 @@ namespace ARMeilleure.Instructions
{
public static void Vabs_S(ArmEmitterContext context)
{
EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
{
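// Absolute value via ANDNPS/ANDNPD: the -0.0 mask has only the sign bit set, and
// ANDN clears exactly that bit from the operand, so no compare or branch is needed.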
EmitScalarUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Andnpd, mask, m);
}
});
}
else
{
EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
}
}
public static void Vabs_V(ArmEmitterContext context)
@ -24,7 +45,26 @@ namespace ARMeilleure.Instructions
if (op.F)
{
EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Andnpd, mask, m);
}
});
}
else
{
EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
}
}
else
{
@ -182,18 +222,80 @@ namespace ARMeilleure.Instructions
public static void Vneg_S(ArmEmitterContext context)
{
//TODO: intrinsic that XORs the sign bit
EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.UseSse2)
{
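// Negation via XORPS/XORPD: XORing with -0.0 flips only the sign bit of the scalar.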
EmitScalarUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
}
});
}
else
{
EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
}
}
public static void Vnmul_S(ArmEmitterContext context)
{
EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.UseSse2)
{
EmitScalarBinaryOpSimd32(context, (n, m) =>
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
}
});
}
else
{
EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
}
}
public static void Vnmla_S(ArmEmitterContext context)
{
if (Optimizations.FastFP)
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
{
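// VNMLA computes -(d + n * m): multiply, accumulate into d, then flip the sign of the
// result by XORing with a -0.0 mask.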
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
res = context.AddIntrinsic(Intrinsic.X86Addss, d, res);
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
res = context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
}
});
}
else if (Optimizations.FastFP)
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
@ -211,7 +313,29 @@ namespace ARMeilleure.Instructions
public static void Vnmls_S(ArmEmitterContext context)
{
if (Optimizations.FastFP)
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
{
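// VNMLS computes -(d) + n * m: flip the sign of the accumulator first, then add the product.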
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
Operand mask = X86GetScalar(context, -0f);
d = context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
return context.AddIntrinsic(Intrinsic.X86Addss, d, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
Operand mask = X86GetScalar(context, -0d);
d = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d);
return context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
}
});
}
else if (Optimizations.FastFP)
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
@ -229,9 +353,30 @@ namespace ARMeilleure.Instructions
public static void Vneg_V(ArmEmitterContext context)
{
if ((context.CurrOp as OpCode32Simd).F)
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
if (op.F)
{
EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
if (Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
}
});
}
else
{
EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
}
}
else
{
@ -260,22 +405,50 @@ namespace ARMeilleure.Instructions
public static void Vmaxnm_S(ArmEmitterContext context)
{
EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, true, true);
}
else
{
EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2));
}
}
public static void Vmaxnm_V(ArmEmitterContext context)
{
EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, true, false);
}
else
{
EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2));
}
}
public static void Vminnm_S(ArmEmitterContext context)
{
EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, false, true);
}
else
{
EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
}
}
public static void Vminnm_V(ArmEmitterContext context)
{
EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, false, false);
}
else
{
EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2));
}
}
public static void Vmax_V(ArmEmitterContext context)
@ -291,12 +464,12 @@ namespace ARMeilleure.Instructions
return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxFpscr, SoftFloat64.FPMaxFpscr, op1, op2);
});
}
}
public static void Vmax_I(ArmEmitterContext context)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
if (op.U)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
@ -325,6 +498,7 @@ namespace ARMeilleure.Instructions
public static void Vmin_I(ArmEmitterContext context)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
if (op.U)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
@ -728,5 +902,56 @@ namespace ARMeilleure.Instructions
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
}
private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
{
IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
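// FPMaxNum/FPMinNum semantics: when exactly one operand is a quiet NaN it is ignored.
// That operand is replaced with -Inf (for max) or +Inf (for min) so MAXPS/MINPS selects
// the numeric operand; when both are NaN neither mask is set and the NaN result propagates.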
Func<Operand, Operand, Operand> genericEmit = (n, m) =>
{
Operand nNum = context.Copy(n);
Operand mNum = context.Copy(m);
Operand nQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, nNum);
Operand mQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, mNum);
int sizeF = op.Size & 1;
if (sizeF == 0)
{
Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
}
else /* if (sizeF == 1) */
{
Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
}
};
if (scalar)
{
EmitScalarBinaryOpSimd32(context, genericEmit);
}
else
{
EmitVectorBinaryOpSimd32(context, genericEmit);
}
}
}
}

View file

@ -5,6 +5,7 @@ using ARMeilleure.Translation;
using System;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@ -16,7 +17,14 @@ namespace ARMeilleure.Instructions
{
public static void Vceq_V(ArmEmitterContext context)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.Equal, false);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
}
}
public static void Vceq_I(ArmEmitterContext context)
@ -30,7 +38,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.Equal, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
}
}
else
{
@ -40,7 +55,14 @@ namespace ARMeilleure.Instructions
public static void Vcge_V(ArmEmitterContext context)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
}
}
public static void Vcge_I(ArmEmitterContext context)
@ -56,7 +78,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
}
}
else
{
@ -66,7 +95,14 @@ namespace ARMeilleure.Instructions
public static void Vcgt_V(ArmEmitterContext context)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, false);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
}
}
public static void Vcgt_I(ArmEmitterContext context)
@ -82,7 +118,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
}
}
else
{
@ -96,7 +139,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.LessThanOrEqual, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
}
}
else
{
@ -110,7 +160,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.LessThan, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
}
}
else
{
@ -224,8 +281,74 @@ namespace ARMeilleure.Instructions
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool cmpWithZero = (op.Opc & 2) != 0;
int fSize = op.Size & 1;
if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
{
CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
bool doubleSize = fSize != 0;
int shift = doubleSize ? 1 : 2;
Operand m = GetVecA32(op.Vm >> shift);
Operand n = GetVecA32(op.Vd >> shift);
n = EmitSwapScalar(context, n, op.Vd, doubleSize);
m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
Operand lblNaN = Label();
Operand lblEnd = Label();
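// Build the ARM NZCV result directly from scalar compares: C = (n >= m), Z = (n == m),
// N = (n < m), V = 0 for ordered inputs. The packed value uses bit 0 = V, bit 1 = C,
// bit 2 = Z, bit 3 = N, matching EmitSetFPSCRFlags below; unordered inputs branch to
// lblNaN, which sets NZCV = 0011.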
if (!doubleSize)
{
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
context.BranchIfFalse(lblNaN, isOrdered);
Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
EmitSetFPSCRFlags(context, context.BitwiseOr(
context.ShiftLeft(cf, Const(1)),
context.BitwiseOr(
context.ShiftLeft(zf, Const(2)),
context.ShiftLeft(nf, Const(3))
)
));
}
else
{
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
context.BranchIfFalse(lblNaN, isOrdered);
Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
EmitSetFPSCRFlags(context, context.BitwiseOr(
context.ShiftLeft(cf, Const(1)),
context.BitwiseOr(
context.ShiftLeft(zf, Const(2)),
context.ShiftLeft(nf, Const(3))
)
));
}
context.Branch(lblEnd);
context.MarkLabel(lblNaN);
EmitSetFPSCRFlags(context, Const(3));
context.MarkLabel(lblEnd);
}
else
{
int fSize = op.Size & 1;
OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32;
Operand ne = ExtractScalar(context, type, op.Vd);
@ -269,5 +392,28 @@ namespace ARMeilleure.Instructions
SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
}
private static void EmitSse2CmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
{
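// CMPPS/CMPPD with an immediate predicate produce an all-ones or all-zero mask per lane,
// which is exactly the element-wise true/false result the A32 vector compares expect.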
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
int sizeF = op.Size & 1;
Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
if (zero)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
});
}
else
{
EmitVectorBinaryOpSimd32(context, (n, m) =>
{
return context.AddIntrinsic(inst, n, m, Const((int)cond));
});
}
}
}
}

View file

@ -475,7 +475,7 @@ namespace ARMeilleure.Instructions
// Intrinsic Emits
private static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
public static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
{
int originalSide = originalV & 1;
int targetSide = targetV & 1;
@ -495,7 +495,7 @@ namespace ARMeilleure.Instructions
}
}
private static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
{
int targetSide = targetV & 1;
int shuffleMask = 2 | 0;
@ -510,7 +510,7 @@ namespace ARMeilleure.Instructions
}
}
private static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
{
// Swap the element at index with element 0 (and back afterwards). This swap happens at the start and end of an A32 scalar op if required.
int index = reg & (doubleWidth ? 1 : 3);
@ -530,7 +530,7 @@ namespace ARMeilleure.Instructions
}
}
private static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
public static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
{
// insert from index 0 in value to index in target
int index = reg & (doubleWidth ? 1 : 3);
@ -556,21 +556,54 @@ namespace ARMeilleure.Instructions
}
}
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
// Vector Operand Templates
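// These *Simd32 helpers take a delegate that builds the SSE body and handle the A32
// register layout themselves: D registers alias halves of Q registers, so for non-Q ops
// the source doubleword is swapped onto the destination side first and the result is
// inserted back into the untouched half afterwards.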
public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
if (!op.Q) //register swap: move relevant doubleword to destination side
{
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = context.AddIntrinsic(inst, m);
Operand res = vectorFunc(m);
if (!op.Q) //register insert
{
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
}
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
}
public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
if (!op.Q) //register swap: move relevant doubleword to destination side
{
n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = vectorFunc(n, m);
if (!op.Q) //register insert
{
@ -584,29 +617,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
if (!op.Q) //register swap: move relevant doubleword to destination side
{
n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = context.AddIntrinsic(inst, n, m);
if (!op.Q) //register insert
{
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
@ -615,17 +630,13 @@ namespace ARMeilleure.Instructions
Operand d = GetVecA32(op.Qd);
Operand initialD = d;
Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
if (!op.Q) //register swap: move relevant doubleword to destination side
{
n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = context.AddIntrinsic(inst1, n, m);
res = context.AddIntrinsic(inst2, d, res);
Operand res = vectorFunc(d, n, m);
if (!op.Q) //register insert
{
@ -635,7 +646,21 @@ namespace ARMeilleure.Instructions
context.Copy(initialD, res);
}
public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(inst1, n, m);
return context.AddIntrinsic(inst2, d, res);
});
}
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
@ -646,9 +671,8 @@ namespace ARMeilleure.Instructions
m = EmitSwapScalar(context, m, op.Vm, doubleSize);
Intrinsic inst = doubleSize ? inst64 : inst32;
Operand res = scalarFunc(m);
Operand res = (inst == 0) ? m : context.AddIntrinsic(inst, m);
if (false) // op.Vd == op.Vm) //small optimisation: can just swap it back for the result
{
res = EmitSwapScalar(context, res, op.Vd, doubleSize);
@ -662,7 +686,16 @@ namespace ARMeilleure.Instructions
context.Copy(d, res);
}
public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
}
public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
@ -675,9 +708,7 @@ namespace ARMeilleure.Instructions
n = EmitSwapScalar(context, n, op.Vn, doubleSize);
m = EmitSwapScalar(context, m, op.Vm, doubleSize);
Intrinsic inst = doubleSize ? inst64 : inst32;
Operand res = context.AddIntrinsic(inst, n, m);
Operand res = scalarFunc(n, m);
if (false) // //small optimisation: can just swap it back for the result
{
@ -692,7 +723,16 @@ namespace ARMeilleure.Instructions
context.Copy(d, res);
}
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
@ -707,11 +747,7 @@ namespace ARMeilleure.Instructions
m = EmitSwapScalar(context, m, op.Vm, doubleSize);
d = EmitSwapScalar(context, d, op.Vd, doubleSize);
Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
Operand res = context.AddIntrinsic(inst1, n, m);
res = context.AddIntrinsic(inst2, d, res);
Operand res = scalarFunc(d, n, m);
// insert scalar into vector
res = EmitInsertScalar(context, initialD, res, op.Vd, doubleSize);
@ -719,6 +755,22 @@ namespace ARMeilleure.Instructions
context.Copy(initialD, res);
}
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(inst1, n, m);
return context.AddIntrinsic(inst2, d, res);
});
}
// Generic Functions
public static Operand EmitSoftFloatCallDefaultFpscr(

View file

@ -1,4 +1,5 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
@ -9,7 +10,14 @@ namespace ARMeilleure.Instructions
{
public static void Vand_I(ArmEmitterContext context)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
if (Optimizations.UseSse2)
{
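// PAND is size-agnostic, so the same intrinsic is passed for both size slots of the helper.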
EmitVectorBinaryOpF32(context, Intrinsic.X86Pand, Intrinsic.X86Pand);
}
else
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
}
}
public static void Vbif(ArmEmitterContext context)
@ -24,33 +32,64 @@ namespace ARMeilleure.Instructions
public static void Vbsl(ArmEmitterContext context)
{
EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
if (Optimizations.UseSse2)
{
return context.BitwiseExclusiveOr(
context.BitwiseAnd(op1,
context.BitwiseExclusiveOr(op2, op3)), op3);
});
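// Bitwise select with three ops instead of AND/ANDN/OR: ((n ^ m) & d) ^ m picks bits
// from n where the destination mask d is set and from m elsewhere.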
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
});
}
else
{
EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
{
return context.BitwiseExclusiveOr(
context.BitwiseAnd(op1,
context.BitwiseExclusiveOr(op2, op3)), op3);
});
}
}
public static void Vorr_I(ArmEmitterContext context)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
if (Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Por, Intrinsic.X86Por);
}
else
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
}
}
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
EmitVectorTernaryOpZx32(context, (d, n, m) =>
if (Optimizations.UseSse2)
{
if (notRm)
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
m = context.BitwiseNot(m);
}
return context.BitwiseExclusiveOr(
context.BitwiseAnd(m,
context.BitwiseExclusiveOr(d, n)), d);
});
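// VBIF/VBIT as a masked insert: d ^ ((d ^ n) & mask) copies bits of n into d where the
// mask bit is set; PANDN supplies the complemented mask (~m) for the notRm case.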
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
});
}
else
{
EmitVectorTernaryOpZx32(context, (d, n, m) =>
{
if (notRm)
{
m = context.BitwiseNot(m);
}
return context.BitwiseExclusiveOr(
context.BitwiseAnd(m,
context.BitwiseExclusiveOr(d, n)), d);
});
}
}
}
}