More instructions, some cleanup.

riperiperi 2020-01-20 22:56:23 +00:00
commit fd08ff0805
5 changed files with 547 additions and 85 deletions

View file

@@ -3121,7 +3121,7 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
-private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

View file

@@ -15,7 +15,28 @@ namespace ARMeilleure.Instructions
{
public static void Vabs_S(ArmEmitterContext context)
{
-EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Andnpd, mask, m);
}
});
}
else
{
EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
}
}
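Both Vabs fast paths (and the Vneg paths further down) rest on the same observation: float abs and neg only touch the IEEE 754 sign bit, so andnps/andnpd with a -0.0 mask clears it and xorps/xorpd flips it. A minimal standalone sketch of the bit manipulation in plain C# (illustration only, not ARMeilleure code):

using System;

static class SignBitSketch
{
    // andnps with -0.0f: clear the sign bit. Correct for every input,
    // including -0.0, the infinities, and NaN (payload preserved).
    public static float Abs(float x) =>
        BitConverter.Int32BitsToSingle(BitConverter.SingleToInt32Bits(x) & 0x7FFFFFFF);

    // xorps with -0.0f: flip the sign bit, with none of the rounding or
    // NaN-quieting concerns of computing 0 - x.
    public static float Neg(float x) =>
        BitConverter.Int32BitsToSingle(BitConverter.SingleToInt32Bits(x) ^ unchecked((int)0x80000000));
}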
public static void Vabs_V(ArmEmitterContext context)
@@ -24,7 +45,26 @@
if (op.F)
{
-EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Andnpd, mask, m);
}
});
}
else
{
EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
}
}
else
{
@@ -182,18 +222,80 @@ namespace ARMeilleure.Instructions
public static void Vneg_S(ArmEmitterContext context)
{
-//TODO: intrinsic that XORs the sign bit
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
-EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
if (Optimizations.UseSse2)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
}
});
}
else
{
EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
}
}
public static void Vnmul_S(ArmEmitterContext context)
{
-EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.UseSse2)
{
EmitScalarBinaryOpSimd32(context, (n, m) =>
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
}
});
}
else
{
EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
}
}
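For reference, Vnmul_S above and the Vnmla_S/Vnmls_S emitters below compute the following (a scalar sketch of the intrinsic sequences; the softfloat fallbacks additionally honour FPSCR modes):

// VNMUL: negate the product; VNMLA: negate the whole accumulate;
// VNMLS: negate only the accumulator.
static float Vnmul(float n, float m)          => -(n * m);
static float Vnmla(float d, float n, float m) => -(d + n * m);
static float Vnmls(float d, float n, float m) => -d + n * m;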
public static void Vnmla_S(ArmEmitterContext context)
{
-if (Optimizations.FastFP)
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
res = context.AddIntrinsic(Intrinsic.X86Addss, d, res);
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
res = context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
}
});
}
else if (Optimizations.FastFP)
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
@@ -211,7 +313,29 @@ namespace ARMeilleure.Instructions
public static void Vnmls_S(ArmEmitterContext context)
{
-if (Optimizations.FastFP)
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
Operand mask = X86GetScalar(context, -0f);
d = context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
return context.AddIntrinsic(Intrinsic.X86Addss, d, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
Operand mask = X86GetScalar(context, -0d);
d = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d);
return context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
}
});
}
else if (Optimizations.FastFP)
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
@@ -229,9 +353,30 @@ namespace ARMeilleure.Instructions
public static void Vneg_V(ArmEmitterContext context)
{
-if ((context.CurrOp as OpCode32Simd).F)
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
if (op.F)
{
-EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
if (Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
if ((op.Size & 1) == 0)
{
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
}
else
{
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
}
});
}
else
{
EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
}
}
else
{
@@ -260,22 +405,50 @@ namespace ARMeilleure.Instructions
public static void Vmaxnm_S(ArmEmitterContext context)
{
-EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, true, true);
}
else
{
EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2));
}
}
public static void Vmaxnm_V(ArmEmitterContext context)
{
-EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, true, false);
}
else
{
EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2));
}
}
public static void Vminnm_S(ArmEmitterContext context)
{
-EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, false, true);
}
else
{
EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
}
}
public static void Vminnm_V(ArmEmitterContext context)
{
-EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2));
if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, false, false);
}
else
{
EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2));
}
}
public static void Vmax_V(ArmEmitterContext context)
@@ -291,12 +464,12 @@ namespace ARMeilleure.Instructions
return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxFpscr, SoftFloat64.FPMaxFpscr, op1, op2);
});
}
}
public static void Vmax_I(ArmEmitterContext context)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
if (op.U)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
@@ -325,6 +498,7 @@ namespace ARMeilleure.Instructions
public static void Vmin_I(ArmEmitterContext context)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
if (op.U)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
@@ -728,5 +902,56 @@ namespace ARMeilleure.Instructions
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
}
private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
{
IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
Func<Operand, Operand, Operand> genericEmit = (n, m) =>
{
Operand nNum = context.Copy(n);
Operand mNum = context.Copy(m);
Operand nQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, nNum);
Operand mQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, mNum);
int sizeF = op.Size & 1;
if (sizeF == 0)
{
Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
}
else /* if (sizeF == 1) */
{
Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
}
};
if (scalar)
{
EmitScalarBinaryOpSimd32(context, genericEmit);
}
else
{
EmitVectorBinaryOpSimd32(context, genericEmit);
}
}
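EmitSse41MaxMinNumOpF32 reproduces the IEEE 754-2008 maxNum/minNum rule that FPMaxNum/FPMinNum implement: a quiet NaN on one side only loses to the number on the other side. The blendv pair substitutes -Inf (+Inf for min) into exactly those lanes before the ordinary maxps/minps runs. A scalar sketch of the same rule, assuming plain C#:

using System;

static class MaxMinNumSketch
{
    static float MaxNum(float n, float m)
    {
        bool nIsNaN = float.IsNaN(n);
        bool mIsNaN = float.IsNaN(m);
        // A one-sided NaN is replaced by -Inf so Max picks the real number;
        // this mirrors the nMask/mMask blends above.
        if (nIsNaN && !mIsNaN) n = float.NegativeInfinity;
        if (mIsNaN && !nIsNaN) m = float.NegativeInfinity;
        return MathF.Max(n, m); // both NaN: NaN falls through, as required
    }
}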
}
}

View file

@@ -5,6 +5,7 @@ using ARMeilleure.Translation;
using System;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@@ -16,7 +17,14 @@ namespace ARMeilleure.Instructions
{
public static void Vceq_V(ArmEmitterContext context)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.Equal, false);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
}
}
public static void Vceq_I(ArmEmitterContext context)
@@ -30,7 +38,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.Equal, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
}
}
else
{
@@ -40,7 +55,14 @@ namespace ARMeilleure.Instructions
public static void Vcge_V(ArmEmitterContext context)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
}
}
public static void Vcge_I(ArmEmitterContext context)
@@ -56,7 +78,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
}
}
else
{
@@ -66,7 +95,14 @@ namespace ARMeilleure.Instructions
public static void Vcgt_V(ArmEmitterContext context)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, false);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
}
}
public static void Vcgt_I(ArmEmitterContext context)
@@ -82,7 +118,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
}
}
else
{
@@ -96,7 +139,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.LessThanOrEqual, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
}
}
else
{
@@ -110,7 +160,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
-EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2CmpOpF32(context, CmpCondition.LessThan, true);
}
else
{
EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
}
}
else
{
@@ -224,8 +281,74 @@ namespace ARMeilleure.Instructions
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool cmpWithZero = (op.Opc & 2) != 0;
int fSize = op.Size & 1;
if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
{
CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
bool doubleSize = fSize != 0;
int shift = doubleSize ? 1 : 2;
Operand m = GetVecA32(op.Vm >> shift);
Operand n = GetVecA32(op.Vd >> shift);
n = EmitSwapScalar(context, n, op.Vd, doubleSize);
m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
Operand lblNaN = Label();
Operand lblEnd = Label();
if (!doubleSize)
{
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
context.BranchIfFalse(lblNaN, isOrdered);
Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
EmitSetFPSCRFlags(context, context.BitwiseOr(
context.ShiftLeft(cf, Const(1)),
context.BitwiseOr(
context.ShiftLeft(zf, Const(2)),
context.ShiftLeft(nf, Const(3))
)
));
}
else
{
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
context.BranchIfFalse(lblNaN, isOrdered);
Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
EmitSetFPSCRFlags(context, context.BitwiseOr(
context.ShiftLeft(cf, Const(1)),
context.BitwiseOr(
context.ShiftLeft(zf, Const(2)),
context.ShiftLeft(nf, Const(3))
)
));
}
context.Branch(lblEnd);
context.MarkLabel(lblNaN);
EmitSetFPSCRFlags(context, Const(3));
context.MarkLabel(lblEnd);
}
else
{
-int fSize = op.Size & 1;
OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32;
Operand ne = ExtractScalar(context, type, op.Vd);
@@ -269,5 +392,28 @@ namespace ARMeilleure.Instructions
SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
}
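The flag packing in the fast path above mirrors the AArch32 FCMP result encoding, with nzcv laid out as N = bit 3, Z = bit 2, C = bit 1, V = bit 0: greater yields 0010, equal 0110, less 1000, and the NaN branch's Const(3) is the unordered result 0011 (C and V set). A sketch of the mapping:

// NZCV produced by the comis* sequence above (bits 3..0 = N, Z, C, V).
static int PackNzcv(double n, double m)
{
    if (double.IsNaN(n) || double.IsNaN(m)) return 0b0011; // unordered
    int c  = n >= m ? 1 : 0; // carry doubles as greater-or-equal
    int z  = n == m ? 1 : 0;
    int nf = n <  m ? 1 : 0;
    return (nf << 3) | (z << 2) | (c << 1);
}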
private static void EmitSse2CmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
int sizeF = op.Size & 1;
Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
if (zero)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
});
}
else
{
EmitVectorBinaryOpSimd32(context, (n, m) =>
{
return context.AddIntrinsic(inst, n, m, Const((int)cond));
});
}
}
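EmitSse2CmpOpF32 passes the CmpCondition value straight through as the cmpps/cmppd immediate. The enum is not part of this diff; presumably it follows the SSE/VEX predicate encodings, which would also explain why the VCMP path above demands UseAvx when signalling NaNs: the signalling ordered predicate only exists in the VEX range. An assumed sketch:

// Assumed shape of CmpCondition (not shown in this commit): values are
// CMPPS/CMPPD imm8 predicates; entries above 7 need the VEX encoding.
enum CmpCondition
{
    Equal              = 0,  // cmpeqps
    LessThan           = 1,  // cmpltps
    LessThanOrEqual    = 2,  // cmpleps
    Unordered          = 3,  // cmpunordps
    OrderedQ           = 7,  // cmpordps (quiet)
    GreaterThanOrEqual = 13, // vcmpgeps (VEX only)
    GreaterThan        = 14, // vcmpgtps (VEX only)
    OrderedS           = 23  // vcmpord_sps (VEX only, signalling)
}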
}
}

View file

@@ -475,7 +475,7 @@ namespace ARMeilleure.Instructions
// Intrinsic Emits
-private static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
public static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
{
int originalSide = originalV & 1;
int targetSide = targetV & 1;
@@ -495,7 +495,7 @@ namespace ARMeilleure.Instructions
}
}
-private static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
{
int targetSide = targetV & 1;
int shuffleMask = 2 | 0;
@@ -510,7 +510,7 @@ namespace ARMeilleure.Instructions
}
}
-private static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
{
// index into 0, 0 into index. This swap happens at the start and end of an A32 scalar op if required.
int index = reg & (doubleWidth ? 1 : 3);
@@ -530,7 +530,7 @@ namespace ARMeilleure.Instructions
}
}
-private static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
public static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
{
// insert from index 0 in value to index in target
int index = reg & (doubleWidth ? 1 : 3);
@@ -556,21 +556,54 @@ namespace ARMeilleure.Instructions
}
}
-public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
// Vector Operand Templates
public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
-Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
if (!op.Q) //register swap: move relevant doubleword to destination side
{
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
-Operand res = context.AddIntrinsic(inst, m);
Operand res = vectorFunc(m);
if (!op.Q) //register insert
{
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
}
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
}
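The Func1I/Func2I/Func3I parameters taken by the new *Simd32 templates are plain IR-operand transformers; the wrappers above and below show the intended usage. Their declarations are not part of this diff, but judging from the call sites they have these shapes (assumption):

// Presumed delegate declarations (d = destination, n/m = source vectors).
public delegate Operand Func1I(Operand m);
public delegate Operand Func2I(Operand n, Operand m);
public delegate Operand Func3I(Operand d, Operand n, Operand m);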
public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
if (!op.Q) //register swap: move relevant doubleword to destination side
{
n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = vectorFunc(n, m);
if (!op.Q) //register insert
{
@@ -584,29 +617,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
-Operand n = GetVecA32(op.Qn);
-Operand m = GetVecA32(op.Qm);
-Operand d = GetVecA32(op.Qd);
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
-if (!op.Q) //register swap: move relevant doubleword to destination side
-{
-n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
-m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
-}
-Operand res = context.AddIntrinsic(inst, n, m);
-if (!op.Q) //register insert
-{
-res = EmitDoubleWordInsert(context, d, res, op.Vd);
-}
-context.Copy(d, res);
}
-public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
@@ -615,17 +630,13 @@ namespace ARMeilleure.Instructions
Operand d = GetVecA32(op.Qd);
Operand initialD = d;
-Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
-Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
if (!op.Q) //register swap: move relevant doubleword to destination side
{
n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
}
-Operand res = context.AddIntrinsic(inst1, n, m);
-res = context.AddIntrinsic(inst2, d, res);
Operand res = vectorFunc(d, n, m);
if (!op.Q) //register insert
{
@@ -635,7 +646,21 @@ namespace ARMeilleure.Instructions
context.Copy(initialD, res);
}
-public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(inst1, n, m);
return context.AddIntrinsic(inst2, d, res);
});
}
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
@@ -646,9 +671,8 @@ namespace ARMeilleure.Instructions
m = EmitSwapScalar(context, m, op.Vm, doubleSize);
-Intrinsic inst = doubleSize ? inst64 : inst32;
-Operand res = (inst == 0) ? m : context.AddIntrinsic(inst, m);
Operand res = scalarFunc(m);
if (false) // op.Vd == op.Vm) //small optimisation: can just swap it back for the result
{
res = EmitSwapScalar(context, res, op.Vd, doubleSize);
@@ -662,37 +686,53 @@ namespace ARMeilleure.Instructions
context.Copy(d, res);
}
public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
}
public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vn >> shift);
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
n = EmitSwapScalar(context, n, op.Vn, doubleSize);
m = EmitSwapScalar(context, m, op.Vm, doubleSize);
Operand res = scalarFunc(n, m);
if (false) // //small optimisation: can just swap it back for the result
{
res = EmitSwapScalar(context, res, op.Vd, doubleSize);
}
else
{
// insert scalar into vector
res = EmitInsertScalar(context, d, res, op.Vd, doubleSize);
}
context.Copy(d, res);
}
public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
-bool doubleSize = (op.Size & 1) != 0;
-int shift = doubleSize ? 1 : 2;
-Operand n = GetVecA32(op.Vn >> shift);
-Operand m = GetVecA32(op.Vm >> shift);
-Operand d = GetVecA32(op.Vd >> shift);
-n = EmitSwapScalar(context, n, op.Vn, doubleSize);
-m = EmitSwapScalar(context, m, op.Vm, doubleSize);
-Intrinsic inst = doubleSize ? inst64 : inst32;
-Operand res = context.AddIntrinsic(inst, n, m);
-if (false) // //small optimisation: can just swap it back for the result
-{
-res = EmitSwapScalar(context, res, op.Vd, doubleSize);
-}
-else
-{
-// insert scalar into vector
-res = EmitInsertScalar(context, d, res, op.Vd, doubleSize);
-}
-context.Copy(d, res);
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
-public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
@@ -707,11 +747,7 @@ namespace ARMeilleure.Instructions
m = EmitSwapScalar(context, m, op.Vm, doubleSize);
d = EmitSwapScalar(context, d, op.Vd, doubleSize);
-Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
-Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
-Operand res = context.AddIntrinsic(inst1, n, m);
-res = context.AddIntrinsic(inst2, d, res);
Operand res = scalarFunc(d, n, m);
// insert scalar into vector
res = EmitInsertScalar(context, initialD, res, op.Vd, doubleSize);
@@ -719,6 +755,22 @@ namespace ARMeilleure.Instructions
context.Copy(initialD, res);
}
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(inst1, n, m);
return context.AddIntrinsic(inst2, d, res);
});
}
// Generic Functions
public static Operand EmitSoftFloatCallDefaultFpscr(

View file

@@ -1,4 +1,5 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
@@ -9,7 +10,14 @@ namespace ARMeilleure.Instructions
{
public static void Vand_I(ArmEmitterContext context)
{
-EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
if (Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Pand, Intrinsic.X86Pand);
}
else
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
}
}
public static void Vbif(ArmEmitterContext context)
@@ -24,33 +32,64 @@ namespace ARMeilleure.Instructions
public static void Vbsl(ArmEmitterContext context)
{
-EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
-{
-return context.BitwiseExclusiveOr(
-context.BitwiseAnd(op1,
-context.BitwiseExclusiveOr(op2, op3)), op3);
-});
if (Optimizations.UseSse2)
{
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
});
}
else
{
EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
{
return context.BitwiseExclusiveOr(
context.BitwiseAnd(op1,
context.BitwiseExclusiveOr(op2, op3)), op3);
});
}
}
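The SSE Vbsl path replaces the three boolean ops of the fallback with the classic two-xor-one-and bit-select identity; with d as the select mask, both branches compute the same value:

// ((n ^ m) & d) ^ m == (n & d) | (m & ~d): take n where d is 1, m where 0.
static uint BitSelect(uint d, uint n, uint m) => ((n ^ m) & d) ^ m;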
public static void Vorr_I(ArmEmitterContext context)
{
-EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
if (Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Por, Intrinsic.X86Por);
}
else
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
}
}
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
-EmitVectorTernaryOpZx32(context, (d, n, m) =>
-{
-if (notRm)
-{
-m = context.BitwiseNot(m);
-}
-return context.BitwiseExclusiveOr(
-context.BitwiseAnd(m,
-context.BitwiseExclusiveOr(d, n)), d);
-});
if (Optimizations.UseSse2)
{
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
});
}
else
{
EmitVectorTernaryOpZx32(context, (d, n, m) =>
{
if (notRm)
{
m = context.BitwiseNot(m);
}
return context.BitwiseExclusiveOr(
context.BitwiseAnd(m,
context.BitwiseExclusiveOr(d, n)), d);
});
}
}
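EmitBifBit uses the same family of identities: VBIT inserts n bits into d where m is set, VBIF where m is clear, and pandn supplies the inverted mask for free in the BIF case. In scalar form:

// d ^ ((n ^ d) & mask): insert n where mask is 1, keep d elsewhere.
static uint Vbit(uint d, uint n, uint m) => d ^ ((n ^ d) & m);  // insert if set
static uint Vbif(uint d, uint n, uint m) => d ^ ((n ^ d) & ~m); // insert if clear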
}
}