More instructions, some cleanup.
This commit is contained in:
parent  b5578b0d47
commit  fd08ff0805

5 changed files with 547 additions and 85 deletions
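The pattern repeated across the five files below: each A32 SIMD emitter gains an x86 fast path guarded by the Optimizations flags, the previous soft-float call is kept as the fallback branch, and the helper file gains *OpSimd32 templates (taking Func1I/Func2I/Func3I callbacks) so the intrinsic and managed paths share the same register swap/insert plumbing. A minimal sketch of that shape, with an invented method name (EmitExampleOp is illustrative only; the flags, templates and intrinsics it uses are the ones appearing in the diff):

    // Illustrative sketch only - not part of this commit.
    public static void EmitExampleOp(ArmEmitterContext context)
    {
        OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

        if (Optimizations.FastFP && Optimizations.UseSse2)
        {
            // Fast path: compute the result with an x86 intrinsic inside the shared template,
            // which handles the A32 scalar register swap before and after the operation.
            EmitScalarUnaryOpSimd32(context, (m) =>
            {
                Operand mask = X86GetScalar(context, -0f); // only the sign bit set
                return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
            });
        }
        else
        {
            // Fallback: the pre-existing soft-float helper call.
            EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
        }
    }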
@@ -3121,7 +3121,7 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVec(op.Rd), res);
         }

-        private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
+        public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
         {
             IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

@@ -15,7 +15,28 @@ namespace ARMeilleure.Instructions
     {
         public static void Vabs_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitScalarUnaryOpSimd32(context, (m) =>
+                {
+                    if ((op.Size & 1) == 0)
+                    {
+                        Operand mask = X86GetScalar(context, -0f);
+                        return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
+                    }
+                    else
+                    {
+                        Operand mask = X86GetScalar(context, -0d);
+                        return context.AddIntrinsic(Intrinsic.X86Andnpd, mask, m);
+                    }
+                });
+            }
+            else
+            {
+                EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
+            }
         }

         public static void Vabs_V(ArmEmitterContext context)
@@ -24,7 +45,26 @@ namespace ARMeilleure.Instructions

             if (op.F)
             {
-                EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
+                if (Optimizations.FastFP && Optimizations.UseSse2)
+                {
+                    EmitVectorUnaryOpSimd32(context, (m) =>
+                    {
+                        if ((op.Size & 1) == 0)
+                        {
+                            Operand mask = X86GetScalar(context, -0f);
+                            return context.AddIntrinsic(Intrinsic.X86Andnps, mask, m);
+                        }
+                        else
+                        {
+                            Operand mask = X86GetScalar(context, -0d);
+                            return context.AddIntrinsic(Intrinsic.X86Andnpd, mask, m);
+                        }
+                    });
+                }
+                else
+                {
+                    EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1));
+                }
             }
             else
             {
@@ -182,18 +222,80 @@ namespace ARMeilleure.Instructions

         public static void Vneg_S(ArmEmitterContext context)
         {
-            //TODO: intrinsic that XORs the sign bit
-            EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+            if (Optimizations.UseSse2)
+            {
+                EmitScalarUnaryOpSimd32(context, (m) =>
+                {
+                    if ((op.Size & 1) == 0)
+                    {
+                        Operand mask = X86GetScalar(context, -0f);
+                        return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+                    }
+                    else
+                    {
+                        Operand mask = X86GetScalar(context, -0d);
+                        return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+                    }
+                });
+            }
+            else
+            {
+                EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
+            }
         }

         public static void Vnmul_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+            if (Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpSimd32(context, (n, m) =>
+                {
+                    if ((op.Size & 1) == 0)
+                    {
+                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+                        Operand mask = X86GetScalar(context, -0f);
+                        return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
+                    }
+                    else
+                    {
+                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+                        Operand mask = X86GetScalar(context, -0d);
+                        return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
+                    }
+                });
+            }
+            else
+            {
+                EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+            }
         }

         public static void Vnmla_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP)
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+                {
+                    if ((op.Size & 1) == 0)
+                    {
+                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+                        res = context.AddIntrinsic(Intrinsic.X86Addss, d, res);
+                        Operand mask = X86GetScalar(context, -0f);
+                        return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
+                    }
+                    else
+                    {
+                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+                        res = context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
+                        Operand mask = X86GetScalar(context, -0d);
+                        return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
+                    }
+                });
+            }
+            else if (Optimizations.FastFP)
             {
                 EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                 {
@@ -211,7 +313,29 @@ namespace ARMeilleure.Instructions

         public static void Vnmls_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP)
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+                {
+                    if ((op.Size & 1) == 0)
+                    {
+                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+                        Operand mask = X86GetScalar(context, -0f);
+                        d = context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
+                        return context.AddIntrinsic(Intrinsic.X86Addss, d, res);
+
+                    }
+                    else
+                    {
+                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+                        Operand mask = X86GetScalar(context, -0d);
+                        d = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d);
+                        return context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
+                    }
+                });
+            }
+            else if (Optimizations.FastFP)
             {
                 EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                 {
@@ -229,9 +353,30 @@ namespace ARMeilleure.Instructions

         public static void Vneg_V(ArmEmitterContext context)
         {
-            if ((context.CurrOp as OpCode32Simd).F)
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            if (op.F)
             {
-                EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
+                if (Optimizations.UseSse2)
+                {
+                    EmitVectorUnaryOpSimd32(context, (m) =>
+                    {
+                        if ((op.Size & 1) == 0)
+                        {
+                            Operand mask = X86GetScalar(context, -0f);
+                            return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+                        }
+                        else
+                        {
+                            Operand mask = X86GetScalar(context, -0d);
+                            return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+                        }
+                    });
+                }
+                else
+                {
+                    EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
+                }
             }
             else
             {
@@ -260,22 +405,50 @@ namespace ARMeilleure.Instructions

         public static void Vmaxnm_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2));
+            if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                EmitSse41MaxMinNumOpF32(context, true, true);
+            }
+            else
+            {
+                EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2));
+            }
         }

         public static void Vmaxnm_V(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2));
+            if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                EmitSse41MaxMinNumOpF32(context, true, false);
+            }
+            else
+            {
+                EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2));
+            }
         }

         public static void Vminnm_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
+            if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                EmitSse41MaxMinNumOpF32(context, false, true);
+            }
+            else
+            {
+                EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
+            }
         }

         public static void Vminnm_V(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2));
+            if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                EmitSse41MaxMinNumOpF32(context, false, false);
+            }
+            else
+            {
+                EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2));
+            }
         }

         public static void Vmax_V(ArmEmitterContext context)
@@ -291,12 +464,12 @@ namespace ARMeilleure.Instructions
                     return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxFpscr, SoftFloat64.FPMaxFpscr, op1, op2);
                 });
             }

         }

         public static void Vmax_I(ArmEmitterContext context)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

             if (op.U)
             {
                 EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
@@ -325,6 +498,7 @@ namespace ARMeilleure.Instructions
         public static void Vmin_I(ArmEmitterContext context)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
             if (op.U)
             {
                 EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
@@ -728,5 +902,56 @@ namespace ARMeilleure.Instructions
         {
             EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
         }
+
+        private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
+        {
+            IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
+            Func<Operand, Operand, Operand> genericEmit = (n, m) =>
+            {
+                Operand nNum = context.Copy(n);
+                Operand mNum = context.Copy(m);
+
+                Operand nQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, nNum);
+                Operand mQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, mNum);
+
+                int sizeF = op.Size & 1;
+
+                if (sizeF == 0)
+                {
+                    Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+                    Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+                    nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
+                    mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
+
+                    return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
+                }
+                else /* if (sizeF == 1) */
+                {
+                    Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+                    Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+                    nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
+                    mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
+
+                    return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
+                }
+            };
+
+            if (scalar)
+            {
+                EmitScalarBinaryOpSimd32(context, genericEmit);
+            }
+            else
+            {
+                EmitVectorBinaryOpSimd32(context, genericEmit);
+            }
+
+        }
     }
 }
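A note on EmitSse41MaxMinNumOpF32 above: FPMaxNum/FPMinNum return the numeric operand when exactly one input is a quiet NaN, which a bare maxps/minps does not do (it returns its second operand). The helper therefore masks the lanes where only one side is a quiet NaN and, with blendv, swaps that side for negative infinity (max) or positive infinity (min) before the max/min, so the real number always wins. A scalar sketch of the same idea in plain C#, illustrative only (MaxNumSketch is not part of the diff):

    using System;

    static class MaxNumSketch
    {
        // Mirrors the quiet-NaN handling done with Andnps/Blendvps/Maxps in the helper.
        public static float FpMaxNum(float n, float m)
        {
            if (float.IsNaN(n) && !float.IsNaN(m)) n = float.NegativeInfinity; // lone NaN loses
            if (float.IsNaN(m) && !float.IsNaN(n)) m = float.NegativeInfinity;

            return MathF.Max(n, m); // both NaN -> NaN still propagates
        }
    }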
@@ -5,6 +5,7 @@ using ARMeilleure.Translation;
 using System;

 using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper32;
 using static ARMeilleure.IntermediateRepresentation.OperandHelper;

@@ -16,7 +17,14 @@ namespace ARMeilleure.Instructions
     {
         public static void Vceq_V(ArmEmitterContext context)
         {
-            EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitSse2CmpOpF32(context, CmpCondition.Equal, false);
+            }
+            else
+            {
+                EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
+            }
         }

         public static void Vceq_I(ArmEmitterContext context)
@@ -30,7 +38,14 @@ namespace ARMeilleure.Instructions

             if (op.F)
             {
-                EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
+                if (Optimizations.FastFP && Optimizations.UseSse2)
+                {
+                    EmitSse2CmpOpF32(context, CmpCondition.Equal, true);
+                }
+                else
+                {
+                    EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
+                }
             }
             else
             {
@@ -40,7 +55,14 @@ namespace ARMeilleure.Instructions

         public static void Vcge_V(ArmEmitterContext context)
         {
-            EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+            }
+            else
+            {
+                EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
+            }
         }

         public static void Vcge_I(ArmEmitterContext context)
@@ -56,7 +78,14 @@ namespace ARMeilleure.Instructions

             if (op.F)
             {
-                EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
+                if (Optimizations.FastFP && Optimizations.UseSse2)
+                {
+                    EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+                }
+                else
+                {
+                    EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
+                }
             }
             else
             {
@@ -66,7 +95,14 @@ namespace ARMeilleure.Instructions

         public static void Vcgt_V(ArmEmitterContext context)
         {
-            EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, false);
+            }
+            else
+            {
+                EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
+            }
         }

         public static void Vcgt_I(ArmEmitterContext context)
@@ -82,7 +118,14 @@ namespace ARMeilleure.Instructions

             if (op.F)
             {
-                EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
+                if (Optimizations.FastFP && Optimizations.UseSse2)
+                {
+                    EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, true);
+                }
+                else
+                {
+                    EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
+                }
             }
             else
             {
@@ -96,7 +139,14 @@ namespace ARMeilleure.Instructions

             if (op.F)
             {
-                EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
+                if (Optimizations.FastFP && Optimizations.UseSse2)
+                {
+                    EmitSse2CmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+                }
+                else
+                {
+                    EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
+                }
             }
             else
             {
@@ -110,7 +160,14 @@ namespace ARMeilleure.Instructions

             if (op.F)
             {
-                EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
+                if (Optimizations.FastFP && Optimizations.UseSse2)
+                {
+                    EmitSse2CmpOpF32(context, CmpCondition.LessThan, true);
+                }
+                else
+                {
+                    EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
+                }
             }
             else
             {
@@ -224,8 +281,74 @@ namespace ARMeilleure.Instructions
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

             bool cmpWithZero = (op.Opc & 2) != 0;
+            int fSize = op.Size & 1;
+
+            if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+            {
+                CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+                bool doubleSize = fSize != 0;
+                int shift = doubleSize ? 1 : 2;
+                Operand m = GetVecA32(op.Vm >> shift);
+                Operand n = GetVecA32(op.Vd >> shift);
+
+                n = EmitSwapScalar(context, n, op.Vd, doubleSize);
+                m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+                Operand lblNaN = Label();
+                Operand lblEnd = Label();
+
+                if (!doubleSize)
+                {
+                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+                    Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+                    context.BranchIfFalse(lblNaN, isOrdered);
+
+                    Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+                    Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+                    Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+
+                    EmitSetFPSCRFlags(context, context.BitwiseOr(
+                        context.ShiftLeft(cf, Const(1)),
+                        context.BitwiseOr(
+                            context.ShiftLeft(zf, Const(2)),
+                            context.ShiftLeft(nf, Const(3))
+                        )
+                    ));
+                }
+                else
+                {
+                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+                    Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+                    context.BranchIfFalse(lblNaN, isOrdered);
+
+                    Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+                    Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+                    Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+
+                    EmitSetFPSCRFlags(context, context.BitwiseOr(
+                        context.ShiftLeft(cf, Const(1)),
+                        context.BitwiseOr(
+                            context.ShiftLeft(zf, Const(2)),
+                            context.ShiftLeft(nf, Const(3))
+                        )
+                    ));
+                }
+
+                context.Branch(lblEnd);
+
+                context.MarkLabel(lblNaN);
+
+                EmitSetFPSCRFlags(context, Const(3));
+
+                context.MarkLabel(lblEnd);
+            }
+            else
+            {
-            int fSize = op.Size & 1;
-            OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32;
+                OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32;

-            Operand ne = ExtractScalar(context, type, op.Vd);
+                Operand ne = ExtractScalar(context, type, op.Vd);
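On the flag handling in the Vcmp/Vcmpe fast path above: each comiss/comisd-style intrinsic yields 0 or 1, and the results are packed as (cf << 1) | (zf << 2) | (nf << 3) before EmitSetFPSCRFlags. Judging from the Extract calls later in this file (Z from bit 2, N from bit 3), bit 1 carries C and bit 0 carries V, so the Const(3) used on the NaN path sets C and V only, i.e. the ARM "unordered" result NZCV = 0011. As a worked example, equal operands give cf = 1, zf = 1, nf = 0, packing to (1 << 1) | (1 << 2) = 6, which reads back as NZCV = 0110.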
@@ -269,5 +392,28 @@ namespace ARMeilleure.Instructions
             SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
             SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
         }
+
+        private static void EmitSse2CmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+            Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
+
+            if (zero)
+            {
+                EmitVectorUnaryOpSimd32(context, (m) =>
+                {
+                    return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
+                });
+            }
+            else
+            {
+                EmitVectorBinaryOpSimd32(context, (n, m) =>
+                {
+                    return context.AddIntrinsic(inst, n, m, Const((int)cond));
+                });
+            }
+        }
     }
 }
@@ -475,7 +475,7 @@ namespace ARMeilleure.Instructions

         // Intrinsic Emits

-        private static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+        public static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
         {
             int originalSide = originalV & 1;
             int targetSide = targetV & 1;
@@ -495,7 +495,7 @@ namespace ARMeilleure.Instructions
             }
         }

-        private static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+        public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
         {
             int targetSide = targetV & 1;
             int shuffleMask = 2 | 0;
@@ -510,7 +510,7 @@ namespace ARMeilleure.Instructions
             }
         }

-        private static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+        public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
         {
             // index into 0, 0 into index. This swap happens at the start and end of an A32 scalar op if required.
             int index = reg & (doubleWidth ? 1 : 3);
@@ -530,7 +530,7 @@ namespace ARMeilleure.Instructions
             }
         }

-        private static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+        public static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
         {
             // insert from index 0 in value to index in target
             int index = reg & (doubleWidth ? 1 : 3);
@@ -556,21 +556,54 @@ namespace ARMeilleure.Instructions
             }
         }

-        public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        // Vector Operand Templates
+
+        public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
         {
             OpCode32Simd op = (OpCode32Simd)context.CurrOp;

             Operand m = GetVecA32(op.Qm);
             Operand d = GetVecA32(op.Qd);

-            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
-
             if (!op.Q) //register swap: move relevant doubleword to destination side
             {
                 m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
             }

-            Operand res = context.AddIntrinsic(inst, m);
+            Operand res = vectorFunc(m);

             if (!op.Q) //register insert
             {
+                res = EmitDoubleWordInsert(context, d, res, op.Vd);
+            }
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+            EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+        }
+
+        public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Operand n = GetVecA32(op.Qn);
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+
+            if (!op.Q) //register swap: move relevant doubleword to destination side
+            {
+                n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
+                m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
+            }
+
+            Operand res = vectorFunc(n, m);
+
+            if (!op.Q) //register insert
+            {
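The *OpSimd32 templates introduced here keep the D/Q register plumbing (doubleword swap before the operation, insert afterwards) in one place and take a Func1I/Func2I/Func3I callback for the actual computation; the thin *OpF32 wrappers and the instruction emitters in the other files just pass a lambda. For instance, the Vneg_S hunk earlier in this diff boils down to:

    // Usage shape taken from the Vneg_S hunk above (single-precision case).
    EmitScalarUnaryOpSimd32(context, (m) =>
    {
        Operand mask = X86GetScalar(context, -0f);                // only the sign bit set
        return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m); // negate by flipping it
    });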
@@ -584,29 +617,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

-            Operand n = GetVecA32(op.Qn);
-            Operand m = GetVecA32(op.Qm);
-            Operand d = GetVecA32(op.Qd);
-
             Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
-
-            if (!op.Q) //register swap: move relevant doubleword to destination side
-            {
-                n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
-                m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
-            }
-
-            Operand res = context.AddIntrinsic(inst, n, m);
-
-            if (!op.Q) //register insert
-            {
-                res = EmitDoubleWordInsert(context, d, res, op.Vd);
-            }
-
-            context.Copy(d, res);
+            EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
         }

-        public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+        public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

@@ -615,17 +630,13 @@ namespace ARMeilleure.Instructions
             Operand d = GetVecA32(op.Qd);
             Operand initialD = d;

-            Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
-            Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
-
             if (!op.Q) //register swap: move relevant doubleword to destination side
             {
                 n = EmitSwapDoubleWordToSide(context, n, op.Vn, op.Vd);
                 m = EmitSwapDoubleWordToSide(context, m, op.Vm, op.Vd);
             }

-            Operand res = context.AddIntrinsic(inst1, n, m);
-            res = context.AddIntrinsic(inst2, d, res);
+            Operand res = vectorFunc(d, n, m);

             if (!op.Q) //register insert
             {
@@ -635,7 +646,21 @@ namespace ARMeilleure.Instructions
             context.Copy(initialD, res);
         }

-        public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+            Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+            EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+            {
+                Operand res = context.AddIntrinsic(inst1, n, m);
+                return res = context.AddIntrinsic(inst2, d, res);
+            });
+        }
+
+        public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

@@ -646,9 +671,8 @@ namespace ARMeilleure.Instructions

             m = EmitSwapScalar(context, m, op.Vm, doubleSize);

-            Intrinsic inst = doubleSize ? inst64 : inst32;
-            Operand res = (inst == 0) ? m : context.AddIntrinsic(inst, m);
+            Operand res = scalarFunc(m);

             if (false) // op.Vd == op.Vm) //small optimisation: can just swap it back for the result
             {
                 res = EmitSwapScalar(context, res, op.Vd, doubleSize);
@@ -662,7 +686,16 @@ namespace ARMeilleure.Instructions
             context.Copy(d, res);
         }

-        public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+            EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+        }
+
+        public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

@@ -675,9 +708,7 @@ namespace ARMeilleure.Instructions
             n = EmitSwapScalar(context, n, op.Vn, doubleSize);
             m = EmitSwapScalar(context, m, op.Vm, doubleSize);

-            Intrinsic inst = doubleSize ? inst64 : inst32;
-
-            Operand res = context.AddIntrinsic(inst, n, m);
+            Operand res = scalarFunc(n, m);

             if (false) // //small optimisation: can just swap it back for the result
             {
@@ -692,7 +723,16 @@ namespace ARMeilleure.Instructions
             context.Copy(d, res);
         }

-        public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+        public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+            EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

@@ -707,11 +747,7 @@ namespace ARMeilleure.Instructions
             m = EmitSwapScalar(context, m, op.Vm, doubleSize);
             d = EmitSwapScalar(context, d, op.Vd, doubleSize);

-            Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
-            Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
-
-            Operand res = context.AddIntrinsic(inst1, n, m);
-            res = context.AddIntrinsic(inst2, d, res);
+            Operand res = scalarFunc(d, n, m);

             // insert scalar into vector
             res = EmitInsertScalar(context, initialD, res, op.Vd, doubleSize);
@@ -719,6 +755,22 @@ namespace ARMeilleure.Instructions
             context.Copy(initialD, res);
         }

+        public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
+            Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
+
+            EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+            {
+                Operand res = context.AddIntrinsic(inst1, n, m);
+                return context.AddIntrinsic(inst2, d, res);
+            });
+        }
+
         // Generic Functions

         public static Operand EmitSoftFloatCallDefaultFpscr(
@@ -1,4 +1,5 @@
 using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;

 using static ARMeilleure.Instructions.InstEmitSimdHelper32;
@@ -9,7 +10,14 @@ namespace ARMeilleure.Instructions
     {
         public static void Vand_I(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+            if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF32(context, Intrinsic.X86Pand, Intrinsic.X86Pand);
+            }
+            else
+            {
+                EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+            }
         }

         public static void Vbif(ArmEmitterContext context)
@@ -24,33 +32,64 @@ namespace ARMeilleure.Instructions

         public static void Vbsl(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
+            if (Optimizations.UseSse2)
             {
-                return context.BitwiseExclusiveOr(
-                    context.BitwiseAnd(op1,
-                        context.BitwiseExclusiveOr(op2, op3)), op3);
-            });
+                EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+                {
+                    Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+                    res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+                    return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+                });
+            }
+            else
+            {
+                EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
+                {
+                    return context.BitwiseExclusiveOr(
+                        context.BitwiseAnd(op1,
+                            context.BitwiseExclusiveOr(op2, op3)), op3);
+                });
+            }
         }

         public static void Vorr_I(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
+            if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF32(context, Intrinsic.X86Por, Intrinsic.X86Por);
+            }
+            else
+            {
+                EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
+            }
         }

         private static void EmitBifBit(ArmEmitterContext context, bool notRm)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

-            EmitVectorTernaryOpZx32(context, (d, n, m) =>
+            if (Optimizations.UseSse2)
             {
-                if (notRm)
+                EmitVectorTernaryOpSimd32(context, (d, n, m) =>
                 {
-                    m = context.BitwiseNot(m);
-                }
-                return context.BitwiseExclusiveOr(
-                    context.BitwiseAnd(m,
-                        context.BitwiseExclusiveOr(d, n)), d);
-            });
+                    Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+                    res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
+                    return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+                });
+            }
+            else
+            {
+                EmitVectorTernaryOpZx32(context, (d, n, m) =>
+                {
+                    if (notRm)
+                    {
+                        m = context.BitwiseNot(m);
+                    }
+                    return context.BitwiseExclusiveOr(
+                        context.BitwiseAnd(m,
+                            context.BitwiseExclusiveOr(d, n)), d);
+                });
+            }
         }
     }
 }
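Vbsl and EmitBifBit use the same bit-select identity on both paths: choosing bits from x where a mask bit is set and from y where it is clear can be written y ^ ((x ^ y) & mask), which is what the Pxor/Pand/Pxor sequence computes and what the BitwiseExclusiveOr/BitwiseAnd fallback spells out. A one-method illustration (BitSelectSketch is not part of the diff):

    static class BitSelectSketch
    {
        // Take bits from x where mask = 1, from y where mask = 0.
        public static uint BitSelect(uint mask, uint x, uint y)
        {
            return y ^ ((x ^ y) & mask);
        }
    }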