Optimize Fccmp*_S & Fcmp*_S.
This commit is contained in:
parent
b957e79f8b
commit
2c5a1495b6
4 changed files with 41 additions and 5 deletions
|
@ -265,7 +265,21 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
|
||||
|
||||
context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
|
||||
if (intrinOp.Intrinsic == Intrinsic.X86Cvtsi2si)
|
||||
{
|
||||
if (dest.Type == OperandType.I32)
|
||||
{
|
||||
context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32
|
||||
}
|
||||
else /* if (dest.Type == OperandType.I64) */
|
||||
{
|
||||
context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
|
||||
Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
|
||||
Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
|
||||
|
|
|
@ -451,7 +451,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
|
||||
|
||||
EmitSetNzcv(context, Const(op.Nzcv));
|
||||
EmitSetNzcv(context, op.Nzcv);
|
||||
|
||||
context.Branch(lblEnd);
|
||||
|
||||
|
@ -462,13 +462,33 @@ namespace ARMeilleure.Instructions
|
|||
context.MarkLabel(lblEnd);
|
||||
}
|
||||
|
||||
/// <summary>
/// Sets the four condition flags from a compile-time NZCV immediate.
/// Each flag receives a constant operand holding a single bit of
/// <paramref name="nzcv"/>: bit 0 goes to V, bit 1 to C, bit 2 to Z and bit 3 to N.
/// </summary>
/// <param name="context">Emitter context that the flag writes are issued against.</param>
/// <param name="nzcv">Immediate flag value; only its four lowest bits are read.</param>
private static void EmitSetNzcv(ArmEmitterContext context, int nzcv)
{
    // Isolates one bit of the immediate and wraps it as a constant operand (0 or 1).
    Operand FlagBit(int position) => Const((nzcv >> position) & 1);

    // The flags are written in V, C, Z, N order, from the lowest bit to the highest.
    SetFlag(context, PState.VFlag, FlagBit(0));
    SetFlag(context, PState.CFlag, FlagBit(1));
    SetFlag(context, PState.ZFlag, FlagBit(2));
    SetFlag(context, PState.NFlag, FlagBit(3));
}
|
||||
|
||||
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
|
||||
{
|
||||
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
||||
|
||||
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
|
||||
|
||||
if (Optimizations.FastFP && Optimizations.UseAvx)
|
||||
if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
|
||||
{
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
|
||||
|
@ -482,7 +502,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
|
||||
|
||||
Operand isOrdered = context.VectorExtract16(ordMask, 0);
|
||||
Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
|
||||
|
||||
context.BranchIfFalse(lblNaN, isOrdered);
|
||||
|
||||
|
@ -499,7 +519,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
|
||||
|
||||
Operand isOrdered = context.VectorExtract16(ordMask, 0);
|
||||
Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
|
||||
|
||||
context.BranchIfFalse(lblNaN, isOrdered);
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ namespace ARMeilleure.IntermediateRepresentation
|
|||
X86Cvtps2pd,
|
||||
X86Cvtsd2si,
|
||||
X86Cvtsd2ss,
|
||||
X86Cvtsi2si,
|
||||
X86Cvtss2sd,
|
||||
X86Divpd,
|
||||
X86Divps,
|
||||
|
|
Loading…
Add table
Reference in a new issue