Optimize Fccmp*_S & Fcmp*_S.

This commit is contained in:
LDj3SNuD 2019-11-09 02:06:36 +01:00
parent b957e79f8b
commit 2c5a1495b6
4 changed files with 41 additions and 5 deletions

View file

@ -265,7 +265,21 @@ namespace ARMeilleure.CodeGen.X86
Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
// X86Cvtsi2si is special-cased: instead of going through the generic
// WriteInstruction path, the move is emitted directly, picking the
// 32-bit (Movd) or 64-bit (Movq) form from the destination operand type.
if (intrinOp.Intrinsic == Intrinsic.X86Cvtsi2si)
{
if (dest.Type == OperandType.I32)
{
context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32
}
else /* if (dest.Type == OperandType.I64) */
{
// Any destination type other than I32 takes this branch; per the
// commented-out condition it is expected to be I64.
context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64
}
}
else
{
// Every other intrinsic reaching this point uses the instruction from its info entry.
context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
}
break;
}

View file

@ -37,6 +37,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));

View file

@ -451,7 +451,7 @@ namespace ARMeilleure.Instructions
context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
EmitSetNzcv(context, Const(op.Nzcv));
EmitSetNzcv(context, op.Nzcv);
context.Branch(lblEnd);
@ -462,13 +462,33 @@ namespace ARMeilleure.Instructions
context.MarkLabel(lblEnd);
}
/// <summary>
/// Sets the V, C, Z and N flags from bits 0, 1, 2 and 3 (respectively) of an
/// immediate <paramref name="nzcv"/> value.
/// </summary>
/// <param name="context">Emitter context passed through to each SetFlag call.</param>
/// <param name="nzcv">Immediate value whose low four bits supply the flag values.</param>
private static void EmitSetNzcv(ArmEmitterContext context, int nzcv)
{
    // Isolates one bit of nzcv and wraps it (0 or 1) in a constant operand.
    Operand FlagBit(int position) => Const((nzcv >> position) & 1);

    SetFlag(context, PState.VFlag, FlagBit(0));
    SetFlag(context, PState.CFlag, FlagBit(1));
    SetFlag(context, PState.ZFlag, FlagBit(2));
    SetFlag(context, PState.NFlag, FlagBit(3));
}
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
if (Optimizations.FastFP && Optimizations.UseAvx)
if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
{
Operand n = GetVec(op.Rn);
Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
@ -482,7 +502,7 @@ namespace ARMeilleure.Instructions
{
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
Operand isOrdered = context.VectorExtract16(ordMask, 0);
Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
context.BranchIfFalse(lblNaN, isOrdered);
@ -499,7 +519,7 @@ namespace ARMeilleure.Instructions
{
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
Operand isOrdered = context.VectorExtract16(ordMask, 0);
Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
context.BranchIfFalse(lblNaN, isOrdered);

View file

@ -26,6 +26,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Cvtps2pd,
X86Cvtsd2si,
X86Cvtsd2ss,
X86Cvtsi2si,
X86Cvtss2sd,
X86Divpd,
X86Divps,