Float operands don't need to use the same register when VEX is supported

This commit is contained in:
gdkchan 2019-07-29 13:56:00 -03:00
commit 9f4e6815c7
4 changed files with 49 additions and 13 deletions

View file

@ -272,6 +272,8 @@ namespace ARMeilleure.CodeGen.X86
// - The dividend is always in RDX:RAX. // - The dividend is always in RDX:RAX.
// - The result is always in RAX. // - The result is always in RAX.
// - Additionally it also writes the remainder in RDX. // - Additionally it also writes the remainder in RDX.
if (dest.Type.IsInteger())
{
Operand src1 = operation.GetSource(0); Operand src1 = operation.GetSource(0);
Operand rax = Gpr(X86Register.Rax, src1.Type); Operand rax = Gpr(X86Register.Rax, src1.Type);
@ -287,6 +289,7 @@ namespace ARMeilleure.CodeGen.X86
operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) }); operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) });
operation.Destination = rax; operation.Destination = rax;
}
break; break;
} }
@ -1115,20 +1118,25 @@ namespace ARMeilleure.CodeGen.X86
switch (operation.Instruction) switch (operation.Instruction)
{ {
case Instruction.Add: case Instruction.Add:
case Instruction.Multiply:
case Instruction.Subtract:
return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();
case Instruction.BitwiseAnd: case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr: case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseNot: case Instruction.BitwiseNot:
case Instruction.BitwiseOr: case Instruction.BitwiseOr:
case Instruction.ByteSwap: case Instruction.ByteSwap:
case Instruction.Multiply:
case Instruction.Negate: case Instruction.Negate:
case Instruction.RotateRight: case Instruction.RotateRight:
case Instruction.ShiftLeft: case Instruction.ShiftLeft:
case Instruction.ShiftRightSI: case Instruction.ShiftRightSI:
case Instruction.ShiftRightUI: case Instruction.ShiftRightUI:
case Instruction.Subtract:
return true; return true;
case Instruction.Divide:
return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
case Instruction.VectorInsert: case Instruction.VectorInsert:
case Instruction.VectorInsert16: case Instruction.VectorInsert16:
case Instruction.VectorInsert8: case Instruction.VectorInsert8:

View file

@ -417,6 +417,10 @@ namespace ARMeilleure.Instructions
{ {
EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
} }
else if (Optimizations.FastFP)
{
EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
}
else else
{ {
EmitScalarBinaryOpF(context, (op1, op2) => EmitScalarBinaryOpF(context, (op1, op2) =>
@ -432,6 +436,10 @@ namespace ARMeilleure.Instructions
{ {
EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd); EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
} }
else if (Optimizations.FastFP)
{
EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
}
else else
{ {
EmitVectorBinaryOpF(context, (op1, op2) => EmitVectorBinaryOpF(context, (op1, op2) =>
@ -841,6 +849,10 @@ namespace ARMeilleure.Instructions
{ {
EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
} }
else if (Optimizations.FastFP)
{
EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
}
else else
{ {
EmitScalarBinaryOpF(context, (op1, op2) => EmitScalarBinaryOpF(context, (op1, op2) =>
@ -861,6 +873,10 @@ namespace ARMeilleure.Instructions
{ {
EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
} }
else if (Optimizations.FastFP)
{
EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
}
else else
{ {
EmitVectorBinaryOpF(context, (op1, op2) => EmitVectorBinaryOpF(context, (op1, op2) =>
@ -907,6 +923,10 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res); context.Copy(GetVec(op.Rd), res);
} }
} }
else if (Optimizations.FastFP)
{
EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
}
else else
{ {
EmitVectorBinaryOpByElemF(context, (op1, op2) => EmitVectorBinaryOpByElemF(context, (op1, op2) =>
@ -1552,6 +1572,10 @@ namespace ARMeilleure.Instructions
{ {
EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
} }
else if (Optimizations.FastFP)
{
EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
}
else else
{ {
EmitScalarBinaryOpF(context, (op1, op2) => EmitScalarBinaryOpF(context, (op1, op2) =>
@ -1567,6 +1591,10 @@ namespace ARMeilleure.Instructions
{ {
EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
} }
else if (Optimizations.FastFP)
{
EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
}
else else
{ {
EmitVectorBinaryOpF(context, (op1, op2) => EmitVectorBinaryOpF(context, (op1, op2) =>

View file

@ -80,7 +80,7 @@ namespace ARMeilleure.Translation
public Operand Call(Delegate func, params Operand[] callArgs) public Operand Call(Delegate func, params Operand[] callArgs)
{ {
//Add the delegate to the cache to ensure it will not be garbage collected. // Add the delegate to the cache to ensure it will not be garbage collected.
func = DelegateCache.GetOrAdd(func); func = DelegateCache.GetOrAdd(func);
IntPtr ptr = Marshal.GetFunctionPointerForDelegate<Delegate>(func); IntPtr ptr = Marshal.GetFunctionPointerForDelegate<Delegate>(func);

View file

@ -70,7 +70,7 @@ namespace ARMeilleure.Translation
public static void RunPass(ControlFlowGraph cfg) public static void RunPass(ControlFlowGraph cfg)
{ {
// Computer local register inputs and outputs used inside blocks. // Compute local register inputs and outputs used inside blocks.
RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count]; RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count]; RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];