Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
This commit is contained in:
parent
87238b509c
commit
0bacc3fd63
7 changed files with 117 additions and 53 deletions
|
@ -21,6 +21,8 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
public int CallArgsRegionSize { get; }
|
||||
|
||||
public int VecCalleeSaveSize { get; }
|
||||
|
||||
private struct Jump
|
||||
{
|
||||
public bool IsConditional { get; }
|
||||
|
@ -77,22 +79,27 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Assembler = new Assembler(stream);
|
||||
|
||||
CallArgsRegionSize = GetCallArgsRegionSize(allocResult);
|
||||
CallArgsRegionSize = GetCallArgsRegionSize(allocResult, out int vecCalleeSaveSize);
|
||||
|
||||
VecCalleeSaveSize = vecCalleeSaveSize;
|
||||
|
||||
_blockOffsets = new long[blocksCount];
|
||||
|
||||
_jumps = new List<Jump>();
|
||||
}
|
||||
|
||||
private int GetCallArgsRegionSize(AllocationResult allocResult)
|
||||
private int GetCallArgsRegionSize(AllocationResult allocResult, out int vecCalleeSaveSize)
|
||||
{
|
||||
//We need to add 8 bytes to the total size, as the call to this
|
||||
//function already pushed 8 bytes (the return address).
|
||||
int mask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
|
||||
int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
|
||||
int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
|
||||
|
||||
mask |= 1 << (int)X86Register.Rbp;
|
||||
vecCalleeSaveSize = BitUtils.CountBits(vecMask) * 16;
|
||||
|
||||
int calleeSaveRegionSize = CountBits(mask) * 8 + 8;
|
||||
intMask |= 1 << (int)X86Register.Rbp;
|
||||
|
||||
int calleeSaveRegionSize = BitUtils.CountBits(intMask) * 8 + vecCalleeSaveSize + 8;
|
||||
|
||||
int argsCount = allocResult.MaxCallArgs;
|
||||
|
||||
|
@ -113,20 +120,6 @@ namespace ARMeilleure.CodeGen.X86
|
|||
return callArgsAndFrameSize - frameSize;
|
||||
}
|
||||
|
||||
//Counts how many bits are set on the mask, by clearing the bit
//reported by BitUtils.LowestBitSet until the mask becomes zero.
private static int CountBits(int mask)
{
    int total = 0;

    for (int remaining = mask; remaining != 0; total++)
    {
        int bit = BitUtils.LowestBitSet(remaining);

        remaining &= ~(1 << bit);
    }

    return total;
}
|
||||
|
||||
public void EnterBlock(BasicBlock block)
|
||||
{
|
||||
_blockOffsets[block.Index] = _stream.Position;
|
||||
|
|
|
@ -1793,8 +1793,27 @@ namespace ARMeilleure.CodeGen.X86
|
|||
mask &= ~(1 << bit);
|
||||
}
|
||||
|
||||
mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
|
||||
|
||||
int offset = 0;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
int bit = BitUtils.LowestBitSet(mask);
|
||||
|
||||
offset -= 16;
|
||||
|
||||
X86MemoryOperand memOp = new X86MemoryOperand(OperandType.V128, Register(X86Register.Rsp), null, Scale.x1, offset);
|
||||
|
||||
context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
|
||||
|
||||
mask &= ~(1 << bit);
|
||||
}
|
||||
|
||||
int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
|
||||
|
||||
reservedStackSize += context.VecCalleeSaveSize;
|
||||
|
||||
if (reservedStackSize != 0)
|
||||
{
|
||||
context.Assembler.Sub(Register(X86Register.Rsp), new Operand(reservedStackSize));
|
||||
|
@ -1803,17 +1822,36 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
private static void WriteEpilogue(CodeGenContext context)
|
||||
{
|
||||
int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
|
||||
|
||||
mask |= 1 << (int)X86Register.Rbp;
|
||||
|
||||
int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
|
||||
|
||||
reservedStackSize += context.VecCalleeSaveSize;
|
||||
|
||||
if (reservedStackSize != 0)
|
||||
{
|
||||
context.Assembler.Add(Register(X86Register.Rsp), new Operand(reservedStackSize));
|
||||
}
|
||||
|
||||
int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
|
||||
|
||||
int offset = 0;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
int bit = BitUtils.LowestBitSet(mask);
|
||||
|
||||
offset -= 16;
|
||||
|
||||
X86MemoryOperand memOp = new X86MemoryOperand(OperandType.V128, Register(X86Register.Rsp), null, Scale.x1, offset);
|
||||
|
||||
context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
|
||||
|
||||
mask &= ~(1 << bit);
|
||||
}
|
||||
|
||||
mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
|
||||
|
||||
mask |= 1 << (int)X86Register.Rbp;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
int bit = BitUtils.HighestBitSet(mask);
|
||||
|
@ -1849,5 +1887,10 @@ namespace ARMeilleure.CodeGen.X86
|
|||
{
|
||||
return new Operand((int)register, RegisterType.Integer, type);
|
||||
}
|
||||
|
||||
//Creates a vector register operand of type V128, using the
//integer value of the specified register as the register index.
private static Operand Xmm(X86Register register)
{
    int index = (int)register;

    return new Operand(index, RegisterType.Vector, OperandType.V128);
}
|
||||
}
|
||||
}
|
|
@ -27,7 +27,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Instruction inst = operation.Inst;
|
||||
|
||||
AddConstantCopy(node, operation);
|
||||
HandleConstantCopy(node, operation);
|
||||
|
||||
//Comparison instructions use CMOVcc, which does not zero the
|
||||
//upper bits of the register (since it's R8), we need to ensure it
|
||||
|
@ -55,14 +55,14 @@ namespace ARMeilleure.CodeGen.X86
|
|||
ReplaceNegateWithXor(node, operation);
|
||||
}
|
||||
|
||||
AddFixedRegisterCopy(node, operation);
|
||||
HandleFixedRegisterCopy(node, operation);
|
||||
|
||||
AddSameDestSrc1Copy(node, operation);
|
||||
HandleSameDestSrc1Copy(node, operation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void AddConstantCopy(LinkedListNode<Node> node, Operation operation)
|
||||
private static void HandleConstantCopy(LinkedListNode<Node> node, Operation operation)
|
||||
{
|
||||
if (operation.SourcesCount == 0 || IsIntrinsic(operation.Inst))
|
||||
{
|
||||
|
@ -229,7 +229,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Delete(node, operation);
|
||||
}
|
||||
|
||||
private static void AddFixedRegisterCopy(LinkedListNode<Node> node, Operation operation)
|
||||
private static void HandleFixedRegisterCopy(LinkedListNode<Node> node, Operation operation)
|
||||
{
|
||||
if (operation.SourcesCount == 0)
|
||||
{
|
||||
|
@ -353,7 +353,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
|
||||
|
||||
node.List.AddBefore(node, storeOp);
|
||||
HandleConstantCopy(node.List.AddBefore(node, storeOp), storeOp);
|
||||
|
||||
operation.SetSource(index, stackAddr);
|
||||
}
|
||||
|
@ -390,7 +390,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source);
|
||||
|
||||
node.List.AddBefore(node, srcCopyOp);
|
||||
HandleConstantCopy(node.List.AddBefore(node, srcCopyOp), srcCopyOp);
|
||||
|
||||
operation.SetSource(index + 1, argReg);
|
||||
}
|
||||
|
@ -405,7 +405,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
|
||||
|
||||
node.List.AddBefore(node, spillOp);
|
||||
HandleConstantCopy(node.List.AddBefore(node, spillOp), spillOp);
|
||||
|
||||
operation.SetSource(index + 1, new Operand(OperandKind.Undefined));
|
||||
}
|
||||
|
@ -444,7 +444,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
}
|
||||
}
|
||||
|
||||
private static void AddSameDestSrc1Copy(LinkedListNode<Node> node, Operation operation)
|
||||
private static void HandleSameDestSrc1Copy(LinkedListNode<Node> node, Operation operation)
|
||||
{
|
||||
if (operation.Dest == null || operation.SourcesCount == 0)
|
||||
{
|
||||
|
|
|
@ -67,6 +67,20 @@ namespace ARMeilleure.Common
|
|||
return output;
|
||||
}
|
||||
|
||||
//Returns the number of bits that are set (1) on the value.
//All 32 bits are considered, so negative values are supported.
public static int CountBits(int value)
{
    int setBits = 0;

    //Each iteration clears the lowest set bit, so the loop
    //runs once per set bit rather than once per bit position.
    for (; value != 0; setBits++)
    {
        value &= value - 1;
    }

    return setBits;
}
|
||||
|
||||
public static long FillWithOnes(int bits)
|
||||
{
|
||||
return bits == 64 ? -1L : (1L << bits) - 1;
|
||||
|
|
|
@ -13,7 +13,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
static class InstEmitMemoryHelper
|
||||
{
|
||||
private static bool ForceFallback = false;
|
||||
private static bool ForceFallback = true;
|
||||
|
||||
private enum Extension
|
||||
{
|
||||
|
@ -87,6 +87,8 @@ namespace ARMeilleure.Instructions
|
|||
case 2: value = context.SignExtend32(value); break;
|
||||
}
|
||||
}
|
||||
|
||||
context.Copy(GetT(context, rt), value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -470,9 +470,9 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
context.BranchIfFalse(lblNaN, isOrdered);
|
||||
|
||||
Operand cf = context.AddIntrinsicInt(Instruction.X86Comissge, n, m);
|
||||
Operand zf = context.AddIntrinsicInt(Instruction.X86Comisseq, n, m);
|
||||
Operand nf = context.AddIntrinsicInt(Instruction.X86Comisslt, n, m);
|
||||
Operand cf = context.AddIntrinsicInt(Instruction.X86Comissge, n, m);
|
||||
Operand zf = context.AddIntrinsicInt(Instruction.X86Comisseq, n, m);
|
||||
Operand nf = context.AddIntrinsicInt(Instruction.X86Comisslt, n, m);
|
||||
|
||||
context.Copy(GetFlag(PState.VFlag), Const(0));
|
||||
context.Copy(GetFlag(PState.CFlag), cf);
|
||||
|
@ -487,9 +487,9 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
context.BranchIfFalse(lblNaN, isOrdered);
|
||||
|
||||
Operand cf = context.AddIntrinsicInt(Instruction.X86Comisdge, n, m);
|
||||
Operand zf = context.AddIntrinsicInt(Instruction.X86Comisdeq, n, m);
|
||||
Operand nf = context.AddIntrinsicInt(Instruction.X86Comisdlt, n, m);
|
||||
Operand cf = context.AddIntrinsicInt(Instruction.X86Comisdge, n, m);
|
||||
Operand zf = context.AddIntrinsicInt(Instruction.X86Comisdeq, n, m);
|
||||
Operand nf = context.AddIntrinsicInt(Instruction.X86Comisdlt, n, m);
|
||||
|
||||
context.Copy(GetFlag(PState.VFlag), Const(0));
|
||||
context.Copy(GetFlag(PState.CFlag), cf);
|
||||
|
|
|
@ -706,8 +706,10 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static float FPCompareEQ(float value1, float value2, ExecutionContext context)
|
||||
public static float FPCompareEQ(float value1, float value2)
|
||||
{
|
||||
ExecutionContext context = NativeInterface.GetContext();
|
||||
|
||||
value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
|
||||
value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
|
||||
|
||||
|
@ -730,8 +732,10 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static float FPCompareGE(float value1, float value2, ExecutionContext context)
|
||||
public static float FPCompareGE(float value1, float value2)
|
||||
{
|
||||
ExecutionContext context = NativeInterface.GetContext();
|
||||
|
||||
value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
|
||||
value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
|
||||
|
||||
|
@ -751,8 +755,10 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static float FPCompareGT(float value1, float value2, ExecutionContext context)
|
||||
public static float FPCompareGT(float value1, float value2)
|
||||
{
|
||||
ExecutionContext context = NativeInterface.GetContext();
|
||||
|
||||
value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
|
||||
value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
|
||||
|
||||
|
@ -772,14 +778,14 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static float FPCompareLE(float value1, float value2, ExecutionContext context)
|
||||
public static float FPCompareLE(float value1, float value2)
|
||||
{
|
||||
return FPCompareGE(value2, value1, context);
|
||||
return FPCompareGE(value2, value1);
|
||||
}
|
||||
|
||||
public static float FPCompareLT(float value1, float value2, ExecutionContext context)
|
||||
public static float FPCompareLT(float value1, float value2)
|
||||
{
|
||||
return FPCompareGT(value2, value1, context);
|
||||
return FPCompareGT(value2, value1);
|
||||
}
|
||||
|
||||
public static float FPDiv(float value1, float value2)
|
||||
|
@ -1769,8 +1775,10 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static double FPCompareEQ(double value1, double value2, ExecutionContext context)
|
||||
public static double FPCompareEQ(double value1, double value2)
|
||||
{
|
||||
ExecutionContext context = NativeInterface.GetContext();
|
||||
|
||||
value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
|
||||
value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
|
||||
|
||||
|
@ -1793,8 +1801,10 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static double FPCompareGE(double value1, double value2, ExecutionContext context)
|
||||
public static double FPCompareGE(double value1, double value2)
|
||||
{
|
||||
ExecutionContext context = NativeInterface.GetContext();
|
||||
|
||||
value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
|
||||
value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
|
||||
|
||||
|
@ -1814,8 +1824,10 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static double FPCompareGT(double value1, double value2, ExecutionContext context)
|
||||
public static double FPCompareGT(double value1, double value2)
|
||||
{
|
||||
ExecutionContext context = NativeInterface.GetContext();
|
||||
|
||||
value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
|
||||
value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
|
||||
|
||||
|
@ -1835,14 +1847,14 @@ namespace ARMeilleure.Instructions
|
|||
return result;
|
||||
}
|
||||
|
||||
public static double FPCompareLE(double value1, double value2, ExecutionContext context)
|
||||
public static double FPCompareLE(double value1, double value2)
|
||||
{
|
||||
return FPCompareGE(value2, value1, context);
|
||||
return FPCompareGE(value2, value1);
|
||||
}
|
||||
|
||||
public static double FPCompareLT(double value1, double value2, ExecutionContext context)
|
||||
public static double FPCompareLT(double value1, double value2)
|
||||
{
|
||||
return FPCompareGT(value2, value1, context);
|
||||
return FPCompareGT(value2, value1);
|
||||
}
|
||||
|
||||
public static double FPDiv(double value1, double value2)
|
||||
|
|
Loading…
Add table
Reference in a new issue