From 0bacc3fd63a131530385268d4e5c31791723573c Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 19 Jun 2019 16:38:17 -0300 Subject: [PATCH] Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes --- ARMeilleure/CodeGen/X86/CodeGenContext.cs | 31 +++++------ ARMeilleure/CodeGen/X86/CodeGenerator.cs | 51 +++++++++++++++++-- ARMeilleure/CodeGen/X86/PreAllocator.cs | 18 +++---- ARMeilleure/Common/BitUtils.cs | 14 +++++ .../Instructions/InstEmitMemoryHelper.cs | 4 +- ARMeilleure/Instructions/InstEmitSimdCmp.cs | 12 ++--- ARMeilleure/Instructions/SoftFloat.cs | 40 ++++++++++----- 7 files changed, 117 insertions(+), 53 deletions(-) diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs index 5fcae53e6a..46005f9248 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenContext.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -21,6 +21,8 @@ namespace ARMeilleure.CodeGen.X86 public int CallArgsRegionSize { get; } + public int VecCalleeSaveSize { get; } + private struct Jump { public bool IsConditional { get; } @@ -77,22 +79,27 @@ namespace ARMeilleure.CodeGen.X86 Assembler = new Assembler(stream); - CallArgsRegionSize = GetCallArgsRegionSize(allocResult); + CallArgsRegionSize = GetCallArgsRegionSize(allocResult, out int vecCalleeSaveSize); + + VecCalleeSaveSize = vecCalleeSaveSize; _blockOffsets = new long[blocksCount]; _jumps = new List(); } - private int GetCallArgsRegionSize(AllocationResult allocResult) + private int GetCallArgsRegionSize(AllocationResult allocResult, out int vecCalleeSaveSize) { //We need to add 8 bytes to the total size, as the call to this //function already pushed 8 bytes (the return address). - int mask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters; + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters; - mask |= 1 << (int)X86Register.Rbp; + vecCalleeSaveSize = BitUtils.CountBits(vecMask) * 16; - int calleeSaveRegionSize = CountBits(mask) * 8 + 8; + intMask |= 1 << (int)X86Register.Rbp; + + int calleeSaveRegionSize = BitUtils.CountBits(intMask) * 8 + vecCalleeSaveSize + 8; int argsCount = allocResult.MaxCallArgs; @@ -113,20 +120,6 @@ namespace ARMeilleure.CodeGen.X86 return callArgsAndFrameSize - frameSize; } - private static int CountBits(int mask) - { - int count = 0; - - while (mask != 0) - { - mask &= ~(1 << BitUtils.LowestBitSet(mask)); - - count++; - } - - return count; - } - public void EnterBlock(BasicBlock block) { _blockOffsets[block.Index] = _stream.Position; diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 61f3efee07..50f4e417cb 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -1793,8 +1793,27 @@ namespace ARMeilleure.CodeGen.X86 mask &= ~(1 << bit); } + mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int offset = 0; + + while (mask != 0) + { + int bit = BitUtils.LowestBitSet(mask); + + offset -= 16; + + X86MemoryOperand memOp = new X86MemoryOperand(OperandType.V128, Register(X86Register.Rsp), null, Scale.x1, offset); + + context.Assembler.Movdqu(memOp, Xmm((X86Register)bit)); + + mask &= ~(1 << bit); + } + int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize; + reservedStackSize += context.VecCalleeSaveSize; + if (reservedStackSize != 0) { context.Assembler.Sub(Register(X86Register.Rsp), new Operand(reservedStackSize)); @@ -1803,17 +1822,36 @@ namespace ARMeilleure.CodeGen.X86 private static void WriteEpilogue(CodeGenContext context) { - int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; - - mask |= 1 << (int)X86Register.Rbp; - int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize; + reservedStackSize += context.VecCalleeSaveSize; + if (reservedStackSize != 0) { context.Assembler.Add(Register(X86Register.Rsp), new Operand(reservedStackSize)); } + int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int offset = 0; + + while (mask != 0) + { + int bit = BitUtils.LowestBitSet(mask); + + offset -= 16; + + X86MemoryOperand memOp = new X86MemoryOperand(OperandType.V128, Register(X86Register.Rsp), null, Scale.x1, offset); + + context.Assembler.Movdqu(Xmm((X86Register)bit), memOp); + + mask &= ~(1 << bit); + } + + mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + + mask |= 1 << (int)X86Register.Rbp; + while (mask != 0) { int bit = BitUtils.HighestBitSet(mask); @@ -1849,5 +1887,10 @@ namespace ARMeilleure.CodeGen.X86 { return new Operand((int)register, RegisterType.Integer, type); } + + private static Operand Xmm(X86Register register) + { + return new Operand((int)register, RegisterType.Vector, OperandType.V128); + } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 852ea8ad5d..0099c6a030 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -27,7 +27,7 @@ namespace ARMeilleure.CodeGen.X86 Instruction inst = operation.Inst; - AddConstantCopy(node, operation); + HandleConstantCopy(node, operation); //Comparison instructions uses CMOVcc, which does not zero the //upper bits of the register (since it's R8), we need to ensure it @@ -55,14 +55,14 @@ namespace ARMeilleure.CodeGen.X86 ReplaceNegateWithXor(node, operation); } - AddFixedRegisterCopy(node, operation); + HandleFixedRegisterCopy(node, operation); - AddSameDestSrc1Copy(node, operation); + HandleSameDestSrc1Copy(node, operation); } } } - private static void AddConstantCopy(LinkedListNode node, Operation operation) + private static void HandleConstantCopy(LinkedListNode node, Operation operation) { if (operation.SourcesCount == 0 || IsIntrinsic(operation.Inst)) { @@ -229,7 +229,7 @@ namespace ARMeilleure.CodeGen.X86 Delete(node, operation); } - private static void AddFixedRegisterCopy(LinkedListNode node, Operation operation) + private static void HandleFixedRegisterCopy(LinkedListNode node, Operation operation) { if (operation.SourcesCount == 0) { @@ -353,7 +353,7 @@ namespace ARMeilleure.CodeGen.X86 Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source); - node.List.AddBefore(node, storeOp); + HandleConstantCopy(node.List.AddBefore(node, storeOp), storeOp); operation.SetSource(index, stackAddr); } @@ -390,7 +390,7 @@ namespace ARMeilleure.CodeGen.X86 Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source); - node.List.AddBefore(node, srcCopyOp); + HandleConstantCopy(node.List.AddBefore(node, srcCopyOp), srcCopyOp); operation.SetSource(index + 1, argReg); } @@ -405,7 +405,7 @@ namespace ARMeilleure.CodeGen.X86 Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source); - node.List.AddBefore(node, spillOp); + HandleConstantCopy(node.List.AddBefore(node, spillOp), spillOp); operation.SetSource(index + 1, new Operand(OperandKind.Undefined)); } @@ -444,7 +444,7 @@ namespace ARMeilleure.CodeGen.X86 } } - private static void AddSameDestSrc1Copy(LinkedListNode node, Operation operation) + private static void HandleSameDestSrc1Copy(LinkedListNode node, Operation operation) { if (operation.Dest == null || operation.SourcesCount == 0) { diff --git a/ARMeilleure/Common/BitUtils.cs b/ARMeilleure/Common/BitUtils.cs index 2e8c763cd3..55344608cc 100644 --- a/ARMeilleure/Common/BitUtils.cs +++ b/ARMeilleure/Common/BitUtils.cs @@ -67,6 +67,20 @@ namespace ARMeilleure.Common return output; } + public static int CountBits(int value) + { + int count = 0; + + while (value != 0) + { + value &= ~(value & -value); + + count++; + } + + return count; + } + public static long FillWithOnes(int bits) { return bits == 64 ? -1L : (1L << bits) - 1; diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs index bcb516e84d..9981278c20 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -13,7 +13,7 @@ namespace ARMeilleure.Instructions { static class InstEmitMemoryHelper { - private static bool ForceFallback = false; + private static bool ForceFallback = true; private enum Extension { @@ -87,6 +87,8 @@ namespace ARMeilleure.Instructions case 2: value = context.SignExtend32(value); break; } } + + context.Copy(GetT(context, rt), value); } } diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs index d27dc18a8d..e57160b125 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -470,9 +470,9 @@ namespace ARMeilleure.Instructions context.BranchIfFalse(lblNaN, isOrdered); - Operand cf = context.AddIntrinsicInt(Instruction.X86Comissge, n, m); - Operand zf = context.AddIntrinsicInt(Instruction.X86Comisseq, n, m); - Operand nf = context.AddIntrinsicInt(Instruction.X86Comisslt, n, m); + Operand cf = context.AddIntrinsicInt(Instruction.X86Comissge, n, m); + Operand zf = context.AddIntrinsicInt(Instruction.X86Comisseq, n, m); + Operand nf = context.AddIntrinsicInt(Instruction.X86Comisslt, n, m); context.Copy(GetFlag(PState.VFlag), Const(0)); context.Copy(GetFlag(PState.CFlag), cf); @@ -487,9 +487,9 @@ namespace ARMeilleure.Instructions context.BranchIfFalse(lblNaN, isOrdered); - Operand cf = context.AddIntrinsicInt(Instruction.X86Comisdge, n, m); - Operand zf = context.AddIntrinsicInt(Instruction.X86Comisdeq, n, m); - Operand nf = context.AddIntrinsicInt(Instruction.X86Comisdlt, n, m); + Operand cf = context.AddIntrinsicInt(Instruction.X86Comisdge, n, m); + Operand zf = context.AddIntrinsicInt(Instruction.X86Comisdeq, n, m); + Operand nf = context.AddIntrinsicInt(Instruction.X86Comisdlt, n, m); context.Copy(GetFlag(PState.VFlag), Const(0)); context.Copy(GetFlag(PState.CFlag), cf); diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs index 244fe37e7c..7358e6b2cb 100644 --- a/ARMeilleure/Instructions/SoftFloat.cs +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -706,8 +706,10 @@ namespace ARMeilleure.Instructions return result; } - public static float FPCompareEQ(float value1, float value2, ExecutionContext context) + public static float FPCompareEQ(float value1, float value2) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); value2 = value2.FPUnpack(out FPType type2, out _, out _, context); @@ -730,8 +732,10 @@ namespace ARMeilleure.Instructions return result; } - public static float FPCompareGE(float value1, float value2, ExecutionContext context) + public static float FPCompareGE(float value1, float value2) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); value2 = value2.FPUnpack(out FPType type2, out _, out _, context); @@ -751,8 +755,10 @@ namespace ARMeilleure.Instructions return result; } - public static float FPCompareGT(float value1, float value2, ExecutionContext context) + public static float FPCompareGT(float value1, float value2) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); value2 = value2.FPUnpack(out FPType type2, out _, out _, context); @@ -772,14 +778,14 @@ namespace ARMeilleure.Instructions return result; } - public static float FPCompareLE(float value1, float value2, ExecutionContext context) + public static float FPCompareLE(float value1, float value2) { - return FPCompareGE(value2, value1, context); + return FPCompareGE(value2, value1); } - public static float FPCompareLT(float value1, float value2, ExecutionContext context) + public static float FPCompareLT(float value1, float value2) { - return FPCompareGT(value2, value1, context); + return FPCompareGT(value2, value1); } public static float FPDiv(float value1, float value2) @@ -1769,8 +1775,10 @@ namespace ARMeilleure.Instructions return result; } - public static double FPCompareEQ(double value1, double value2, ExecutionContext context) + public static double FPCompareEQ(double value1, double value2) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); value2 = value2.FPUnpack(out FPType type2, out _, out _, context); @@ -1793,8 +1801,10 @@ namespace ARMeilleure.Instructions return result; } - public static double FPCompareGE(double value1, double value2, ExecutionContext context) + public static double FPCompareGE(double value1, double value2) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); value2 = value2.FPUnpack(out FPType type2, out _, out _, context); @@ -1814,8 +1824,10 @@ namespace ARMeilleure.Instructions return result; } - public static double FPCompareGT(double value1, double value2, ExecutionContext context) + public static double FPCompareGT(double value1, double value2) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); value2 = value2.FPUnpack(out FPType type2, out _, out _, context); @@ -1835,14 +1847,14 @@ namespace ARMeilleure.Instructions return result; } - public static double FPCompareLE(double value1, double value2, ExecutionContext context) + public static double FPCompareLE(double value1, double value2) { - return FPCompareGE(value2, value1, context); + return FPCompareGE(value2, value1); } - public static double FPCompareLT(double value1, double value2, ExecutionContext context) + public static double FPCompareLT(double value1, double value2) { - return FPCompareGT(value2, value1, context); + return FPCompareGT(value2, value1); } public static double FPDiv(double value1, double value2)