Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
This commit is contained in:
parent
3a0676c596
commit
581b7c8bbf
2 changed files with 576 additions and 329 deletions
|
@ -625,6 +625,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand dest = operation.Dest;
|
Operand dest = operation.Dest;
|
||||||
Operand source = operation.GetSource(0);
|
Operand source = operation.GetSource(0);
|
||||||
|
|
||||||
|
if (dest.Type != source.Type)
|
||||||
|
{
|
||||||
|
System.Console.WriteLine(dest.Type + " " + source.Type);
|
||||||
|
}
|
||||||
|
|
||||||
EnsureSameType(dest, source);
|
EnsureSameType(dest, source);
|
||||||
|
|
||||||
Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
|
Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
|
||||||
|
@ -1072,16 +1077,62 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
byte index = src2.AsByte();
|
byte index = src2.AsByte();
|
||||||
|
|
||||||
if (dest.Type == OperandType.I32)
|
if (dest.Type == OperandType.I32)
|
||||||
|
{
|
||||||
|
Debug.Assert(index < 4);
|
||||||
|
|
||||||
|
if (HardwareCapabilities.SupportsSse41)
|
||||||
{
|
{
|
||||||
context.Assembler.Pextrd(dest, src1, index);
|
context.Assembler.Pextrd(dest, src1, index);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (index != 0)
|
||||||
|
{
|
||||||
|
int mask0 = 0b11_10_01_00;
|
||||||
|
int mask1 = 0b11_10_01_00;
|
||||||
|
|
||||||
|
mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
|
||||||
|
mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
|
||||||
|
|
||||||
|
context.Assembler.Pshufd(src1, src1, (byte)mask0);
|
||||||
|
context.Assembler.Movd (dest, src1);
|
||||||
|
context.Assembler.Pshufd(src1, src1, (byte)mask1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
context.Assembler.Movd(dest, src1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (dest.Type == OperandType.I64)
|
else if (dest.Type == OperandType.I64)
|
||||||
|
{
|
||||||
|
Debug.Assert(index < 2);
|
||||||
|
|
||||||
|
if (HardwareCapabilities.SupportsSse41)
|
||||||
{
|
{
|
||||||
context.Assembler.Pextrq(dest, src1, index);
|
context.Assembler.Pextrq(dest, src1, index);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//Floating-point type.
|
if (index != 0)
|
||||||
|
{
|
||||||
|
const byte mask = 0b01_00_11_10;
|
||||||
|
|
||||||
|
context.Assembler.Pshufd(src1, src1, mask);
|
||||||
|
context.Assembler.Movq (dest, src1);
|
||||||
|
context.Assembler.Pshufd(src1, src1, mask);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
context.Assembler.Movq(dest, src1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Debug.Assert(index < (dest.Type == OperandType.FP32 ? 4 : 2));
|
||||||
|
|
||||||
|
//Floating-point types.
|
||||||
if ((index >= 2 && dest.Type == OperandType.FP32) ||
|
if ((index >= 2 && dest.Type == OperandType.FP32) ||
|
||||||
(index == 1 && dest.Type == OperandType.FP64))
|
(index == 1 && dest.Type == OperandType.FP64))
|
||||||
{
|
{
|
||||||
|
@ -1111,6 +1162,8 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
byte index = src2.AsByte();
|
byte index = src2.AsByte();
|
||||||
|
|
||||||
|
Debug.Assert(index < 8);
|
||||||
|
|
||||||
context.Assembler.Pextrw(dest, src1, index);
|
context.Assembler.Pextrw(dest, src1, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1125,9 +1178,26 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
byte index = src2.AsByte();
|
byte index = src2.AsByte();
|
||||||
|
|
||||||
//TODO: SSE/SSE2 version.
|
Debug.Assert(index < 16);
|
||||||
|
|
||||||
|
if (HardwareCapabilities.SupportsSse41)
|
||||||
|
{
|
||||||
context.Assembler.Pextrb(dest, src1, index);
|
context.Assembler.Pextrb(dest, src1, index);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
context.Assembler.Pextrw(dest, src1, (byte)(index >> 1));
|
||||||
|
|
||||||
|
if ((index & 1) != 0)
|
||||||
|
{
|
||||||
|
context.Assembler.Shr(dest, new Operand(8), OperandType.I32);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
context.Assembler.Movzx8(dest, dest, OperandType.I32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
|
private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
|
||||||
{
|
{
|
||||||
|
@ -1136,35 +1206,107 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand src2 = operation.GetSource(1); //Value
|
Operand src2 = operation.GetSource(1); //Value
|
||||||
Operand src3 = operation.GetSource(2); //Index
|
Operand src3 = operation.GetSource(2); //Index
|
||||||
|
|
||||||
|
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
Debug.Assert(src1.Type == OperandType.V128);
|
Debug.Assert(src1.Type == OperandType.V128);
|
||||||
Debug.Assert(src3.Kind == OperandKind.Constant);
|
Debug.Assert(src3.Kind == OperandKind.Constant);
|
||||||
|
|
||||||
byte index = src3.AsByte();
|
byte index = src3.AsByte();
|
||||||
|
|
||||||
|
void InsertIntSse2(int words)
|
||||||
|
{
|
||||||
|
if (dest.GetRegister() != src1.GetRegister())
|
||||||
|
{
|
||||||
|
context.Assembler.Movdqu(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int word = 0; word < words; word++)
|
||||||
|
{
|
||||||
|
// Insert lower 16-bits.
|
||||||
|
context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
|
||||||
|
|
||||||
|
// Move next word down.
|
||||||
|
context.Assembler.Ror(src2, new Operand(16), src2.Type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (src2.Type == OperandType.I32)
|
if (src2.Type == OperandType.I32)
|
||||||
{
|
{
|
||||||
//TODO: SSE/SSE2 version.
|
Debug.Assert(index < 4);
|
||||||
|
|
||||||
|
if (HardwareCapabilities.SupportsSse41)
|
||||||
|
{
|
||||||
context.Assembler.Pinsrd(dest, src1, src2, index);
|
context.Assembler.Pinsrd(dest, src1, src2, index);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
InsertIntSse2(2);
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (src2.Type == OperandType.I64)
|
else if (src2.Type == OperandType.I64)
|
||||||
{
|
{
|
||||||
//TODO: SSE/SSE2 version.
|
Debug.Assert(index < 2);
|
||||||
|
|
||||||
|
if (HardwareCapabilities.SupportsSse41)
|
||||||
|
{
|
||||||
context.Assembler.Pinsrq(dest, src1, src2, index);
|
context.Assembler.Pinsrq(dest, src1, src2, index);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
InsertIntSse2(4);
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (src2.Type == OperandType.FP32)
|
else if (src2.Type == OperandType.FP32)
|
||||||
{
|
{
|
||||||
|
Debug.Assert(index < 4);
|
||||||
|
|
||||||
if (index != 0)
|
if (index != 0)
|
||||||
{
|
{
|
||||||
//TODO: SSE/SSE2 version.
|
if (HardwareCapabilities.SupportsSse41)
|
||||||
|
{
|
||||||
context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
|
context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
if (src1.GetRegister() == src2.GetRegister())
|
||||||
|
{
|
||||||
|
int mask = 0b11_10_01_00;
|
||||||
|
|
||||||
|
mask &= ~(0b11 << index * 2);
|
||||||
|
|
||||||
|
context.Assembler.Pshufd(dest, src1, (byte)mask);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int mask0 = 0b11_10_01_00;
|
||||||
|
int mask1 = 0b11_10_01_00;
|
||||||
|
|
||||||
|
mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
|
||||||
|
mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
|
||||||
|
|
||||||
|
if (dest.GetRegister() != src1.GetRegister())
|
||||||
|
{
|
||||||
|
context.Assembler.Movdqu(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.Assembler.Pshufd(dest, dest, (byte)mask0);
|
||||||
|
context.Assembler.Movss (dest, dest, src2);
|
||||||
|
context.Assembler.Pshufd(dest, dest, (byte)mask1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
context.Assembler.Movss(dest, src1, src2);
|
context.Assembler.Movss(dest, src1, src2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else /* if (src2.Type == OperandType.FP64) */
|
else /* if (src2.Type == OperandType.FP64) */
|
||||||
{
|
{
|
||||||
|
Debug.Assert(index < 2);
|
||||||
|
|
||||||
if (index != 0)
|
if (index != 0)
|
||||||
{
|
{
|
||||||
context.Assembler.Movlhps(dest, src1, src2);
|
context.Assembler.Movlhps(dest, src1, src2);
|
||||||
|
@ -1183,6 +1325,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand src2 = operation.GetSource(1); //Value
|
Operand src2 = operation.GetSource(1); //Value
|
||||||
Operand src3 = operation.GetSource(2); //Index
|
Operand src3 = operation.GetSource(2); //Index
|
||||||
|
|
||||||
|
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
Debug.Assert(src1.Type == OperandType.V128);
|
Debug.Assert(src1.Type == OperandType.V128);
|
||||||
Debug.Assert(src3.Kind == OperandKind.Constant);
|
Debug.Assert(src3.Kind == OperandKind.Constant);
|
||||||
|
|
||||||
|
@ -1198,12 +1345,22 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand src2 = operation.GetSource(1); //Value
|
Operand src2 = operation.GetSource(1); //Value
|
||||||
Operand src3 = operation.GetSource(2); //Index
|
Operand src3 = operation.GetSource(2); //Index
|
||||||
|
|
||||||
|
// Without SSE 4.1 support, emulating this instruction requires
|
||||||
|
// the use of a temporary register, which is not available here,
|
||||||
|
// so that case is instead handled in the pre-allocator whenever
|
||||||
|
// SSE 4.1 is not supported on the CPU.
|
||||||
|
Debug.Assert(HardwareCapabilities.SupportsSse41);
|
||||||
|
|
||||||
|
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
Debug.Assert(src1.Type == OperandType.V128);
|
Debug.Assert(src1.Type == OperandType.V128);
|
||||||
Debug.Assert(src3.Kind == OperandKind.Constant);
|
Debug.Assert(src3.Kind == OperandKind.Constant);
|
||||||
|
|
||||||
byte index = src3.AsByte();
|
byte index = src3.AsByte();
|
||||||
|
|
||||||
//TODO: SSE/SSE2 version.
|
|
||||||
context.Assembler.Pinsrb(dest, src1, src2, index);
|
context.Assembler.Pinsrb(dest, src1, src2, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,8 @@ using static ARMeilleure.IntermediateRepresentation.OperandHelper;
|
||||||
|
|
||||||
namespace ARMeilleure.CodeGen.X86
|
namespace ARMeilleure.CodeGen.X86
|
||||||
{
|
{
|
||||||
|
using LLNode = LinkedListNode<Node>;
|
||||||
|
|
||||||
static class PreAllocator
|
static class PreAllocator
|
||||||
{
|
{
|
||||||
public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
|
public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
|
||||||
|
@ -18,9 +20,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
foreach (BasicBlock block in cctx.Cfg.Blocks)
|
foreach (BasicBlock block in cctx.Cfg.Blocks)
|
||||||
{
|
{
|
||||||
LinkedListNode<Node> nextNode;
|
LLNode nextNode;
|
||||||
|
|
||||||
for (LinkedListNode<Node> node = block.Operations.First; node != null; node = nextNode)
|
for (LLNode node = block.Operations.First; node != null; node = nextNode)
|
||||||
{
|
{
|
||||||
nextNode = node.Next;
|
nextNode = node.Next;
|
||||||
|
|
||||||
|
@ -29,35 +31,19 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Instruction inst = operation.Inst;
|
|
||||||
|
|
||||||
HandleConstantCopy(node, operation);
|
HandleConstantCopy(node, operation);
|
||||||
|
|
||||||
HandleFixedRegisterCopy(node, operation);
|
|
||||||
|
|
||||||
HandleSameDestSrc1Copy(node, operation);
|
HandleSameDestSrc1Copy(node, operation);
|
||||||
|
|
||||||
//Unsigned integer to FP conversions are not supported on X86.
|
node = HandleFixedRegisterCopy(node, operation);
|
||||||
//We need to turn them into signed integer to FP conversions, and
|
|
||||||
//adjust the final result.
|
|
||||||
if (inst == Instruction.ConvertToFPUI)
|
|
||||||
{
|
|
||||||
ReplaceConvertToFPUIWithSI(node, operation);
|
|
||||||
}
|
|
||||||
|
|
||||||
//There's no SSE FP negate instruction, so we need to transform that into
|
switch (operation.Inst)
|
||||||
//a XOR of the value to be negated with a mask with the highest bit set.
|
|
||||||
//This also produces -0 for a negation of the value 0.
|
|
||||||
if (inst == Instruction.Negate && !operation.GetSource(0).Type.IsInteger())
|
|
||||||
{
|
|
||||||
ReplaceNegateWithXor(node, operation);
|
|
||||||
}
|
|
||||||
|
|
||||||
//Get the maximum number of arguments used on a call. On windows,
|
|
||||||
//when a struct is returned from the call, we also need to pass
|
|
||||||
//the pointer where the struct should be written on the first argument.
|
|
||||||
if (inst == Instruction.Call)
|
|
||||||
{
|
{
|
||||||
|
case Instruction.Call:
|
||||||
|
// Get the maximum number of arguments used on a call.
|
||||||
|
// On windows, when a struct is returned from the call,
|
||||||
|
// we also need to pass the pointer where the struct
|
||||||
|
// should be written on the first argument.
|
||||||
int argsCount = operation.SourcesCount - 1;
|
int argsCount = operation.SourcesCount - 1;
|
||||||
|
|
||||||
if (operation.Dest != null && operation.Dest.Type == OperandType.V128)
|
if (operation.Dest != null && operation.Dest.Type == OperandType.V128)
|
||||||
|
@ -70,23 +56,42 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
maxCallArgs = argsCount;
|
maxCallArgs = argsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Copy values to registers expected by the function being called,
|
// Copy values to registers expected by the function
|
||||||
//as mandated by the ABI.
|
// being called, as mandated by the ABI.
|
||||||
HandleCallWindowsAbi(stackAlloc, node, operation);
|
node = HandleCallWindowsAbi(stackAlloc, node, operation);
|
||||||
}
|
break;
|
||||||
else if (inst == Instruction.Return)
|
|
||||||
{
|
case Instruction.ConvertToFPUI:
|
||||||
HandleReturnWindowsAbi(cctx, node, preservedArgs, operation);
|
HandleConvertToFPUI(node, operation);
|
||||||
}
|
break;
|
||||||
else if (inst == Instruction.LoadArgument)
|
|
||||||
{
|
case Instruction.LoadArgument:
|
||||||
HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation);
|
HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Instruction.Negate:
|
||||||
|
if (!operation.GetSource(0).Type.IsInteger())
|
||||||
|
{
|
||||||
|
node = HandleNegate(node, operation);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Instruction.Return:
|
||||||
|
HandleReturnWindowsAbi(cctx, node, preservedArgs, operation);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Instruction.VectorInsert8:
|
||||||
|
if (!HardwareCapabilities.SupportsSse41)
|
||||||
|
{
|
||||||
|
node = HandleVectorInsert8(node, operation);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void HandleConstantCopy(LinkedListNode<Node> node, Operation operation)
|
private static void HandleConstantCopy(LLNode node, Operation operation)
|
||||||
{
|
{
|
||||||
if (operation.SourcesCount == 0 || IsIntrinsic(operation.Inst))
|
if (operation.SourcesCount == 0 || IsIntrinsic(operation.Inst))
|
||||||
{
|
{
|
||||||
|
@ -95,7 +100,6 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Instruction inst = operation.Inst;
|
Instruction inst = operation.Inst;
|
||||||
|
|
||||||
Operand dest = operation.Dest;
|
|
||||||
Operand src1 = operation.GetSource(0);
|
Operand src1 = operation.GetSource(0);
|
||||||
Operand src2;
|
Operand src2;
|
||||||
|
|
||||||
|
@ -103,25 +107,25 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
{
|
{
|
||||||
if (!src1.Type.IsInteger())
|
if (!src1.Type.IsInteger())
|
||||||
{
|
{
|
||||||
//Handle non-integer types (FP32, FP64 and V128).
|
// Handle non-integer types (FP32, FP64 and V128).
|
||||||
//For instructions without an immediate operand, we do the following:
|
// For instructions without an immediate operand, we do the following:
|
||||||
//- Insert a copy with the constant value (as integer) to a GPR.
|
// - Insert a copy with the constant value (as integer) to a GPR.
|
||||||
//- Insert a copy from the GPR to a XMM register.
|
// - Insert a copy from the GPR to a XMM register.
|
||||||
//- Replace the constant use with the XMM register.
|
// - Replace the constant use with the XMM register.
|
||||||
src1 = AddXmmCopy(node, src1);
|
src1 = AddXmmCopy(node, src1);
|
||||||
|
|
||||||
operation.SetSource(0, src1);
|
operation.SetSource(0, src1);
|
||||||
}
|
}
|
||||||
else if (!HasConstSrc1(inst))
|
else if (!HasConstSrc1(inst))
|
||||||
{
|
{
|
||||||
//Handle integer types.
|
// Handle integer types.
|
||||||
//Most ALU instructions accepts a 32-bits immediate on the second operand.
|
// Most ALU instructions accept a 32-bit immediate as the second operand.
|
||||||
//We need to ensure the following:
|
// We need to ensure the following:
|
||||||
//- If the constant is on operand 1, we need to move it.
|
// - If the constant is on operand 1, we need to move it.
|
||||||
//-- But first, we try to swap operand 1 and 2 if the instruction is commutative.
|
// -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
|
||||||
//-- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
|
// -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
|
||||||
//- If the constant is on operand 2, we check if the instruction supports it,
|
// - If the constant is on operand 2, we check if the instruction supports it,
|
||||||
//if not, we also add a copy. 64-bits constants are usually not supported.
|
// if not, we also add a copy. 64-bit constants are usually not supported.
|
||||||
if (IsCommutative(inst))
|
if (IsCommutative(inst))
|
||||||
{
|
{
|
||||||
src2 = operation.GetSource(1);
|
src2 = operation.GetSource(1);
|
||||||
|
@ -168,188 +172,27 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void ReplaceConvertToFPUIWithSI(LinkedListNode<Node> node, Operation operation)
|
private static LLNode HandleFixedRegisterCopy(LLNode node, Operation operation)
|
||||||
{
|
{
|
||||||
Operand dest = operation.Dest;
|
Operand dest = operation.Dest;
|
||||||
Operand source = operation.GetSource(0);
|
|
||||||
|
|
||||||
Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
|
|
||||||
|
|
||||||
LinkedList<Node> nodes = node.List;
|
LinkedList<Node> nodes = node.List;
|
||||||
|
|
||||||
LinkedListNode<Node> temp = node;
|
switch (operation.Inst)
|
||||||
|
|
||||||
if (source.Type == OperandType.I32)
|
|
||||||
{
|
{
|
||||||
//For 32-bits integers, we can just zero-extend to 64-bits,
|
case Instruction.CompareAndSwap128:
|
||||||
//and then use the 64-bits signed conversion instructions.
|
|
||||||
Operand zex = Local(OperandType.I64);
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.Copy, zex, source));
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.ConvertToFP, dest, zex));
|
|
||||||
}
|
|
||||||
else /* if (source.Type == OperandType.I64) */
|
|
||||||
{
|
|
||||||
//For 64-bits integers, we need to do the following:
|
|
||||||
//- Ensure that the integer has the most significant bit clear.
|
|
||||||
//-- This can be done by shifting the value right by 1, that is, dividing by 2.
|
|
||||||
//-- The least significant bit is lost in this case though.
|
|
||||||
//- We can then convert the shifted value with a signed integer instruction.
|
|
||||||
//- The result still needs to be corrected after that.
|
|
||||||
//-- First, we need to multiply the result by 2, as we divided it by 2 before.
|
|
||||||
//--- This can be done efficiently by adding the result to itself.
|
|
||||||
//-- Then, we need to add the least significant bit that was shifted out.
|
|
||||||
//--- We can convert the least significant bit to float, and add it to the result.
|
|
||||||
Operand lsb = Local(OperandType.I64);
|
|
||||||
Operand half = Local(OperandType.I64);
|
|
||||||
|
|
||||||
Operand lsbF = Local(dest.Type);
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.Copy, lsb, source));
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.Copy, half, source));
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.ShiftRightUI, half, half, Const(1)));
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.ConvertToFP, lsbF, lsb));
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.ConvertToFP, dest, half));
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.Add, dest, dest, dest));
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.Add, dest, dest, lsbF));
|
|
||||||
}
|
|
||||||
|
|
||||||
Delete(node, operation);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void ReplaceNegateWithXor(LinkedListNode<Node> node, Operation operation)
|
|
||||||
{
|
|
||||||
Operand dest = operation.Dest;
|
|
||||||
Operand source = operation.GetSource(0);
|
|
||||||
|
|
||||||
Debug.Assert(dest.Type == OperandType.FP32 ||
|
|
||||||
dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
|
|
||||||
|
|
||||||
LinkedList<Node> nodes = node.List;
|
|
||||||
|
|
||||||
LinkedListNode<Node> temp = node;
|
|
||||||
|
|
||||||
Operand res = Local(dest.Type);
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.VectorOne, res));
|
|
||||||
|
|
||||||
if (dest.Type == OperandType.FP32)
|
|
||||||
{
|
|
||||||
temp = nodes.AddAfter(temp, new IntrinsicOperation(Intrinsic.X86Pslld, res, res, Const(31)));
|
|
||||||
}
|
|
||||||
else /* if (dest.Type == OperandType.FP64) */
|
|
||||||
{
|
|
||||||
temp = nodes.AddAfter(temp, new IntrinsicOperation(Intrinsic.X86Psllq, res, res, Const(63)));
|
|
||||||
}
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new IntrinsicOperation(Intrinsic.X86Xorps, res, res, source));
|
|
||||||
|
|
||||||
temp = nodes.AddAfter(temp, new Operation(Instruction.Copy, dest, res));
|
|
||||||
|
|
||||||
Delete(node, operation);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void HandleFixedRegisterCopy(LinkedListNode<Node> node, Operation operation)
|
|
||||||
{
|
|
||||||
Instruction inst = operation.Inst;
|
|
||||||
|
|
||||||
Operand dest = operation.Dest;
|
|
||||||
|
|
||||||
//Handle the many restrictions of the CPU Id instruction:
|
|
||||||
//- EAX controls the information returned by this instruction.
|
|
||||||
//- When EAX is 1, feature information is returned.
|
|
||||||
//- The information is written to registers EAX, EBX, ECX and EDX.
|
|
||||||
if (inst == Instruction.CpuId)
|
|
||||||
{
|
|
||||||
Debug.Assert(dest.Type == OperandType.I64);
|
|
||||||
|
|
||||||
Operand eax = Gpr(X86Register.Rax, OperandType.I32);
|
|
||||||
Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
|
|
||||||
Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
|
|
||||||
Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
|
|
||||||
|
|
||||||
// Value 0x01 = Version, family and feature information.
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
|
|
||||||
|
|
||||||
// Copy results to the destination register.
|
|
||||||
// The values are split into 2 32-bits registers, we merge them
|
|
||||||
// into a single 64-bits register.
|
|
||||||
Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
|
|
||||||
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
|
|
||||||
|
|
||||||
// We don't care about those two, but their values are overwritten,
|
|
||||||
// so we need to take that into account.
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.Clobber, ebx));
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.Clobber, eax));
|
|
||||||
|
|
||||||
operation.Dest = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (operation.SourcesCount == 0)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Operand src1 = operation.GetSource(0);
|
|
||||||
|
|
||||||
//Handle the many restrictions of the division instructions:
|
|
||||||
//- The dividend is always in RDX:RAX.
|
|
||||||
//- The result is always in RAX.
|
|
||||||
//- Additionally it also writes the remainder in RDX.
|
|
||||||
if (inst == Instruction.Divide || inst == Instruction.DivideUI)
|
|
||||||
{
|
|
||||||
Operand rax = Gpr(X86Register.Rax, src1.Type);
|
|
||||||
Operand rdx = Gpr(X86Register.Rdx, src1.Type);
|
|
||||||
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
|
|
||||||
|
|
||||||
operation.SetSource(0, rax);
|
|
||||||
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.Clobber, rdx));
|
|
||||||
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
|
|
||||||
|
|
||||||
operation.Dest = rax;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
|
|
||||||
//- The multiplicand is always in RAX.
|
|
||||||
//- The lower 64-bits of the result is always in RAX.
|
|
||||||
//- The higher 64-bits of the result is always in RDX.
|
|
||||||
if (inst == Instruction.Multiply64HighSI || inst == Instruction.Multiply64HighUI)
|
|
||||||
{
|
|
||||||
Operand rax = Gpr(X86Register.Rax, src1.Type);
|
|
||||||
Operand rdx = Gpr(X86Register.Rdx, src1.Type);
|
|
||||||
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
|
|
||||||
|
|
||||||
operation.SetSource(0, rax);
|
|
||||||
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.Copy, dest, rdx));
|
|
||||||
|
|
||||||
operation.Dest = rdx;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Handle the many restrictions of the compare and exchange (16 bytes) instruction:
|
|
||||||
//- The expected value should be in RDX:RAX.
|
|
||||||
//- The new value to be written should be in RCX:RBX.
|
|
||||||
//- The value at the memory location is loaded to RDX:RAX.
|
|
||||||
if (inst == Instruction.CompareAndSwap128)
|
|
||||||
{
|
{
|
||||||
|
// Handle the many restrictions of the compare and exchange (16 bytes) instruction:
|
||||||
|
// - The expected value should be in RDX:RAX.
|
||||||
|
// - The new value to be written should be in RCX:RBX.
|
||||||
|
// - The value at the memory location is loaded to RDX:RAX.
|
||||||
void SplitOperand(Operand source, X86Register lowReg, X86Register highReg)
|
void SplitOperand(Operand source, X86Register lowReg, X86Register highReg)
|
||||||
{
|
{
|
||||||
Operand lr = Gpr(lowReg, OperandType.I64);
|
Operand lr = Gpr(lowReg, OperandType.I64);
|
||||||
Operand hr = Gpr(highReg, OperandType.I64);
|
Operand hr = Gpr(highReg, OperandType.I64);
|
||||||
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
|
nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
|
||||||
node.List.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
|
nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
SplitOperand(operation.GetSource(1), X86Register.Rax, X86Register.Rdx);
|
SplitOperand(operation.GetSource(1), X86Register.Rax, X86Register.Rdx);
|
||||||
|
@ -358,60 +201,341 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand rax = Gpr(X86Register.Rax, OperandType.I64);
|
Operand rax = Gpr(X86Register.Rax, OperandType.I64);
|
||||||
Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
|
Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
|
||||||
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
|
node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
|
||||||
node.List.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
|
node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
|
||||||
|
|
||||||
operation.SetSource(1, Undef());
|
operation.SetSource(1, Undef());
|
||||||
operation.SetSource(2, Undef());
|
operation.SetSource(2, Undef());
|
||||||
|
|
||||||
operation.Dest = null;
|
operation.Dest = null;
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
//The shift register is always implied to be CL (low 8-bits of RCX or ECX).
|
case Instruction.CpuId:
|
||||||
if (inst.IsShift() && operation.GetSource(1).Kind == OperandKind.LocalVariable)
|
|
||||||
{
|
{
|
||||||
Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
|
// Handle the many restrictions of the CPU Id instruction:
|
||||||
|
// - EAX controls the information returned by this instruction.
|
||||||
|
// - When EAX is 1, feature information is returned.
|
||||||
|
// - The information is written to registers EAX, EBX, ECX and EDX.
|
||||||
|
Debug.Assert(dest.Type == OperandType.I64);
|
||||||
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.Copy, rcx, operation.GetSource(1)));
|
Operand eax = Gpr(X86Register.Rax, OperandType.I32);
|
||||||
|
Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
|
||||||
|
Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
|
||||||
|
Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
|
||||||
|
|
||||||
operation.SetSource(1, rcx);
|
// Value 0x01 = Version, family and feature information.
|
||||||
|
node = nodes.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
|
||||||
|
|
||||||
|
// We don't care about those two, but their values are overwritten,
|
||||||
|
// so we need to take that into account.
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.Clobber, eax));
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.Clobber, ebx));
|
||||||
|
|
||||||
|
// Copy results to the destination register.
|
||||||
|
// The values are split into 2 32-bits registers, we merge them
|
||||||
|
// into a single 64-bits register.
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
|
||||||
|
|
||||||
|
operation.Dest = null;
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Handle intrinsics.
|
case Instruction.Divide:
|
||||||
if (IsIntrinsic(inst))
|
case Instruction.DivideUI:
|
||||||
|
{
|
||||||
|
// Handle the many restrictions of the division instructions:
|
||||||
|
// - The dividend is always in RDX:RAX.
|
||||||
|
// - The result is always in RAX.
|
||||||
|
// - Additionally it also writes the remainder in RDX.
|
||||||
|
Operand src1 = operation.GetSource(0);
|
||||||
|
|
||||||
|
Operand rax = Gpr(X86Register.Rax, src1.Type);
|
||||||
|
Operand rdx = Gpr(X86Register.Rdx, src1.Type);
|
||||||
|
|
||||||
|
nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
|
||||||
|
|
||||||
|
operation.SetSource(0, rax);
|
||||||
|
|
||||||
|
nodes.AddBefore(node, new Operation(Instruction.Clobber, rdx));
|
||||||
|
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
|
||||||
|
|
||||||
|
operation.Dest = rax;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case Instruction.Extended:
|
||||||
{
|
{
|
||||||
IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
|
IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
|
||||||
|
|
||||||
//PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
|
// PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
|
||||||
if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
|
if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
|
||||||
{
|
{
|
||||||
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
||||||
|
|
||||||
node.List.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
|
nodes.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
|
||||||
|
|
||||||
operation.SetSource(2, xmm0);
|
operation.SetSource(2, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case Instruction.Multiply64HighSI:
|
||||||
|
case Instruction.Multiply64HighUI:
|
||||||
|
{
|
||||||
|
// Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
|
||||||
|
// - The multiplicand is always in RAX.
|
||||||
|
// - The lower 64-bits of the result is always in RAX.
|
||||||
|
// - The higher 64-bits of the result is always in RDX.
|
||||||
|
Operand src1 = operation.GetSource(0);
|
||||||
|
|
||||||
|
Operand rax = Gpr(X86Register.Rax, src1.Type);
|
||||||
|
Operand rdx = Gpr(X86Register.Rdx, src1.Type);
|
||||||
|
|
||||||
|
nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
|
||||||
|
|
||||||
|
operation.SetSource(0, rax);
|
||||||
|
|
||||||
|
node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rdx));
|
||||||
|
|
||||||
|
operation.Dest = rdx;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case Instruction.RotateRight:
|
||||||
|
case Instruction.ShiftLeft:
|
||||||
|
case Instruction.ShiftRightSI:
|
||||||
|
case Instruction.ShiftRightUI:
|
||||||
|
{
|
||||||
|
// The shift register is always implied to be CL (low 8-bits of RCX or ECX).
|
||||||
|
if (operation.GetSource(1).Kind == OperandKind.LocalVariable)
|
||||||
|
{
|
||||||
|
Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
|
||||||
|
|
||||||
|
nodes.AddBefore(node, new Operation(Instruction.Copy, rcx, operation.GetSource(1)));
|
||||||
|
|
||||||
|
operation.SetSource(1, rcx);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void HandleCallWindowsAbi(
|
return node;
|
||||||
StackAllocator stackAlloc,
|
}
|
||||||
LinkedListNode<Node> node,
|
|
||||||
Operation operation)
|
/// <summary>
/// Rewrites an operation so that the source operand tied to the destination is the
/// destination itself, by inserting a copy into the destination right before it.
/// </summary>
/// <param name="node">List node holding <paramref name="operation"/>; the copy is inserted before it.</param>
/// <param name="operation">Operation to adjust. Left untouched when it has no destination or no sources.</param>
private static void HandleSameDestSrc1Copy(LLNode node, Operation operation)
{
    Operand dest = operation.Dest;

    if (dest == null || operation.SourcesCount == 0)
    {
        return;
    }

    Operand firstSource = operation.GetSource(0);

    // Multiply (which maps to IMUL) has a three operand form taking an immediate
    // as its second source; that form needs no dest/source aliasing.
    bool immediateMultiply =
        operation.Inst == Instruction.Multiply &&
        operation.GetSource(1).Kind == OperandKind.Constant;

    // NOTE(review): IsSameOperandDestSrc1 is defined elsewhere; presumably it reports
    // whether the instruction requires dest and the first source to be the same operand.
    bool tieFirstSource =
        IsSameOperandDestSrc1(operation) &&
        firstSource.Kind == OperandKind.LocalVariable &&
        !immediateMultiply;

    // Index of the source that has to be replaced by the destination.
    int tiedIndex;

    if (tieFirstSource)
    {
        tiedIndex = 0;
    }
    else if (operation.Inst == Instruction.ConditionalSelect)
    {
        // For the conditional select, it is the third source that is copied into dest.
        tiedIndex = 2;
    }
    else
    {
        return;
    }

    Operand tiedSource = operation.GetSource(tiedIndex);

    node.List.AddBefore(node, new Operation(Instruction.Copy, dest, tiedSource));

    operation.SetSource(tiedIndex, dest);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Replaces an unsigned integer to floating point conversion with an equivalent
/// sequence built from signed conversions, since X86 has no unsigned variant.
/// </summary>
/// <param name="node">List node holding <paramref name="operation"/>.</param>
/// <param name="operation">The unsigned conversion; it is passed to Delete once the replacement is emitted.</param>
/// <returns>The node of the last operation that was inserted.</returns>
private static LLNode HandleConvertToFPUI(LLNode node, Operation operation)
{
    Operand dest = operation.Dest;
    Operand source = operation.GetSource(0);

    Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");

    LinkedList<Node> nodes = node.List;

    LLNode originalNode = node;

    // Appends an operation after the last inserted one and advances the cursor.
    void Append(Operation op) => node = nodes.AddAfter(node, op);

    if (source.Type == OperandType.I32)
    {
        // A 32-bits value zero-extended to 64-bits is never negative, so the
        // 64-bits signed conversion gives the right result directly.
        Operand extended = Local(OperandType.I64);

        Append(new Operation(Instruction.ZeroExtend32, extended, source));
        Append(new Operation(Instruction.ConvertToFP, dest, extended));
    }
    else /* if (source.Type == OperandType.I64) */
    {
        // 64-bits values may have the most significant bit set, so:
        // - Shift the value right by 1 (divide by 2) to clear the top bit,
        //   keeping the bit that is shifted out on the side.
        // - Convert the halved value with the signed conversion.
        // - Double the result by adding it to itself.
        // - Convert the saved low bit to float and add it to the result.
        Operand lowBit = Local(OperandType.I64);
        Operand halved = Local(OperandType.I64);

        Operand lowBitFP = Local(dest.Type);

        Append(new Operation(Instruction.Copy, lowBit, source));
        Append(new Operation(Instruction.Copy, halved, source));

        Append(new Operation(Instruction.BitwiseAnd, lowBit, lowBit, Const(1L)));
        Append(new Operation(Instruction.ShiftRightUI, halved, halved, Const(1)));

        Append(new Operation(Instruction.ConvertToFP, lowBitFP, lowBit));
        Append(new Operation(Instruction.ConvertToFP, dest, halved));

        Append(new Operation(Instruction.Add, dest, dest, dest));
        Append(new Operation(Instruction.Add, dest, dest, lowBitFP));
    }

    Delete(originalNode, operation);

    return node;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Replaces a floating point negate with a XOR against a mask that only has the
/// highest bit set, since there is no SSE FP negate instruction.
/// Negating the value 0 this way yields -0.
/// </summary>
/// <param name="node">List node holding <paramref name="operation"/>.</param>
/// <param name="operation">The negate operation; it is passed to Delete once the replacement is emitted.</param>
/// <returns>The node of the last operation that was inserted.</returns>
private static LLNode HandleNegate(LLNode node, Operation operation)
{
    Operand dest = operation.Dest;
    Operand source = operation.GetSource(0);

    Debug.Assert(dest.Type == OperandType.FP32 ||
                 dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");

    LinkedList<Node> nodes = node.List;

    LLNode originalNode = node;

    bool isSingle = dest.Type == OperandType.FP32;

    // The shift that leaves only the top bit of each element set depends on the element size.
    Intrinsic shiftLeft = isSingle ? Intrinsic.X86Pslld : Intrinsic.X86Psllq;
    int shiftAmount = isSingle ? 31 : 63;

    Operand mask = Local(dest.Type);

    // NOTE(review): VectorOne presumably fills the register with ones, so the
    // left shift below leaves just the sign bit set - confirm.
    node = nodes.AddAfter(node, new Operation(Instruction.VectorOne, mask));
    node = nodes.AddAfter(node, new IntrinsicOperation(shiftLeft, mask, mask, Const(shiftAmount)));

    // Flip the sign bit of the source, then move the result into the destination.
    node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Xorps, mask, mask, source));
    node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, mask));

    Delete(originalNode, operation);

    return node;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Replaces an 8-bits vector element insertion with a 16-bits extract, a merge of
/// the new byte on a general purpose register, and a 16-bits insert.
/// Used when SSE 4.1 is not supported.
/// </summary>
/// <param name="node">List node holding <paramref name="operation"/>.</param>
/// <param name="operation">
/// The insert operation. Sources are: the vector, the value to insert, and the
/// byte index, which must be a constant lower than 16.
/// </param>
/// <returns>The node of the last operation that was inserted.</returns>
private static LLNode HandleVectorInsert8(LLNode node, Operation operation)
{
    Operand dest = operation.Dest;
    Operand src1 = operation.GetSource(0); // Vector
    Operand src2 = operation.GetSource(1); // Value
    Operand src3 = operation.GetSource(2); // Index

    Debug.Assert(src3.Kind == OperandKind.Constant);

    byte index = src3.AsByte();

    Debug.Assert(index < 16);

    LinkedList<Node> nodes = node.List;

    LLNode currentNode = node;

    // Holds the 16-bits element that contains the byte being replaced.
    Operand temp = Local(OperandType.I32);

    // Holds a private copy of the value, so it can be masked or shifted
    // without modifying the original operand.
    Operand temp2 = Local(OperandType.I32);

    node = nodes.AddAfter(node, new Operation(Instruction.VectorExtract16, temp, src1, Const(index >> 1)));
    node = nodes.AddAfter(node, new Operation(Instruction.Copy, temp2, src2));

    if ((index & 1) != 0)
    {
        // Odd index: keep the low byte of the element, and place the value on the high byte.
        node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp, temp));
        node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
    }
    else
    {
        // Even index: keep the high byte of the element, and place the value on the low byte.
        // The value must be truncated to 8 bits first, otherwise any bit set above
        // bit 7 would be merged into the high byte that has to be preserved.
        node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, temp, temp, Const(0xff00)));
        node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp2, temp2));
    }

    node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp, temp, temp2));

    node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert16, dest, src1, temp, Const(index >> 1)));

    Delete(currentNode, operation);

    return node;
}
|
||||||
|
|
||||||
|
private static LLNode HandleCallWindowsAbi(StackAllocator stackAlloc, LLNode node, Operation operation)
|
||||||
{
|
{
|
||||||
Operand dest = operation.Dest;
|
Operand dest = operation.Dest;
|
||||||
|
|
||||||
//Handle struct arguments.
|
LinkedList<Node> nodes = node.List;
|
||||||
|
|
||||||
|
// Handle struct arguments.
|
||||||
int retArgs = 0;
|
int retArgs = 0;
|
||||||
|
|
||||||
int stackAllocOffset = 0;
|
int stackAllocOffset = 0;
|
||||||
|
|
||||||
int AllocateOnStack(int size)
|
int AllocateOnStack(int size)
|
||||||
{
|
{
|
||||||
//We assume that the stack allocator is initially empty (TotalSize = 0).
|
// We assume that the stack allocator is initially empty (TotalSize = 0).
|
||||||
//Taking that into account, we can reuse the space allocated for other
|
// Taking that into account, we can reuse the space allocated for other
|
||||||
//calls by keeping track of our own allocated size (stackAllocOffset).
|
// calls by keeping track of our own allocated size (stackAllocOffset).
|
||||||
//If the space allocated is not big enough, then we just expand it.
|
// If the space allocated is not big enough, then we just expand it.
|
||||||
int offset = stackAllocOffset;
|
int offset = stackAllocOffset;
|
||||||
|
|
||||||
if (stackAllocOffset + size > stackAlloc.TotalSize)
|
if (stackAllocOffset + size > stackAlloc.TotalSize)
|
||||||
|
@ -432,7 +556,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
|
Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
|
||||||
|
|
||||||
node.List.AddBefore(node, allocOp);
|
nodes.AddBefore(node, allocOp);
|
||||||
|
|
||||||
retArgs = 1;
|
retArgs = 1;
|
||||||
}
|
}
|
||||||
|
@ -449,17 +573,17 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Operation allocOp = new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset));
|
Operation allocOp = new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset));
|
||||||
|
|
||||||
node.List.AddBefore(node, allocOp);
|
nodes.AddBefore(node, allocOp);
|
||||||
|
|
||||||
Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
|
Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
|
||||||
|
|
||||||
HandleConstantCopy(node.List.AddBefore(node, storeOp), storeOp);
|
HandleConstantCopy(nodes.AddBefore(node, storeOp), storeOp);
|
||||||
|
|
||||||
operation.SetSource(index, stackAddr);
|
operation.SetSource(index, stackAddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//Handle arguments passed on registers.
|
// Handle arguments passed on registers.
|
||||||
int argsCount = operation.SourcesCount - 1;
|
int argsCount = operation.SourcesCount - 1;
|
||||||
|
|
||||||
int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
|
int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
|
||||||
|
@ -490,13 +614,13 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source);
|
Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source);
|
||||||
|
|
||||||
HandleConstantCopy(node.List.AddBefore(node, srcCopyOp), srcCopyOp);
|
HandleConstantCopy(nodes.AddBefore(node, srcCopyOp), srcCopyOp);
|
||||||
|
|
||||||
operation.SetSource(index + 1, argReg);
|
operation.SetSource(index + 1, argReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
//The remaining arguments (those that are not passed on registers)
|
// The remaining arguments (those that are not passed on registers)
|
||||||
//should be passed on the stack, we write them to the stack with "SpillArg".
|
// should be passed on the stack, we write them to the stack with "SpillArg".
|
||||||
for (int index = argsCount; index < operation.SourcesCount - 1; index++)
|
for (int index = argsCount; index < operation.SourcesCount - 1; index++)
|
||||||
{
|
{
|
||||||
Operand source = operation.GetSource(index + 1);
|
Operand source = operation.GetSource(index + 1);
|
||||||
|
@ -505,7 +629,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
|
Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
|
||||||
|
|
||||||
HandleConstantCopy(node.List.AddBefore(node, spillOp), spillOp);
|
HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
|
||||||
|
|
||||||
operation.SetSource(index + 1, new Operand(OperandKind.Undefined));
|
operation.SetSource(index + 1, new Operand(OperandKind.Undefined));
|
||||||
}
|
}
|
||||||
|
@ -520,11 +644,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Operation copyOp = new Operation(Instruction.Copy, retValueAddr, arg0Reg);
|
Operation copyOp = new Operation(Instruction.Copy, retValueAddr, arg0Reg);
|
||||||
|
|
||||||
node.List.AddBefore(node, copyOp);
|
nodes.AddBefore(node, copyOp);
|
||||||
|
|
||||||
Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr);
|
Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr);
|
||||||
|
|
||||||
node.List.AddAfter(node, loadOp);
|
node = nodes.AddAfter(node, loadOp);
|
||||||
|
|
||||||
operation.Dest = null;
|
operation.Dest = null;
|
||||||
}
|
}
|
||||||
|
@ -545,71 +669,18 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Operation destCopyOp = new Operation(Instruction.Copy, dest, retReg);
|
Operation destCopyOp = new Operation(Instruction.Copy, dest, retReg);
|
||||||
|
|
||||||
node.List.AddAfter(node, destCopyOp);
|
node = nodes.AddAfter(node, destCopyOp);
|
||||||
|
|
||||||
operation.Dest = retReg;
|
operation.Dest = retReg;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private static void HandleReturnWindowsAbi(
|
return node;
|
||||||
CompilerContext cctx,
|
|
||||||
LinkedListNode<Node> node,
|
|
||||||
Operand[] preservedArgs,
|
|
||||||
Operation operation)
|
|
||||||
{
|
|
||||||
if (operation.SourcesCount == 0)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Operand source = operation.GetSource(0);
|
|
||||||
|
|
||||||
Operand retReg;
|
|
||||||
|
|
||||||
if (source.Type.IsInteger())
|
|
||||||
{
|
|
||||||
retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
|
|
||||||
}
|
|
||||||
else if (source.Type == OperandType.V128)
|
|
||||||
{
|
|
||||||
if (preservedArgs[0] == null)
|
|
||||||
{
|
|
||||||
Operand preservedArg = Local(OperandType.I64);
|
|
||||||
|
|
||||||
Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
|
|
||||||
|
|
||||||
Operation copyOp = new Operation(Instruction.Copy, preservedArg, arg0);
|
|
||||||
|
|
||||||
cctx.Cfg.Entry.Operations.AddFirst(copyOp);
|
|
||||||
|
|
||||||
preservedArgs[0] = preservedArg;
|
|
||||||
}
|
|
||||||
|
|
||||||
retReg = preservedArgs[0];
|
|
||||||
}
|
|
||||||
else /* if (regType == RegisterType.Vector) */
|
|
||||||
{
|
|
||||||
retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (source.Type == OperandType.V128)
|
|
||||||
{
|
|
||||||
Operation retStoreOp = new Operation(Instruction.Store, null, retReg, source);
|
|
||||||
|
|
||||||
node.List.AddBefore(node, retStoreOp);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Operation retCopyOp = new Operation(Instruction.Copy, retReg, source);
|
|
||||||
|
|
||||||
node.List.AddBefore(node, retCopyOp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void HandleLoadArgumentWindowsAbi(
|
private static void HandleLoadArgumentWindowsAbi(
|
||||||
CompilerContext cctx,
|
CompilerContext cctx,
|
||||||
LinkedListNode<Node> node,
|
LLNode node,
|
||||||
Operand[] preservedArgs,
|
Operand[] preservedArgs,
|
||||||
Operation operation)
|
Operation operation)
|
||||||
{
|
{
|
||||||
|
@ -667,43 +738,62 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void HandleSameDestSrc1Copy(LinkedListNode<Node> node, Operation operation)
|
private static void HandleReturnWindowsAbi(
|
||||||
|
CompilerContext cctx,
|
||||||
|
LLNode node,
|
||||||
|
Operand[] preservedArgs,
|
||||||
|
Operation operation)
|
||||||
{
|
{
|
||||||
if (operation.Dest == null || operation.SourcesCount == 0)
|
if (operation.SourcesCount == 0)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Instruction inst = operation.Inst;
|
Operand source = operation.GetSource(0);
|
||||||
|
|
||||||
Operand dest = operation.Dest;
|
Operand retReg;
|
||||||
Operand src1 = operation.GetSource(0);
|
|
||||||
|
|
||||||
//The multiply instruction (that maps to IMUL) is somewhat special, it has
|
if (source.Type.IsInteger())
|
||||||
//a three operand form where the second source is a immediate value.
|
|
||||||
bool threeOperandForm = inst == Instruction.Multiply && operation.GetSource(1).Kind == OperandKind.Constant;
|
|
||||||
|
|
||||||
if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
|
|
||||||
{
|
{
|
||||||
Operation copyOp = new Operation(Instruction.Copy, dest, src1);
|
retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
|
||||||
|
|
||||||
node.List.AddBefore(node, copyOp);
|
|
||||||
|
|
||||||
operation.SetSource(0, dest);
|
|
||||||
}
|
}
|
||||||
else if (inst == Instruction.ConditionalSelect)
|
else if (source.Type == OperandType.V128)
|
||||||
{
|
{
|
||||||
Operand src3 = operation.GetSource(2);
|
if (preservedArgs[0] == null)
|
||||||
|
{
|
||||||
|
Operand preservedArg = Local(OperandType.I64);
|
||||||
|
|
||||||
Operation copyOp = new Operation(Instruction.Copy, dest, src3);
|
Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
|
||||||
|
|
||||||
node.List.AddBefore(node, copyOp);
|
Operation copyOp = new Operation(Instruction.Copy, preservedArg, arg0);
|
||||||
|
|
||||||
operation.SetSource(2, dest);
|
cctx.Cfg.Entry.Operations.AddFirst(copyOp);
|
||||||
|
|
||||||
|
preservedArgs[0] = preservedArg;
|
||||||
|
}
|
||||||
|
|
||||||
|
retReg = preservedArgs[0];
|
||||||
|
}
|
||||||
|
else /* if (regType == RegisterType.Vector) */
|
||||||
|
{
|
||||||
|
retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (source.Type == OperandType.V128)
|
||||||
|
{
|
||||||
|
Operation retStoreOp = new Operation(Instruction.Store, null, retReg, source);
|
||||||
|
|
||||||
|
node.List.AddBefore(node, retStoreOp);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Operation retCopyOp = new Operation(Instruction.Copy, retReg, source);
|
||||||
|
|
||||||
|
node.List.AddBefore(node, retCopyOp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Operand AddXmmCopy(LinkedListNode<Node> node, Operand source)
|
private static Operand AddXmmCopy(LLNode node, Operand source)
|
||||||
{
|
{
|
||||||
Operand temp = Local(source.Type);
|
Operand temp = Local(source.Type);
|
||||||
|
|
||||||
|
@ -716,7 +806,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Operand AddCopy(LinkedListNode<Node> node, Operand source)
|
private static Operand AddCopy(LLNode node, Operand source)
|
||||||
{
|
{
|
||||||
Operand temp = Local(source.Type);
|
Operand temp = Local(source.Type);
|
||||||
|
|
||||||
|
@ -755,7 +845,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
return value == (int)value;
|
return value == (int)value;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void Delete(LinkedListNode<Node> node, Operation operation)
|
private static void Delete(LLNode node, Operation operation)
|
||||||
{
|
{
|
||||||
operation.Dest = null;
|
operation.Dest = null;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue