Add hardware capability detection
This commit is contained in:
parent
679ff7271d
commit
93963c97d2
10 changed files with 203 additions and 53 deletions
|
@ -89,6 +89,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
|
Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
|
||||||
Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
|
Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
|
||||||
|
Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly));
|
||||||
Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||||
Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
|
Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
|
||||||
Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||||
|
@ -151,7 +152,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
|
@ -246,6 +247,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
|
Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
|
||||||
Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
|
Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
|
||||||
|
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
|
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
|
||||||
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
|
Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
|
||||||
|
@ -336,6 +338,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
WriteInstruction(src1, null, src2, X86Instruction.Comiss);
|
WriteInstruction(src1, null, src2, X86Instruction.Comiss);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the CPUID instruction.
// No operands are handed to the encoder: both operand arguments are null
// and the operand type is OperandType.None.
public void Cpuid()
|
||||||
|
{
|
||||||
|
WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid);
|
||||||
|
}
|
||||||
|
|
||||||
public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
|
public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
|
||||||
{
|
{
|
||||||
WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
|
WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
|
||||||
|
@ -794,7 +801,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Operand src2,
|
Operand src2,
|
||||||
Operand src3)
|
Operand src3)
|
||||||
{
|
{
|
||||||
//TODO: Non-VEX version.
|
// 3+ operands can only be encoded with the VEX encoding scheme.
|
||||||
|
Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
|
||||||
|
|
||||||
WriteInstruction(dest, src1, src2, inst);
|
WriteInstruction(dest, src1, src2, inst);
|
||||||
|
|
||||||
WriteByte((byte)(src3.AsByte() << 4));
|
WriteByte((byte)(src3.AsByte() << 4));
|
||||||
|
@ -1166,7 +1175,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((flags & InstructionFlags.RegOnly) != 0)
|
if (dest != null && (flags & InstructionFlags.RegOnly) != 0)
|
||||||
{
|
{
|
||||||
opCode += dest.GetRegister().Index & 7;
|
opCode += dest.GetRegister().Index & 7;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,6 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
public BasicBlock CurrBlock { get; private set; }
|
public BasicBlock CurrBlock { get; private set; }
|
||||||
|
|
||||||
public int CallArgsRegionSize { get; }
|
public int CallArgsRegionSize { get; }
|
||||||
|
|
||||||
public int VecCalleeSaveSize { get; }
|
public int VecCalleeSaveSize { get; }
|
||||||
|
|
||||||
private long[] _blockOffsets;
|
private long[] _blockOffsets;
|
||||||
|
@ -82,7 +81,6 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Assembler = new Assembler(stream);
|
Assembler = new Assembler(stream);
|
||||||
|
|
||||||
CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int vecCalleeSaveSize);
|
CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int vecCalleeSaveSize);
|
||||||
|
|
||||||
VecCalleeSaveSize = vecCalleeSaveSize;
|
VecCalleeSaveSize = vecCalleeSaveSize;
|
||||||
|
|
||||||
_blockOffsets = new long[blocksCount];
|
_blockOffsets = new long[blocksCount];
|
||||||
|
|
|
@ -30,6 +30,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(Instruction.BranchIfTrue, GenerateBranchIfTrue);
|
Add(Instruction.BranchIfTrue, GenerateBranchIfTrue);
|
||||||
Add(Instruction.ByteSwap, GenerateByteSwap);
|
Add(Instruction.ByteSwap, GenerateByteSwap);
|
||||||
Add(Instruction.Call, GenerateCall);
|
Add(Instruction.Call, GenerateCall);
|
||||||
|
Add(Instruction.Clobber, GenerateClobber);
|
||||||
Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128);
|
Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128);
|
||||||
Add(Instruction.CompareEqual, GenerateCompareEqual);
|
Add(Instruction.CompareEqual, GenerateCompareEqual);
|
||||||
Add(Instruction.CompareGreater, GenerateCompareGreater);
|
Add(Instruction.CompareGreater, GenerateCompareGreater);
|
||||||
|
@ -46,6 +47,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(Instruction.ConvertToFP, GenerateConvertToFP);
|
Add(Instruction.ConvertToFP, GenerateConvertToFP);
|
||||||
Add(Instruction.Copy, GenerateCopy);
|
Add(Instruction.Copy, GenerateCopy);
|
||||||
Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
|
Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
|
||||||
|
Add(Instruction.CpuId, GenerateCpuId);
|
||||||
Add(Instruction.Divide, GenerateDivide);
|
Add(Instruction.Divide, GenerateDivide);
|
||||||
Add(Instruction.DivideUI, GenerateDivideUI);
|
Add(Instruction.DivideUI, GenerateDivideUI);
|
||||||
Add(Instruction.Fill, GenerateFill);
|
Add(Instruction.Fill, GenerateFill);
|
||||||
|
@ -255,6 +257,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
EnsureSameType(dest, src1);
|
EnsureSameType(dest, src1);
|
||||||
|
|
||||||
|
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
Debug.Assert(!dest.Type.IsInteger());
|
Debug.Assert(!dest.Type.IsInteger());
|
||||||
Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
|
Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
|
||||||
|
|
||||||
|
@ -271,6 +278,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
EnsureSameType(dest, src1);
|
EnsureSameType(dest, src1);
|
||||||
|
|
||||||
|
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
|
Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
|
||||||
|
|
||||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
|
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
|
||||||
|
@ -289,7 +301,18 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Debug.Assert(!dest.Type.IsInteger());
|
Debug.Assert(!dest.Type.IsInteger());
|
||||||
|
|
||||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3);
|
if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
|
||||||
|
Debug.Assert(src3.GetRegister().Index == 0);
|
||||||
|
|
||||||
|
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -303,6 +326,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
EnsureSameType(dest, src1, src2);
|
EnsureSameType(dest, src1, src2);
|
||||||
|
|
||||||
|
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
EnsureSameReg(dest, src1);
|
||||||
|
}
|
||||||
|
|
||||||
Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
|
Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
|
||||||
|
|
||||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
|
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
|
||||||
|
@ -444,6 +472,12 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
context.Assembler.Call(operation.GetSource(0));
|
context.Assembler.Call(operation.GetSource(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Code generator entry for Instruction.Clobber. Intentionally emits nothing;
// neither the context nor the operation is read.
private static void GenerateClobber(CodeGenContext context, Operation operation)
|
||||||
|
{
|
||||||
|
// A Clobber operation only tells the register allocator that a register
|
||||||
|
// is overwritten at this point, so there is no machine code to produce.
|
||||||
|
}
|
||||||
|
|
||||||
private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
|
private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
|
||||||
{
|
{
|
||||||
Operand source = operation.GetSource(0);
|
Operand source = operation.GetSource(0);
|
||||||
|
@ -646,6 +680,11 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32);
|
context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Code generator entry for Instruction.CpuId: emits a bare CPUID through the assembler.
// The operation's operands are not read here.
// NOTE(review): the EAX input and the copy of the results out of ECX/EDX appear to be
// inserted around the operation by the fixed-register handling of the pre-allocation
// pass - confirm that pass always runs before code generation.
private static void GenerateCpuId(CodeGenContext context, Operation operation)
|
||||||
|
{
|
||||||
|
context.Assembler.Cpuid();
|
||||||
|
}
|
||||||
|
|
||||||
private static void GenerateDivide(CodeGenContext context, Operation operation)
|
private static void GenerateDivide(CodeGenContext context, Operation operation)
|
||||||
{
|
{
|
||||||
Operand dest = operation.Dest;
|
Operand dest = operation.Dest;
|
||||||
|
|
|
@ -1,7 +1,46 @@
|
||||||
|
using ARMeilleure.IntermediateRepresentation;
|
||||||
|
using ARMeilleure.Translation;
|
||||||
|
|
||||||
namespace ARMeilleure.CodeGen.X86
|
namespace ARMeilleure.CodeGen.X86
|
||||||
{
|
{
|
||||||
static class HardwareCapabilities
|
static class HardwareCapabilities
|
||||||
{
|
{
|
||||||
public const bool SupportsVexEncoding = true;
|
private delegate ulong GetFeatureInfo();
|
||||||
|
|
||||||
|
private static ulong _featureInfo;
|
||||||
|
|
||||||
|
// Feature flags decoded from _featureInfo, one bit per property.
// NOTE(review): bits 0-31 are presumably the ECX value and bits 32-63 the EDX value
// returned by CPUID with EAX = 1 (the CpuId lowering zero-extends EDX, shifts it
// left by 32 and ORs in RCX) - confirm the bit numbers against the CPUID reference.
public static bool SupportsSse3 => (_featureInfo & (1UL << 0)) != 0;
|
||||||
|
public static bool SupportsPclmulqdq => (_featureInfo & (1UL << 1)) != 0;
|
||||||
|
public static bool SupportsSsse3 => (_featureInfo & (1UL << 9)) != 0;
|
||||||
|
public static bool SupportsFma => (_featureInfo & (1UL << 12)) != 0;
|
||||||
|
public static bool SupportsCx16 => (_featureInfo & (1UL << 13)) != 0;
|
||||||
|
public static bool SupportsSse41 => (_featureInfo & (1UL << 19)) != 0;
|
||||||
|
public static bool SupportsSse42 => (_featureInfo & (1UL << 20)) != 0;
|
||||||
|
public static bool SupportsPopcnt => (_featureInfo & (1UL << 23)) != 0;
|
||||||
|
public static bool SupportsAesni => (_featureInfo & (1UL << 25)) != 0;
|
||||||
|
public static bool SupportsAvx => (_featureInfo & (1UL << 28)) != 0;
|
||||||
|
public static bool SupportsF16c => (_featureInfo & (1UL << 29)) != 0;
|
||||||
|
|
||||||
|
// Flags stored in the upper half of _featureInfo.
// In C#, '+' binds tighter than '<<', so these test bit (32 + n).
public static bool SupportsSse => (_featureInfo & (1UL << 32 + 25)) != 0;
|
||||||
|
public static bool SupportsSse2 => (_featureInfo & (1UL << 32 + 26)) != 0;
|
||||||
|
|
||||||
|
// When set, SupportsVexEncoding reports false even if the AVX bit is set.
public static bool ForceLegacySse { get; set; }
|
||||||
|
|
||||||
|
// True when VEX-encoded instructions may be emitted.
// NOTE(review): only the CPUID AVX bit is consulted; OS enablement of the AVX
// register state (OSXSAVE / XGETBV) is not checked here - confirm this is acceptable.
public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
|
||||||
|
|
||||||
|
// Type initializer: detects the host CPU features once.
// It builds a tiny IR function that performs a CpuId operation and returns its
// 64-bit result, compiles it to a GetFeatureInfo delegate, invokes it, and
// caches the returned value in _featureInfo.
// NOTE(review): _featureInfo is still 0 while this initializer runs, so any
// Supports* query made during Compiler.Compile on this thread would report false
// (i.e. the probe itself would be compiled without VEX) - confirm this is intended.
static HardwareCapabilities()
|
||||||
|
{
|
||||||
|
EmitterContext context = new EmitterContext();
|
||||||
|
|
||||||
|
// The only operation in the probe function; its result is the return value.
Operand featureInfo = context.CpuId();
|
||||||
|
|
||||||
|
context.Return(featureInfo);
|
||||||
|
|
||||||
|
ControlFlowGraph cfg = context.GetControlFlowGraph();
|
||||||
|
|
||||||
|
// OperandType.I64 is passed alongside the graph, matching the delegate's ulong return.
GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(cfg, OperandType.I64);
|
||||||
|
|
||||||
|
_featureInfo = getFeatureInfo();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -255,14 +255,48 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
private static void HandleFixedRegisterCopy(LinkedListNode<Node> node, Operation operation)
|
private static void HandleFixedRegisterCopy(LinkedListNode<Node> node, Operation operation)
|
||||||
{
|
{
|
||||||
|
Instruction inst = operation.Inst;
|
||||||
|
|
||||||
|
Operand dest = operation.Dest;
|
||||||
|
|
||||||
|
// Handle the many restrictions of the CPUID instruction:
|
||||||
|
//- EAX controls the information returned by this instruction.
|
||||||
|
//- When EAX is 1, feature information is returned.
|
||||||
|
//- The information is written to registers EAX, EBX, ECX and EDX.
|
||||||
|
if (inst == Instruction.CpuId)
|
||||||
|
{
|
||||||
|
Debug.Assert(dest.Type == OperandType.I64);
|
||||||
|
|
||||||
|
Operand eax = Gpr(X86Register.Rax, OperandType.I32);
|
||||||
|
Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
|
||||||
|
Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
|
||||||
|
Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
|
||||||
|
|
||||||
|
// Value 0x01 = Version, family and feature information.
|
||||||
|
node.List.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
|
||||||
|
|
||||||
|
// Copy results to the destination register.
|
||||||
|
// The values are split across two 32-bit registers; we merge them
|
||||||
|
// into a single 64-bit register.
|
||||||
|
Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
|
||||||
|
|
||||||
|
node.List.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
|
||||||
|
node.List.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
|
||||||
|
node.List.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
|
||||||
|
|
||||||
|
// EAX and EBX are not needed, but CPUID overwrites them,
|
||||||
|
// so they are marked as clobbered.
|
||||||
|
node.List.AddAfter(node, new Operation(Instruction.Clobber, ebx));
|
||||||
|
node.List.AddAfter(node, new Operation(Instruction.Clobber, eax));
|
||||||
|
|
||||||
|
operation.Dest = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (operation.SourcesCount == 0)
|
if (operation.SourcesCount == 0)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Instruction inst = operation.Inst;
|
|
||||||
|
|
||||||
Operand dest = operation.Dest;
|
|
||||||
Operand src1 = operation.GetSource(0);
|
Operand src1 = operation.GetSource(0);
|
||||||
|
|
||||||
//Handle the many restrictions of the division instructions:
|
//Handle the many restrictions of the division instructions:
|
||||||
|
@ -278,9 +312,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
operation.SetSource(0, rax);
|
operation.SetSource(0, rax);
|
||||||
|
|
||||||
Operation clobberCopyOp = new Operation(Instruction.Copy, rdx, rdx);
|
node.List.AddBefore(node, new Operation(Instruction.Clobber, rdx));
|
||||||
|
|
||||||
node.List.AddBefore(node, clobberCopyOp);
|
|
||||||
|
|
||||||
node.List.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
|
node.List.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
|
||||||
|
|
||||||
|
@ -320,11 +352,8 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
node.List.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
|
node.List.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
Operand src2 = operation.GetSource(1);
|
SplitOperand(operation.GetSource(1), X86Register.Rax, X86Register.Rdx);
|
||||||
Operand src3 = operation.GetSource(2);
|
SplitOperand(operation.GetSource(2), X86Register.Rbx, X86Register.Rcx);
|
||||||
|
|
||||||
SplitOperand(src2, X86Register.Rax, X86Register.Rdx);
|
|
||||||
SplitOperand(src3, X86Register.Rbx, X86Register.Rcx);
|
|
||||||
|
|
||||||
Operand rax = Gpr(X86Register.Rax, OperandType.I64);
|
Operand rax = Gpr(X86Register.Rax, OperandType.I64);
|
||||||
Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
|
Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
|
||||||
|
@ -334,6 +363,8 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
operation.SetSource(1, Undef());
|
operation.SetSource(1, Undef());
|
||||||
operation.SetSource(2, Undef());
|
operation.SetSource(2, Undef());
|
||||||
|
|
||||||
|
operation.Dest = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
//The shift register is always implied to be CL (low 8-bits of RCX or ECX).
|
//The shift register is always implied to be CL (low 8-bits of RCX or ECX).
|
||||||
|
@ -345,6 +376,22 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
operation.SetSource(1, rcx);
|
operation.SetSource(1, rcx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Handle intrinsics.
|
||||||
|
if (IsIntrinsic(inst))
|
||||||
|
{
|
||||||
|
IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
|
||||||
|
|
||||||
|
// The last operand of PBLENDVB is implicitly XMM0 when VEX encoding is not supported.
|
||||||
|
if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
|
||||||
|
{
|
||||||
|
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
||||||
|
|
||||||
|
node.List.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
|
||||||
|
|
||||||
|
operation.SetSource(2, xmm0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void HandleCallWindowsAbi(
|
private static void HandleCallWindowsAbi(
|
||||||
|
@ -696,7 +743,8 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
private static bool IsLongConst(Operand operand)
|
private static bool IsLongConst(Operand operand)
|
||||||
{
|
{
|
||||||
long value = operand.Type == OperandType.I32 ? operand.AsInt32()
|
long value = operand.Type == OperandType.I32
|
||||||
|
? operand.AsInt32()
|
||||||
: operand.AsInt64();
|
: operand.AsInt64();
|
||||||
|
|
||||||
return !ConstFitsOnS32(value);
|
return !ConstFitsOnS32(value);
|
||||||
|
@ -763,7 +811,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
{
|
{
|
||||||
bool isUnary = operation.SourcesCount < 2;
|
bool isUnary = operation.SourcesCount < 2;
|
||||||
|
|
||||||
return !HardwareCapabilities.SupportsVexEncoding && !isUnary;
|
bool hasVecDest = operation.Dest != null && operation.Dest.Type == OperandType.V128;
|
||||||
|
|
||||||
|
return !HardwareCapabilities.SupportsVexEncoding && !isUnary && hasVecDest;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -775,10 +825,8 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
{
|
{
|
||||||
case Instruction.Copy:
|
case Instruction.Copy:
|
||||||
case Instruction.LoadArgument:
|
case Instruction.LoadArgument:
|
||||||
case Instruction.LoadFromContext:
|
|
||||||
case Instruction.Spill:
|
case Instruction.Spill:
|
||||||
case Instruction.SpillArg:
|
case Instruction.SpillArg:
|
||||||
case Instruction.StoreToContext:
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Cmpxchg16b,
|
Cmpxchg16b,
|
||||||
Comisd,
|
Comisd,
|
||||||
Comiss,
|
Comiss,
|
||||||
|
Cpuid,
|
||||||
Cvtdq2pd,
|
Cvtdq2pd,
|
||||||
Cvtdq2ps,
|
Cvtdq2ps,
|
||||||
Cvtpd2dq,
|
Cvtpd2dq,
|
||||||
|
@ -179,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Unpckhps,
|
Unpckhps,
|
||||||
Unpcklpd,
|
Unpcklpd,
|
||||||
Unpcklps,
|
Unpcklps,
|
||||||
|
Vpblendvb,
|
||||||
Xor,
|
Xor,
|
||||||
Xorpd,
|
Xorpd,
|
||||||
Xorps,
|
Xorps,
|
||||||
|
|
|
@ -324,7 +324,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true, reverseOps: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -336,7 +336,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false, reverseOps: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -348,7 +348,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true, reverseOps: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -360,7 +360,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false, reverseOps: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -372,7 +372,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -384,7 +384,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -396,7 +396,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -408,7 +408,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -656,15 +656,15 @@ namespace ARMeilleure.Instructions
|
||||||
// Comparison predicate handed to EmitCmpSseOrSse2OpF, which casts it to int and
// passes it as the constant operand of the X86Cmpss/X86Cmpps/X86Cmpsd/X86Cmppd intrinsics.
// This view interleaves the removed members (GreaterThanOrEqual = 5, GreaterThan = 6)
// with the added ones (LessThan = 1, LessThanOrEqual = 2).
// NOTE(review): the values presumably follow the SSE compare-predicate immediate
// encoding (0 = EQ, 1 = LT, 2 = LE, 5 = NLT, 6 = NLE) - confirm against the
// instruction reference.
private enum CmpCondition
|
private enum CmpCondition
|
||||||
{
|
{
|
||||||
Equal = 0,
|
Equal = 0,
|
||||||
GreaterThanOrEqual = 5,
|
LessThan = 1,
|
||||||
GreaterThan = 6
|
LessThanOrEqual = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void EmitCmpSseOrSse2OpF(
|
private static void EmitCmpSseOrSse2OpF(
|
||||||
ArmEmitterContext context,
|
ArmEmitterContext context,
|
||||||
CmpCondition cond,
|
CmpCondition cond,
|
||||||
bool scalar,
|
bool scalar,
|
||||||
bool isLeOrLt = false)
|
bool reverseOps = false)
|
||||||
{
|
{
|
||||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -677,7 +677,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
||||||
|
|
||||||
Operand res = isLeOrLt
|
Operand res = reverseOps
|
||||||
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
||||||
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||||
|
|
||||||
|
@ -696,7 +696,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
||||||
|
|
||||||
Operand res = isLeOrLt
|
Operand res = reverseOps
|
||||||
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
||||||
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,8 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||||
ZeroExtend32,
|
ZeroExtend32,
|
||||||
ZeroExtend8,
|
ZeroExtend8,
|
||||||
|
|
||||||
|
Clobber,
|
||||||
|
CpuId,
|
||||||
Extended,
|
Extended,
|
||||||
Fill,
|
Fill,
|
||||||
LoadFromContext,
|
LoadFromContext,
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
using ARMeilleure.CodeGen.X86;
|
||||||
|
|
||||||
namespace ARMeilleure
|
namespace ARMeilleure
|
||||||
{
|
{
|
||||||
public static class Optimizations
|
public static class Optimizations
|
||||||
|
@ -6,20 +8,26 @@ namespace ARMeilleure
|
||||||
|
|
||||||
public static bool FastFP { get; set; } = true;
|
public static bool FastFP { get; set; } = true;
|
||||||
|
|
||||||
public static bool UseSseIfAvailable { get; set; }
|
public static bool UseSseIfAvailable { get; set; } = true;
|
||||||
public static bool UseSse2IfAvailable { get; set; }
|
public static bool UseSse2IfAvailable { get; set; } = true;
|
||||||
public static bool UseSse3IfAvailable { get; set; }
|
public static bool UseSse3IfAvailable { get; set; } = true;
|
||||||
public static bool UseSsse3IfAvailable { get; set; }
|
public static bool UseSsse3IfAvailable { get; set; } = true;
|
||||||
public static bool UseSse41IfAvailable { get; set; }
|
public static bool UseSse41IfAvailable { get; set; } = true;
|
||||||
public static bool UseSse42IfAvailable { get; set; }
|
public static bool UseSse42IfAvailable { get; set; } = true;
|
||||||
public static bool UsePopCntIfAvailable { get; set; }
|
public static bool UsePopCntIfAvailable { get; set; } = true;
|
||||||
|
|
||||||
internal static bool UseSse { get; set; } = true;
|
public static bool ForceLegacySse
|
||||||
internal static bool UseSse2 { get; set; } = true;
|
{
|
||||||
internal static bool UseSse3 { get; set; } = true;
|
get => HardwareCapabilities.ForceLegacySse;
|
||||||
internal static bool UseSsse3 { get; set; } = true;
|
set => HardwareCapabilities.ForceLegacySse = value;
|
||||||
internal static bool UseSse41 { get; set; } = true;
|
}
|
||||||
internal static bool UseSse42 { get; set; } = true;
|
|
||||||
internal static bool UsePopCnt { get; set; } = true;
|
internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
|
||||||
|
internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
|
||||||
|
internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
|
||||||
|
internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
|
||||||
|
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
|
||||||
|
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
||||||
|
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -200,6 +200,11 @@ namespace ARMeilleure.Translation
|
||||||
return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
|
return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an Instruction.CpuId operation with no sources, whose destination is a
// fresh 64-bit local, and returns the result of Add.
// NOTE(review): after the X86 fixed-register lowering, the value presumably holds
// the CPUID (EAX = 1) ECX flags in bits 0-31 and the EDX flags in bits 32-63 - confirm.
internal Operand CpuId()
|
||||||
|
{
|
||||||
|
return Add(Instruction.CpuId, Local(OperandType.I64));
|
||||||
|
}
|
||||||
|
|
||||||
public Operand Divide(Operand op1, Operand op2)
|
public Operand Divide(Operand op1, Operand op2)
|
||||||
{
|
{
|
||||||
return Add(Instruction.Divide, Local(op1.Type), op1, op2);
|
return Add(Instruction.Divide, Local(op1.Type), op1, op2);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue