Add hardware capability detection
This commit is contained in:
parent
679ff7271d
commit
93963c97d2
10 changed files with 203 additions and 53 deletions
|
@ -89,6 +89,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
|
||||
Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly));
|
||||
Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||
Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||
|
@ -151,7 +152,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
|
@ -246,6 +247,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
|
||||
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
|
||||
|
@ -336,6 +338,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
WriteInstruction(src1, null, src2, X86Instruction.Comiss);
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes the CPUID instruction to the output, passing no destination or
/// source operand and <see cref="OperandType.None"/> as the operand type.
/// </summary>
public void Cpuid()
|
||||
{
|
||||
WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid);
|
||||
}
|
||||
|
||||
public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
|
||||
{
|
||||
WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
|
||||
|
@ -794,7 +801,9 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Operand src2,
|
||||
Operand src3)
|
||||
{
|
||||
//TODO: Non-VEX version.
|
||||
// 3+ operands can only be encoded with the VEX encoding scheme.
|
||||
Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
|
||||
|
||||
WriteInstruction(dest, src1, src2, inst);
|
||||
|
||||
WriteByte((byte)(src3.AsByte() << 4));
|
||||
|
@ -1166,7 +1175,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
}
|
||||
}
|
||||
|
||||
if ((flags & InstructionFlags.RegOnly) != 0)
|
||||
if (dest != null && (flags & InstructionFlags.RegOnly) != 0)
|
||||
{
|
||||
opCode += dest.GetRegister().Index & 7;
|
||||
}
|
||||
|
|
|
@ -22,8 +22,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
public BasicBlock CurrBlock { get; private set; }
|
||||
|
||||
public int CallArgsRegionSize { get; }
|
||||
|
||||
public int VecCalleeSaveSize { get; }
|
||||
public int VecCalleeSaveSize { get; }
|
||||
|
||||
private long[] _blockOffsets;
|
||||
|
||||
|
@ -82,8 +81,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Assembler = new Assembler(stream);
|
||||
|
||||
CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int vecCalleeSaveSize);
|
||||
|
||||
VecCalleeSaveSize = vecCalleeSaveSize;
|
||||
VecCalleeSaveSize = vecCalleeSaveSize;
|
||||
|
||||
_blockOffsets = new long[blocksCount];
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Instruction.BranchIfTrue, GenerateBranchIfTrue);
|
||||
Add(Instruction.ByteSwap, GenerateByteSwap);
|
||||
Add(Instruction.Call, GenerateCall);
|
||||
Add(Instruction.Clobber, GenerateClobber);
|
||||
Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128);
|
||||
Add(Instruction.CompareEqual, GenerateCompareEqual);
|
||||
Add(Instruction.CompareGreater, GenerateCompareGreater);
|
||||
|
@ -46,6 +47,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Instruction.ConvertToFP, GenerateConvertToFP);
|
||||
Add(Instruction.Copy, GenerateCopy);
|
||||
Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
|
||||
Add(Instruction.CpuId, GenerateCpuId);
|
||||
Add(Instruction.Divide, GenerateDivide);
|
||||
Add(Instruction.DivideUI, GenerateDivideUI);
|
||||
Add(Instruction.Fill, GenerateFill);
|
||||
|
@ -255,6 +257,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
EnsureSameType(dest, src1);
|
||||
|
||||
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||
{
|
||||
EnsureSameReg(dest, src1);
|
||||
}
|
||||
|
||||
Debug.Assert(!dest.Type.IsInteger());
|
||||
Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
|
||||
|
||||
|
@ -271,6 +278,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
EnsureSameType(dest, src1);
|
||||
|
||||
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||
{
|
||||
EnsureSameReg(dest, src1);
|
||||
}
|
||||
|
||||
Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
|
||||
|
||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
|
||||
|
@ -289,7 +301,18 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Debug.Assert(!dest.Type.IsInteger());
|
||||
|
||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3);
|
||||
if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
|
||||
{
|
||||
context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
|
||||
}
|
||||
else
|
||||
{
|
||||
EnsureSameReg(dest, src1);
|
||||
|
||||
Debug.Assert(src3.GetRegister().Index == 0);
|
||||
|
||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -303,6 +326,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
EnsureSameType(dest, src1, src2);
|
||||
|
||||
if (!HardwareCapabilities.SupportsVexEncoding)
|
||||
{
|
||||
EnsureSameReg(dest, src1);
|
||||
}
|
||||
|
||||
Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
|
||||
|
||||
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
|
||||
|
@ -444,6 +472,12 @@ namespace ARMeilleure.CodeGen.X86
|
|||
context.Assembler.Call(operation.GetSource(0));
|
||||
}
|
||||
|
||||
private static void GenerateClobber(CodeGenContext context, Operation operation)
|
||||
{
|
||||
// This is only used to indicate that a register is clobbered to the
|
||||
// register allocator, we don't need to produce any code.
|
||||
}
|
||||
|
||||
private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
|
||||
{
|
||||
Operand source = operation.GetSource(0);
|
||||
|
@ -646,6 +680,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32);
|
||||
}
|
||||
|
||||
/// <summary>
/// Generates code for a CpuId operation by emitting a single CPUID instruction.
/// </summary>
/// <param name="context">Code generation context whose assembler receives the instruction.</param>
/// <param name="operation">The CpuId operation; it is not read by this method.</param>
// NOTE(review): no operands are passed to the assembler here; presumably the
// fixed EAX/EBX/ECX/EDX handling is done by an earlier pass (see the CpuId
// case in HandleFixedRegisterCopy) - confirm the pass ordering.
private static void GenerateCpuId(CodeGenContext context, Operation operation)
|
||||
{
|
||||
context.Assembler.Cpuid();
|
||||
}
|
||||
|
||||
private static void GenerateDivide(CodeGenContext context, Operation operation)
|
||||
{
|
||||
Operand dest = operation.Dest;
|
||||
|
|
|
@ -1,7 +1,46 @@
|
|||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.Translation;
|
||||
|
||||
namespace ARMeilleure.CodeGen.X86
|
||||
{
|
||||
/// <summary>
/// Exposes x86 CPU feature flags as boolean properties. The flags are read
/// once, in the static constructor, by compiling and invoking a small routine
/// that returns the result of a CpuId operation as a 64-bit value.
/// </summary>
static class HardwareCapabilities
|
||||
{
|
||||
// NOTE(review): this constant and the SupportsVexEncoding property further
// down share a name; in this diff view the constant appears to be the line
// the commit removes - confirm against the real file.
public const bool SupportsVexEncoding = true;
|
||||
// Signature of the compiled routine that returns the packed feature bits.
private delegate ulong GetFeatureInfo();
|
||||
|
||||
// Packed feature bits, assigned once by the static constructor.
private static ulong _featureInfo;
|
||||
|
||||
// Bits 0-31 of _featureInfo.
// NOTE(review): presumably these are the ECX feature flags of CPUID leaf 1,
// going by the merge sequence in HandleFixedRegisterCopy - confirm.
public static bool SupportsSse3 => (_featureInfo & (1UL << 0)) != 0;
|
||||
public static bool SupportsPclmulqdq => (_featureInfo & (1UL << 1)) != 0;
|
||||
public static bool SupportsSsse3 => (_featureInfo & (1UL << 9)) != 0;
|
||||
public static bool SupportsFma => (_featureInfo & (1UL << 12)) != 0;
|
||||
public static bool SupportsCx16 => (_featureInfo & (1UL << 13)) != 0;
|
||||
public static bool SupportsSse41 => (_featureInfo & (1UL << 19)) != 0;
|
||||
public static bool SupportsSse42 => (_featureInfo & (1UL << 20)) != 0;
|
||||
public static bool SupportsPopcnt => (_featureInfo & (1UL << 23)) != 0;
|
||||
public static bool SupportsAesni => (_featureInfo & (1UL << 25)) != 0;
|
||||
public static bool SupportsAvx => (_featureInfo & (1UL << 28)) != 0;
|
||||
public static bool SupportsF16c => (_featureInfo & (1UL << 29)) != 0;
|
||||
|
||||
// Bits 32-63 of _featureInfo (presumably the EDX feature flags - confirm).
// In C#, '+' binds tighter than '<<', so "1UL << 32 + 25" is 1UL << 57.
public static bool SupportsSse => (_featureInfo & (1UL << 32 + 25)) != 0;
|
||||
public static bool SupportsSse2 => (_featureInfo & (1UL << 32 + 26)) != 0;
|
||||
|
||||
// When set, SupportsVexEncoding reports false even if AVX is available.
public static bool ForceLegacySse { get; set; }
|
||||
|
||||
// True only when AVX is reported and legacy SSE has not been forced.
public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
|
||||
|
||||
// Builds a routine consisting of a CpuId operation followed by a return of
// its result, compiles it to a GetFeatureInfo delegate, and stores the value
// it returns in _featureInfo.
static HardwareCapabilities()
|
||||
{
|
||||
EmitterContext context = new EmitterContext();
|
||||
|
||||
Operand featureInfo = context.CpuId();
|
||||
|
||||
context.Return(featureInfo);
|
||||
|
||||
ControlFlowGraph cfg = context.GetControlFlowGraph();
|
||||
|
||||
GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(cfg, OperandType.I64);
|
||||
|
||||
_featureInfo = getFeatureInfo();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -255,14 +255,48 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
private static void HandleFixedRegisterCopy(LinkedListNode<Node> node, Operation operation)
|
||||
{
|
||||
Instruction inst = operation.Inst;
|
||||
|
||||
Operand dest = operation.Dest;
|
||||
|
||||
//Handle the many restrictions of the CPU Id instruction:
|
||||
//- EAX controls the information returned by this instruction.
|
||||
//- When EAX is 1, feature information is returned.
|
||||
//- The information is written to registers EAX, EBX, ECX and EDX.
|
||||
if (inst == Instruction.CpuId)
|
||||
{
|
||||
Debug.Assert(dest.Type == OperandType.I64);
|
||||
|
||||
Operand eax = Gpr(X86Register.Rax, OperandType.I32);
|
||||
Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
|
||||
Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
|
||||
Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
|
||||
|
||||
// Value 0x01 = Version, family and feature information.
|
||||
node.List.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
|
||||
|
||||
// Copy results to the destination register.
|
||||
// The values are split across two 32-bit registers; we merge them
|
||||
// into a single 64-bit register.
|
||||
Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
|
||||
|
||||
node.List.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
|
||||
node.List.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
|
||||
node.List.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
|
||||
|
||||
// We don't care about those two, but their values are overwritten,
|
||||
// so we need to take that into account.
|
||||
node.List.AddAfter(node, new Operation(Instruction.Clobber, ebx));
|
||||
node.List.AddAfter(node, new Operation(Instruction.Clobber, eax));
|
||||
|
||||
operation.Dest = null;
|
||||
}
|
||||
|
||||
if (operation.SourcesCount == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Instruction inst = operation.Inst;
|
||||
|
||||
Operand dest = operation.Dest;
|
||||
Operand src1 = operation.GetSource(0);
|
||||
|
||||
//Handle the many restrictions of the division instructions:
|
||||
|
@ -278,9 +312,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
operation.SetSource(0, rax);
|
||||
|
||||
Operation clobberCopyOp = new Operation(Instruction.Copy, rdx, rdx);
|
||||
|
||||
node.List.AddBefore(node, clobberCopyOp);
|
||||
node.List.AddBefore(node, new Operation(Instruction.Clobber, rdx));
|
||||
|
||||
node.List.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
|
||||
|
||||
|
@ -320,11 +352,8 @@ namespace ARMeilleure.CodeGen.X86
|
|||
node.List.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
|
||||
}
|
||||
|
||||
Operand src2 = operation.GetSource(1);
|
||||
Operand src3 = operation.GetSource(2);
|
||||
|
||||
SplitOperand(src2, X86Register.Rax, X86Register.Rdx);
|
||||
SplitOperand(src3, X86Register.Rbx, X86Register.Rcx);
|
||||
SplitOperand(operation.GetSource(1), X86Register.Rax, X86Register.Rdx);
|
||||
SplitOperand(operation.GetSource(2), X86Register.Rbx, X86Register.Rcx);
|
||||
|
||||
Operand rax = Gpr(X86Register.Rax, OperandType.I64);
|
||||
Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
|
||||
|
@ -334,6 +363,8 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
operation.SetSource(1, Undef());
|
||||
operation.SetSource(2, Undef());
|
||||
|
||||
operation.Dest = null;
|
||||
}
|
||||
|
||||
//The shift register is always implied to be CL (low 8-bits of RCX or ECX).
|
||||
|
@ -345,6 +376,22 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
operation.SetSource(1, rcx);
|
||||
}
|
||||
|
||||
//Handle intrinsics.
|
||||
if (IsIntrinsic(inst))
|
||||
{
|
||||
IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
|
||||
|
||||
//PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
|
||||
if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
|
||||
{
|
||||
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
||||
|
||||
node.List.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
|
||||
|
||||
operation.SetSource(2, xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void HandleCallWindowsAbi(
|
||||
|
@ -696,8 +743,9 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
private static bool IsLongConst(Operand operand)
|
||||
{
|
||||
long value = operand.Type == OperandType.I32 ? operand.AsInt32()
|
||||
: operand.AsInt64();
|
||||
long value = operand.Type == OperandType.I32
|
||||
? operand.AsInt32()
|
||||
: operand.AsInt64();
|
||||
|
||||
return !ConstFitsOnS32(value);
|
||||
}
|
||||
|
@ -763,7 +811,9 @@ namespace ARMeilleure.CodeGen.X86
|
|||
{
|
||||
bool isUnary = operation.SourcesCount < 2;
|
||||
|
||||
return !HardwareCapabilities.SupportsVexEncoding && !isUnary;
|
||||
bool hasVecDest = operation.Dest != null && operation.Dest.Type == OperandType.V128;
|
||||
|
||||
return !HardwareCapabilities.SupportsVexEncoding && !isUnary && hasVecDest;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -775,10 +825,8 @@ namespace ARMeilleure.CodeGen.X86
|
|||
{
|
||||
case Instruction.Copy:
|
||||
case Instruction.LoadArgument:
|
||||
case Instruction.LoadFromContext:
|
||||
case Instruction.Spill:
|
||||
case Instruction.SpillArg:
|
||||
case Instruction.StoreToContext:
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Cmpxchg16b,
|
||||
Comisd,
|
||||
Comiss,
|
||||
Cpuid,
|
||||
Cvtdq2pd,
|
||||
Cvtdq2ps,
|
||||
Cvtpd2dq,
|
||||
|
@ -179,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Unpckhps,
|
||||
Unpcklpd,
|
||||
Unpcklps,
|
||||
Vpblendvb,
|
||||
Xor,
|
||||
Xorpd,
|
||||
Xorps,
|
||||
|
|
|
@ -324,7 +324,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true, reverseOps: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -336,7 +336,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false, reverseOps: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -348,7 +348,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true, reverseOps: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -360,7 +360,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false, reverseOps: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -372,7 +372,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -384,7 +384,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -396,7 +396,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -408,7 +408,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
|
||||
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -655,16 +655,16 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
private enum CmpCondition
|
||||
{
|
||||
Equal = 0,
|
||||
GreaterThanOrEqual = 5,
|
||||
GreaterThan = 6
|
||||
Equal = 0,
|
||||
LessThan = 1,
|
||||
LessThanOrEqual = 2
|
||||
}
|
||||
|
||||
private static void EmitCmpSseOrSse2OpF(
|
||||
ArmEmitterContext context,
|
||||
CmpCondition cond,
|
||||
bool scalar,
|
||||
bool isLeOrLt = false)
|
||||
bool reverseOps = false)
|
||||
{
|
||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||
|
||||
|
@ -677,7 +677,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
||||
|
||||
Operand res = isLeOrLt
|
||||
Operand res = reverseOps
|
||||
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
||||
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||
|
||||
|
@ -696,7 +696,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
||||
|
||||
Operand res = isLeOrLt
|
||||
Operand res = reverseOps
|
||||
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
||||
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||
|
||||
|
|
|
@ -67,6 +67,8 @@ namespace ARMeilleure.IntermediateRepresentation
|
|||
ZeroExtend32,
|
||||
ZeroExtend8,
|
||||
|
||||
Clobber,
|
||||
CpuId,
|
||||
Extended,
|
||||
Fill,
|
||||
LoadFromContext,
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
using ARMeilleure.CodeGen.X86;
|
||||
|
||||
namespace ARMeilleure
|
||||
{
|
||||
public static class Optimizations
|
||||
|
@ -6,20 +8,26 @@ namespace ARMeilleure
|
|||
|
||||
public static bool FastFP { get; set; } = true;
|
||||
|
||||
public static bool UseSseIfAvailable { get; set; }
|
||||
public static bool UseSse2IfAvailable { get; set; }
|
||||
public static bool UseSse3IfAvailable { get; set; }
|
||||
public static bool UseSsse3IfAvailable { get; set; }
|
||||
public static bool UseSse41IfAvailable { get; set; }
|
||||
public static bool UseSse42IfAvailable { get; set; }
|
||||
public static bool UsePopCntIfAvailable { get; set; }
|
||||
public static bool UseSseIfAvailable { get; set; } = true;
|
||||
public static bool UseSse2IfAvailable { get; set; } = true;
|
||||
public static bool UseSse3IfAvailable { get; set; } = true;
|
||||
public static bool UseSsse3IfAvailable { get; set; } = true;
|
||||
public static bool UseSse41IfAvailable { get; set; } = true;
|
||||
public static bool UseSse42IfAvailable { get; set; } = true;
|
||||
public static bool UsePopCntIfAvailable { get; set; } = true;
|
||||
|
||||
internal static bool UseSse { get; set; } = true;
|
||||
internal static bool UseSse2 { get; set; } = true;
|
||||
internal static bool UseSse3 { get; set; } = true;
|
||||
internal static bool UseSsse3 { get; set; } = true;
|
||||
internal static bool UseSse41 { get; set; } = true;
|
||||
internal static bool UseSse42 { get; set; } = true;
|
||||
internal static bool UsePopCnt { get; set; } = true;
|
||||
public static bool ForceLegacySse
|
||||
{
|
||||
get => HardwareCapabilities.ForceLegacySse;
|
||||
set => HardwareCapabilities.ForceLegacySse = value;
|
||||
}
|
||||
|
||||
internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
|
||||
internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
|
||||
internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
|
||||
internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
|
||||
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
|
||||
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
||||
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
||||
}
|
||||
}
|
|
@ -200,6 +200,11 @@ namespace ARMeilleure.Translation
|
|||
return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
|
||||
}
|
||||
|
||||
/// <summary>
/// Adds a CpuId operation whose destination is a new local of type
/// <see cref="OperandType.I64"/>; the operation takes no sources.
/// </summary>
/// <returns>
/// The operand returned by Add (presumably the new I64 local holding the
/// result - confirm against Add).
/// </returns>
internal Operand CpuId()
|
||||
{
|
||||
return Add(Instruction.CpuId, Local(OperandType.I64));
|
||||
}
|
||||
|
||||
public Operand Divide(Operand op1, Operand op2)
|
||||
{
|
||||
return Add(Instruction.Divide, Local(op1.Type), op1, op2);
|
||||
|
|
Loading…
Add table
Reference in a new issue