diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 401dccffe9..0668f7e0ea 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -89,6 +89,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW)); Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); + Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly)); Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3)); Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex)); Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2)); @@ -151,7 +152,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66)); - Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66)); Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66)); @@ -246,6 +247,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex)); Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex)); + Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); @@ -336,6 +338,11 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(src1, null, src2, X86Instruction.Comiss); } + public void Cpuid() + { + WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid); + } + public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) { WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); @@ -794,7 +801,9 @@ namespace ARMeilleure.CodeGen.X86 Operand src2, Operand src3) { - //TODO: Non-VEX version. + // 3+ operands can only be encoded with the VEX encoding scheme. 
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding); + WriteInstruction(dest, src1, src2, inst); WriteByte((byte)(src3.AsByte() << 4)); @@ -1166,7 +1175,7 @@ namespace ARMeilleure.CodeGen.X86 } } - if ((flags & InstructionFlags.RegOnly) != 0) + if (dest != null && (flags & InstructionFlags.RegOnly) != 0) { opCode += dest.GetRegister().Index & 7; } diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs index 88f80c69d0..1f6453ed1f 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenContext.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -22,8 +22,7 @@ namespace ARMeilleure.CodeGen.X86 public BasicBlock CurrBlock { get; private set; } public int CallArgsRegionSize { get; } - - public int VecCalleeSaveSize { get; } + public int VecCalleeSaveSize { get; } private long[] _blockOffsets; @@ -82,8 +81,7 @@ namespace ARMeilleure.CodeGen.X86 Assembler = new Assembler(stream); CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int vecCalleeSaveSize); - - VecCalleeSaveSize = vecCalleeSaveSize; + VecCalleeSaveSize = vecCalleeSaveSize; _blockOffsets = new long[blocksCount]; diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 292327998e..76da88adf8 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -30,6 +30,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.BranchIfTrue, GenerateBranchIfTrue); Add(Instruction.ByteSwap, GenerateByteSwap); Add(Instruction.Call, GenerateCall); + Add(Instruction.Clobber, GenerateClobber); Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128); Add(Instruction.CompareEqual, GenerateCompareEqual); Add(Instruction.CompareGreater, GenerateCompareGreater); @@ -46,6 +47,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.ConvertToFP, GenerateConvertToFP); Add(Instruction.Copy, GenerateCopy); Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + 
Add(Instruction.CpuId, GenerateCpuId); Add(Instruction.Divide, GenerateDivide); Add(Instruction.DivideUI, GenerateDivideUI); Add(Instruction.Fill, GenerateFill); @@ -255,6 +257,11 @@ namespace ARMeilleure.CodeGen.X86 EnsureSameType(dest, src1); + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + Debug.Assert(!dest.Type.IsInteger()); Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant); @@ -271,6 +278,11 @@ namespace ARMeilleure.CodeGen.X86 EnsureSameType(dest, src1); + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant); context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte()); @@ -289,7 +301,18 @@ namespace ARMeilleure.CodeGen.X86 Debug.Assert(!dest.Type.IsInteger()); - context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3); + if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3); + } + else + { + EnsureSameReg(dest, src1); + + Debug.Assert(src3.GetRegister().Index == 0); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + } break; } @@ -303,6 +326,11 @@ namespace ARMeilleure.CodeGen.X86 EnsureSameType(dest, src1, src2); + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant); context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte()); @@ -444,6 +472,12 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Call(operation.GetSource(0)); } + private static void GenerateClobber(CodeGenContext context, Operation operation) + { + // This is only used to indicate that a register is clobbered to the + // register allocator, we don't need to produce any code. 
+ } + private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation) { Operand source = operation.GetSource(0); @@ -646,6 +680,11 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32); } + private static void GenerateCpuId(CodeGenContext context, Operation operation) + { + context.Assembler.Cpuid(); + } + private static void GenerateDivide(CodeGenContext context, Operation operation) { Operand dest = operation.Dest; diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs index be5905394e..54dd4be97f 100644 --- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -1,7 +1,46 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + namespace ARMeilleure.CodeGen.X86 { static class HardwareCapabilities { - public const bool SupportsVexEncoding = true; + private delegate ulong GetFeatureInfo(); + + private static ulong _featureInfo; + + public static bool SupportsSse3 => (_featureInfo & (1UL << 0)) != 0; + public static bool SupportsPclmulqdq => (_featureInfo & (1UL << 1)) != 0; + public static bool SupportsSsse3 => (_featureInfo & (1UL << 9)) != 0; + public static bool SupportsFma => (_featureInfo & (1UL << 12)) != 0; + public static bool SupportsCx16 => (_featureInfo & (1UL << 13)) != 0; + public static bool SupportsSse41 => (_featureInfo & (1UL << 19)) != 0; + public static bool SupportsSse42 => (_featureInfo & (1UL << 20)) != 0; + public static bool SupportsPopcnt => (_featureInfo & (1UL << 23)) != 0; + public static bool SupportsAesni => (_featureInfo & (1UL << 25)) != 0; + public static bool SupportsAvx => (_featureInfo & (1UL << 28)) != 0; + public static bool SupportsF16c => (_featureInfo & (1UL << 29)) != 0; + + public static bool SupportsSse => (_featureInfo & (1UL << 32 + 25)) != 0; + public static bool SupportsSse2 => (_featureInfo & (1UL << 32 
+ 26)) != 0; + + public static bool ForceLegacySse { get; set; } + + public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx; + + static HardwareCapabilities() + { + EmitterContext context = new EmitterContext(); + + Operand featureInfo = context.CpuId(); + + context.Return(featureInfo); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + GetFeatureInfo getFeatureInfo = Compiler.Compile(cfg, OperandType.I64); + + _featureInfo = getFeatureInfo(); + } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 60188a8a9f..edc30fbfac 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -255,14 +255,48 @@ namespace ARMeilleure.CodeGen.X86 private static void HandleFixedRegisterCopy(LinkedListNode node, Operation operation) { + Instruction inst = operation.Inst; + + Operand dest = operation.Dest; + + //Handle the many restrictions of the CPU Id instruction: + //- EAX controls the information returned by this instruction. + //- When EAX is 1, feature information is returned. + //- The information is written to registers EAX, EBX, ECX and EDX. + if (inst == Instruction.CpuId) + { + Debug.Assert(dest.Type == OperandType.I64); + + Operand eax = Gpr(X86Register.Rax, OperandType.I32); + Operand ebx = Gpr(X86Register.Rbx, OperandType.I32); + Operand ecx = Gpr(X86Register.Rcx, OperandType.I32); + Operand edx = Gpr(X86Register.Rdx, OperandType.I32); + + // Value 0x01 = Version, family and feature information. + node.List.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1))); + + // Copy results to the destination register. + // The values are split into 2 32-bits registers, we merge them + // into a single 64-bits register. 
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64); + + node.List.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx)); + node.List.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32))); + node.List.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx)); + + // We don't care about those two, but their values are overwritten, + // so we need to take that into account. + node.List.AddAfter(node, new Operation(Instruction.Clobber, ebx)); + node.List.AddAfter(node, new Operation(Instruction.Clobber, eax)); + + operation.Dest = null; + } + if (operation.SourcesCount == 0) { return; } - Instruction inst = operation.Inst; - - Operand dest = operation.Dest; Operand src1 = operation.GetSource(0); //Handle the many restrictions of the division instructions: @@ -278,9 +312,7 @@ namespace ARMeilleure.CodeGen.X86 operation.SetSource(0, rax); - Operation clobberCopyOp = new Operation(Instruction.Copy, rdx, rdx); - - node.List.AddBefore(node, clobberCopyOp); + node.List.AddBefore(node, new Operation(Instruction.Clobber, rdx)); node.List.AddAfter(node, new Operation(Instruction.Copy, dest, rax)); @@ -320,11 +352,8 @@ namespace ARMeilleure.CodeGen.X86 node.List.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1))); } - Operand src2 = operation.GetSource(1); - Operand src3 = operation.GetSource(2); - - SplitOperand(src2, X86Register.Rax, X86Register.Rdx); - SplitOperand(src3, X86Register.Rbx, X86Register.Rcx); + SplitOperand(operation.GetSource(1), X86Register.Rax, X86Register.Rdx); + SplitOperand(operation.GetSource(2), X86Register.Rbx, X86Register.Rcx); Operand rax = Gpr(X86Register.Rax, OperandType.I64); Operand rdx = Gpr(X86Register.Rdx, OperandType.I64); @@ -334,6 +363,8 @@ namespace ARMeilleure.CodeGen.X86 operation.SetSource(1, Undef()); operation.SetSource(2, Undef()); + + operation.Dest = null; } //The shift register is always implied to be CL (low 8-bits of RCX or ECX). 
@@ -345,6 +376,22 @@ namespace ARMeilleure.CodeGen.X86 operation.SetSource(1, rcx); } + + //Handle intrinsics. + if (IsIntrinsic(inst)) + { + IntrinsicOperation intrinOp = (IntrinsicOperation)operation; + + //PBLENDVB last operand is always implied to be XMM0 when VEX is not supported. + if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding) + { + Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128); + + node.List.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2))); + + operation.SetSource(2, xmm0); + } + } } private static void HandleCallWindowsAbi( @@ -696,8 +743,9 @@ namespace ARMeilleure.CodeGen.X86 private static bool IsLongConst(Operand operand) { - long value = operand.Type == OperandType.I32 ? operand.AsInt32() - : operand.AsInt64(); + long value = operand.Type == OperandType.I32 + ? operand.AsInt32() + : operand.AsInt64(); return !ConstFitsOnS32(value); } @@ -763,7 +811,9 @@ namespace ARMeilleure.CodeGen.X86 { bool isUnary = operation.SourcesCount < 2; - return !HardwareCapabilities.SupportsVexEncoding && !isUnary; + bool hasVecDest = operation.Dest != null && operation.Dest.Type == OperandType.V128; + + return !HardwareCapabilities.SupportsVexEncoding && !isUnary && hasVecDest; } return false; @@ -775,10 +825,8 @@ namespace ARMeilleure.CodeGen.X86 { case Instruction.Copy: case Instruction.LoadArgument: - case Instruction.LoadFromContext: case Instruction.Spill: case Instruction.SpillArg: - case Instruction.StoreToContext: return true; } diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index 2dcd6a08df..10ba891aa5 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -22,6 +22,7 @@ namespace ARMeilleure.CodeGen.X86 Cmpxchg16b, Comisd, Comiss, + Cpuid, Cvtdq2pd, Cvtdq2ps, Cvtpd2dq, @@ -179,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86 Unpckhps, Unpcklpd, Unpcklps, + Vpblendvb, Xor, Xorpd, 
Xorps, diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs index f27121bb33..a5246ba05e 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -324,7 +324,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true, reverseOps: true); /* Operands reversed: m <= n, i.e. n >= m (replaces GreaterThanOrEqual). */ } else { @@ -336,7 +336,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false, reverseOps: true); /* Operands reversed: m <= n, i.e. n >= m (replaces GreaterThanOrEqual). */ } else { @@ -348,7 +348,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true, reverseOps: true); /* Operands reversed: m < n, i.e. n > m (replaces GreaterThan). */ } else { @@ -360,7 +360,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false, reverseOps: true); /* Operands reversed: m < n, i.e. n > m (replaces GreaterThan). */ } else { @@ -372,7 +372,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true); } else { @@ -384,7 +384,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false); } else { @@ -396,7 +396,7 @@
namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true); } else { @@ -408,7 +408,7 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true); + EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false); } else { @@ -655,16 +655,16 @@ namespace ARMeilleure.Instructions private enum CmpCondition { - Equal = 0, - GreaterThanOrEqual = 5, - GreaterThan = 6 + Equal = 0, + LessThan = 1, + LessThanOrEqual = 2 } private static void EmitCmpSseOrSse2OpF( ArmEmitterContext context, CmpCondition cond, bool scalar, - bool isLeOrLt = false) + bool reverseOps = false) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -677,7 +677,7 @@ namespace ARMeilleure.Instructions { Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps; - Operand res = isLeOrLt + Operand res = reverseOps ? context.AddIntrinsic(inst, m, n, Const((int)cond)) : context.AddIntrinsic(inst, n, m, Const((int)cond)); @@ -696,7 +696,7 @@ namespace ARMeilleure.Instructions { Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd; - Operand res = isLeOrLt + Operand res = reverseOps ? 
context.AddIntrinsic(inst, m, n, Const((int)cond)) : context.AddIntrinsic(inst, n, m, Const((int)cond)); diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs index d43c250ca2..863f385849 100644 --- a/ARMeilleure/IntermediateRepresentation/Instruction.cs +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -67,6 +67,8 @@ namespace ARMeilleure.IntermediateRepresentation ZeroExtend32, ZeroExtend8, + Clobber, + CpuId, Extended, Fill, LoadFromContext, diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs index 813b608b49..8e84662ba7 100644 --- a/ARMeilleure/Optimizations.cs +++ b/ARMeilleure/Optimizations.cs @@ -1,3 +1,5 @@ +using ARMeilleure.CodeGen.X86; + namespace ARMeilleure { public static class Optimizations @@ -6,20 +8,26 @@ namespace ARMeilleure public static bool FastFP { get; set; } = true; - public static bool UseSseIfAvailable { get; set; } - public static bool UseSse2IfAvailable { get; set; } - public static bool UseSse3IfAvailable { get; set; } - public static bool UseSsse3IfAvailable { get; set; } - public static bool UseSse41IfAvailable { get; set; } - public static bool UseSse42IfAvailable { get; set; } - public static bool UsePopCntIfAvailable { get; set; } + public static bool UseSseIfAvailable { get; set; } = true; + public static bool UseSse2IfAvailable { get; set; } = true; + public static bool UseSse3IfAvailable { get; set; } = true; + public static bool UseSsse3IfAvailable { get; set; } = true; + public static bool UseSse41IfAvailable { get; set; } = true; + public static bool UseSse42IfAvailable { get; set; } = true; + public static bool UsePopCntIfAvailable { get; set; } = true; - internal static bool UseSse { get; set; } = true; - internal static bool UseSse2 { get; set; } = true; - internal static bool UseSse3 { get; set; } = true; - internal static bool UseSsse3 { get; set; } = true; - internal static bool UseSse41 { get; set; } = true; - 
internal static bool UseSse42 { get; set; } = true; - internal static bool UsePopCnt { get; set; } = true; + public static bool ForceLegacySse + { + get => HardwareCapabilities.ForceLegacySse; + set => HardwareCapabilities.ForceLegacySse = value; + } + + internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse; + internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2; + internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3; + internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3; + internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41; + internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42; + internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt; } } \ No newline at end of file diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index be00326c2f..a1cdf71092 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -200,6 +200,11 @@ namespace ARMeilleure.Translation return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1); } + internal Operand CpuId() + { + return Add(Instruction.CpuId, Local(OperandType.I64)); + } + public Operand Divide(Operand op1, Operand op2) { return Add(Instruction.Divide, Local(op1.Type), op1, op2);