diff --git a/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs index df1b558ccf..d217185070 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs @@ -2,15 +2,21 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { struct AllocationResult { - public int UsedRegisters { get; } - public int SpillRegionSize { get; } - public int MaxCallArgs { get; } + public int IntUsedRegisters { get; } + public int VecUsedRegisters { get; } + public int SpillRegionSize { get; } + public int MaxCallArgs { get; } - public AllocationResult(int usedRegisters, int spillRegionSize, int maxCallArgs) + public AllocationResult( + int intUsedRegisters, + int vecUsedRegisters, + int spillRegionSize, + int maxCallArgs) { - UsedRegisters = usedRegisters; - SpillRegionSize = spillRegionSize; - MaxCallArgs = maxCallArgs; + IntUsedRegisters = intUsedRegisters; + VecUsedRegisters = vecUsedRegisters; + SpillRegionSize = spillRegionSize; + MaxCallArgs = maxCallArgs; } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs index bcfd9f2ed7..5e61c980f7 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScan.cs @@ -1,7 +1,6 @@ using ARMeilleure.CodeGen.X86; using ARMeilleure.Common; using ARMeilleure.IntermediateRepresentation; -using ARMeilleure.State; using ARMeilleure.Translation; using System; using System.Collections.Generic; @@ -40,7 +39,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public BitMap Active { get; } public BitMap Inactive { get; } - public int UsedRegisters { get; set; } + public int IntUsedRegisters { get; set; } + public int VecUsedRegisters { get; set; } public AllocationContext(RegisterMasks masks, int intervalsCount) { @@ -102,7 +102,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - for (int index = RegistersCount; index < _intervals.Count; index++) + for (int index = RegistersCount * 2; index < _intervals.Count; index++) { if (!_intervals[index].IsSpilled) { @@ -113,7 +113,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators InsertSplitCopies(); InsertSplitCopiesAtEdges(cfg); - return new AllocationResult(context.UsedRegisters, context.StackAlloc.TotalSize, maxCallArgs); + return new AllocationResult( + context.IntUsedRegisters, + context.VecUsedRegisters, + context.StackAlloc.TotalSize, + maxCallArgs); } private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex) @@ -148,23 +152,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - int availableRegs = context.Masks.IntAvailableRegisters; - - foreach (int iIndex in context.Active) - { - availableRegs &= ~(1 << GetInterval(iIndex).Register.Index); - } - - foreach (int iIndex in context.Inactive) - { - LiveInterval interval = GetInterval(iIndex); - - if (interval.Overlaps(current)) - { - availableRegs &= ~(1 << interval.Register.Index); - } - } - if (!TryAllocateRegWithoutSpill(context, current, cIndex)) { AllocateRegWithSpill(context, current, cIndex); @@ -173,11 +160,15 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex) { + RegisterType regType = current.Local.Type.ToRegisterType(); + + int availableRegisters = context.Masks.GetAvailableRegisters(regType); + int[] freePositions = new 
int[RegistersCount]; for (int index = 0; index < RegistersCount; index++) { - if ((context.Masks.IntAvailableRegisters & (1 << index)) != 0) + if ((availableRegisters & (1 << index)) != 0) { freePositions[index] = int.MaxValue; } @@ -187,14 +178,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = GetInterval(iIndex); - freePositions[interval.Register.Index] = 0; + if (interval.Register.Type == regType) + { + freePositions[interval.Register.Index] = 0; + } } foreach (int iIndex in context.Inactive) { LiveInterval interval = GetInterval(iIndex); - if (interval.Overlaps(current)) + if (interval.Register.Type == regType && interval.Overlaps(current)) { int nextOverlap = interval.NextOverlap(current); @@ -233,9 +227,16 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - current.Register = new Register(selectedReg, RegisterType.Integer); + current.Register = new Register(selectedReg, regType); - context.UsedRegisters |= 1 << selectedReg; + if (regType == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << selectedReg; + } + else /* if (regType == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << selectedReg; + } context.Active.Set(cIndex); @@ -244,13 +245,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex) { + RegisterType regType = current.Local.Type.ToRegisterType(); + + int availableRegisters = context.Masks.GetAvailableRegisters(regType); + int[] usePositions = new int[RegistersCount]; int[] blockedPositions = new int[RegistersCount]; for (int index = 0; index < RegistersCount; index++) { - if ((context.Masks.IntAvailableRegisters & (1 << index)) != 0) + if ((availableRegisters & (1 << index)) != 0) { usePositions[index] = int.MaxValue; @@ -274,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = GetInterval(iIndex); - if (!interval.IsFixed) + if (!interval.IsFixed && interval.Register.Type == regType) { int nextUse = interval.NextUseAfter(current.Start); @@ -289,7 +294,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = GetInterval(iIndex); - if (!interval.IsFixed && interval.Overlaps(current)) + if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current)) { int nextUse = interval.NextUseAfter(current.Start); @@ -304,7 +309,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = GetInterval(iIndex); - if (interval.IsFixed) + if (interval.IsFixed && interval.Register.Type == regType) { SetBlockedPosition(interval.Register.Index, 0); } @@ -314,7 +319,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = GetInterval(iIndex); - if (interval.IsFixed && interval.Overlaps(current)) + if (interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current)) { SetBlockedPosition(interval.Register.Index, interval.NextOverlap(current)); } @@ -344,7 +349,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { //Spill made the register available for the entire current lifetime, //so we only need to split the intervals using the selected register. 
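With vector registers in the picture, the allocator now seeds _intervals with two fixed intervals per register index (one Integer, one Vector), which is why the loop over non-fixed intervals above now starts at RegistersCount * 2, and why GetRegisterId, added further down in this file's diff, folds the register type into the low bit of the interval id. A minimal standalone sketch of that id scheme, using simplified stand-in types rather than the project's real definitions:

enum RegisterType { Integer, Vector }

readonly struct Register
{
    public int Index { get; }
    public RegisterType Type { get; }

    public Register(int index, RegisterType type)
    {
        Index = index;
        Type  = type;
    }
}

static class RegisterIds
{
    // Integer register N gets interval id 2 * N, vector register N gets 2 * N + 1,
    // so ids 0 .. RegistersCount * 2 - 1 cover exactly the fixed (physical) intervals
    // and the remaining entries belong to local variables.
    public static int GetRegisterId(Register register)
    {
        return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
    }
}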
- current.Register = new Register(selectedReg, RegisterType.Integer); + current.Register = new Register(selectedReg, regType); SplitAndSpillOverlappingIntervals(context, current); @@ -355,7 +360,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators //There are conflicts even after spill due to the use of fixed registers //that can't be spilled, so we need to also split current at the point of //the first fixed register use. - current.Register = new Register(selectedReg, RegisterType.Integer); + current.Register = new Register(selectedReg, regType); LiveInterval splitChild = current.Split(GetSplitPosition(blockedPositions[selectedReg])); @@ -745,7 +750,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators throw new ArgumentException("Spilled intervals are not allowed."); } - return new Operand(interval.Register.Index, RegisterType.Integer, interval.Local.Type); + return new Operand( + interval.Register.Index, + interval.Register.Type, + interval.Local.Type); } private static int GetSplitPosition(int position) @@ -771,9 +779,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators for (int index = 0; index < RegistersCount; index++) { - LiveInterval interval = new LiveInterval(new Register(index, RegisterType.Integer)); - - _intervals.Add(interval); + _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer))); + _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector))); } HashSet visited = new HashSet(); @@ -812,7 +819,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators _blockRanges = new LiveRange[cfg.Blocks.Count]; - int mapSize = _intervals.Count + RegistersCount; + int mapSize = _intervals.Count; BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count]; BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count]; @@ -940,18 +947,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (node is Operation operation && operation.Inst == Instruction.Call) { - int callerSavedRegs = regMasks.IntCallerSavedRegisters; - - while (callerSavedRegs != 0) - { - int callerSavedReg = BitUtils.LowestBitSet(callerSavedRegs); - - LiveInterval interval = _intervals[callerSavedReg]; - - interval.AddRange(operationPos, operationPos + 1); - - callerSavedRegs &= ~(1 << callerSavedReg); - } + AddIntervalCallerSavedReg(regMasks.IntCallerSavedRegisters, operationPos, RegisterType.Integer); + AddIntervalCallerSavedReg(regMasks.VecCallerSavedRegisters, operationPos, RegisterType.Vector); if (maxCallArgs < operation.SourcesCount - 1) { @@ -962,6 +959,24 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } + private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType) + { + while (mask != 0) + { + int regIndex = BitUtils.LowestBitSet(mask); + + Debug.Assert(regIndex < RegistersCount, "Invalid register index."); + + Register callerSavedReg = new Register(regIndex, regType); + + LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)]; + + interval.AddRange(operationPos, operationPos + 1); + + mask &= ~(1 << regIndex); + } + } + private static int GetOperandId(Operand operand) { if (operand.Kind == OperandKind.LocalVariable) @@ -970,7 +985,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } else if (operand.Kind == OperandKind.Register) { - return operand.GetRegister().Index; + return GetRegisterId(operand.GetRegister()); } else { @@ -978,6 +993,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } + private static int GetRegisterId(Register register) + { + return (register.Index << 1) | (register.Type == RegisterType.Vector ? 
1 : 0); + } + private void CoalesceCopies(BasicBlock[] blocks) { foreach (BasicBlock block in blocks) diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index 78d52bbc74..e5db43c508 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -1,5 +1,4 @@ using ARMeilleure.IntermediateRepresentation; -using ARMeilleure.State; using System; using System.Collections.Generic; using System.Diagnostics; diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs index 13aac7845b..9652224e5f 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs @@ -1,19 +1,47 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + namespace ARMeilleure.CodeGen.RegisterAllocators { struct RegisterMasks { public int IntAvailableRegisters { get; } + public int VecAvailableRegisters { get; } public int IntCallerSavedRegisters { get; } + public int VecCallerSavedRegisters { get; } public int IntCalleeSavedRegisters { get; } + public int VecCalleeSavedRegisters { get; } public RegisterMasks( int intAvailableRegisters, + int vecAvailableRegisters, int intCallerSavedRegisters, - int intCalleeSavedRegisters) + int vecCallerSavedRegisters, + int intCalleeSavedRegisters, + int vecCalleeSavedRegisters) { IntAvailableRegisters = intAvailableRegisters; + VecAvailableRegisters = vecAvailableRegisters; IntCallerSavedRegisters = intCallerSavedRegisters; + VecCallerSavedRegisters = vecCallerSavedRegisters; IntCalleeSavedRegisters = intCalleeSavedRegisters; + VecCalleeSavedRegisters = vecCalleeSavedRegisters; + } + + public int GetAvailableRegisters(RegisterType type) + { + if (type == RegisterType.Integer) + { + return IntAvailableRegisters; + } + else if (type == RegisterType.Vector) + { + return VecAvailableRegisters; + } + else + { + throw new ArgumentException($"Invalid register type \"{type}\"."); + } } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index c8ae8eeab9..82ecee9f9b 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -1,5 +1,4 @@ using ARMeilleure.IntermediateRepresentation; -using ARMeilleure.State; using System; using System.Diagnostics; using System.IO; @@ -10,15 +9,21 @@ namespace ARMeilleure.CodeGen.X86 { private const int BadOp = 0; - private const int OpModRMBits = 16; - - private const int R16Prefix = 0x66; + private const int OpModRMBits = 24; + [Flags] private enum InstFlags { None = 0, RegOnly = 1 << 0, - Reg8 = 1 << 1 + Reg8 = 1 << 1, + Vex = 1 << 2, + + PrefixBit = 16, + PrefixMask = 3 << PrefixBit, + Prefix66 = 1 << PrefixBit, + PrefixF3 = 2 << PrefixBit, + PrefixF2 = 3 << PrefixBit } private struct InstInfo @@ -56,40 +61,154 @@ namespace ARMeilleure.CodeGen.X86 { _instTable = new InstInfo[(int)X86Instruction.Count]; - // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags - Add(X86Instruction.Add, new InstInfo(0x000001, 0x000083, 0x000081, BadOp, 0x000003, InstFlags.None)); - Add(X86Instruction.And, new InstInfo(0x000021, 0x040083, 0x040081, BadOp, 0x000023, InstFlags.None)); - Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fbd, InstFlags.None)); - Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fc8, InstFlags.RegOnly)); - Add(X86Instruction.Call, new 
InstInfo(0x0200ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f40, InstFlags.None)); - Add(X86Instruction.Cmp, new InstInfo(0x000039, 0x070083, 0x070081, BadOp, 0x00003b, InstFlags.None)); - Add(X86Instruction.Div, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x0600f7, InstFlags.None)); - Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x0700f7, InstFlags.None)); - Add(X86Instruction.Imul, new InstInfo(BadOp, 0x00006b, 0x000069, BadOp, 0x000faf, InstFlags.None)); - Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x0500f7, InstFlags.None)); - Add(X86Instruction.Mov, new InstInfo(0x000089, BadOp, 0x0000c7, 0x0000b8, 0x00008b, InstFlags.None)); - Add(X86Instruction.Mov16, new InstInfo(0x000089, BadOp, 0x0000c7, BadOp, 0x00008b, InstFlags.None)); - Add(X86Instruction.Mov8, new InstInfo(0x000088, 0x0000c6, BadOp, BadOp, 0x00008a, InstFlags.None)); - Add(X86Instruction.Movsx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fbf, InstFlags.None)); - Add(X86Instruction.Movsx32, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000063, InstFlags.None)); - Add(X86Instruction.Movsx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fbe, InstFlags.None)); - Add(X86Instruction.Movzx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fb7, InstFlags.None)); - Add(X86Instruction.Movzx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fb6, InstFlags.None)); - Add(X86Instruction.Mul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x0400f7, InstFlags.None)); - Add(X86Instruction.Neg, new InstInfo(0x0300f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Not, new InstInfo(0x0200f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Or, new InstInfo(0x000009, 0x010083, 0x010081, BadOp, 0x00000b, InstFlags.None)); - Add(X86Instruction.Pop, new InstInfo(0x00008f, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Push, new InstInfo(BadOp, 0x00006a, 0x000068, BadOp, 0x0600ff, InstFlags.None)); - Add(X86Instruction.Ror, new InstInfo(0x0100d3, 0x0100c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Sar, new InstInfo(0x0700d3, 0x0700c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Setcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f90, InstFlags.Reg8)); - Add(X86Instruction.Shl, new InstInfo(0x0400d3, 0x0400c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Shr, new InstInfo(0x0500d3, 0x0500c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Sub, new InstInfo(0x000029, 0x050083, 0x050081, BadOp, 0x00002b, InstFlags.None)); - Add(X86Instruction.Test, new InstInfo(0x000085, BadOp, 0x0000f7, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Xor, new InstInfo(0x000031, 0x060083, 0x060081, BadOp, 0x000033, InstFlags.None)); + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + Add(X86Instruction.Add, new InstInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstFlags.None)); + Add(X86Instruction.Addpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Addps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex)); + Add(X86Instruction.Addsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Addss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.And, new InstInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, 
InstFlags.None)); + Add(X86Instruction.Andnpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Andnps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex)); + Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstFlags.None)); + Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc8, InstFlags.RegOnly)); + Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstFlags.None)); + Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None)); + Add(X86Instruction.Div, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstFlags.None)); + Add(X86Instruction.Divpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Divps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex)); + Add(X86Instruction.Divsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Divss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Haddpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Haddps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstFlags.None)); + Add(X86Instruction.Imul, new InstInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstFlags.None)); + Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstFlags.None)); + Add(X86Instruction.Insertps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Maxpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Maxps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex)); + Add(X86Instruction.Maxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Maxss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Minpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Minps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex)); + Add(X86Instruction.Minsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Minss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Mov, new InstInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstFlags.None)); + Add(X86Instruction.Mov16, new InstInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstFlags.Prefix66)); + Add(X86Instruction.Mov8, new InstInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstFlags.None)); + Add(X86Instruction.Movd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6e, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Movdqu, new InstInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Movhlps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstFlags.Vex)); + 
Add(X86Instruction.Movlhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstFlags.Vex)); + Add(X86Instruction.Movq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Movsd, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Movss, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Movsx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstFlags.None)); + Add(X86Instruction.Movsx32, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstFlags.None)); + Add(X86Instruction.Movsx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstFlags.None)); + Add(X86Instruction.Movzx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstFlags.None)); + Add(X86Instruction.Movzx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstFlags.None)); + Add(X86Instruction.Mul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstFlags.None)); + Add(X86Instruction.Mulpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Mulps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex)); + Add(X86Instruction.Mulsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Mulss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Neg, new InstInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Not, new InstInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Or, new InstInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstFlags.None)); + Add(X86Instruction.Paddb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Paddd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Paddq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Paddw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pand, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pandn, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pavgb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pavgw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtd, new InstInfo(BadOp, 
BadOp, BadOp, BadOp, 0x00000f66, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pextrb, new InstInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pextrd, new InstInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pextrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pinsrb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pinsrd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pinsrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovsxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovsxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovsxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovzxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovzxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovzxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmulld, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmullw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pop, new InstInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + 
Add(X86Instruction.Popcnt, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstFlags.PrefixF3)); + Add(X86Instruction.Por, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pshufd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psllw, new InstInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrad, new InstInfo(BadOp, 0x00000f72, BadOp, BadOp, 0x00000fe2, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psraw, new InstInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrld, new InstInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrlq, new InstInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrldq, new InstInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrlw, new InstInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Push, new InstInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstFlags.None)); + Add(X86Instruction.Pxor, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Rcpps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex)); + Add(X86Instruction.Rcpss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Ror, new InstInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Roundpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Roundps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex)); + Add(X86Instruction.Roundsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Roundss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Rsqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex)); + Add(X86Instruction.Rsqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Sar, new InstInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Setcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstFlags.Reg8)); + Add(X86Instruction.Shl, new InstInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Shr, new InstInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Shufpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Shufps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex)); + 
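The packed values in this table now carry more than the raw opcode: OpModRMBits moved from 16 to 24, so the ModRM reg "digit" (the /N in 0x04000083, i.e. 83 /4) sits at bit 24 and up, the low three bytes hold an opcode of up to three bytes such as 0F 3A 21, and the mandatory 66/F2/F3 prefix moved out of hand-written bytes into the Prefix* bits of InstFlags (which is also why Mov16 no longer emits R16Prefix itself). A small illustrative decoder assuming that layout; it is not code from the PR:

using System;

static class PackedOpCodeDemo
{
    private const int OpModRMBits = 24;

    // Splits one packed table entry into its parts and prints the opcode bytes in
    // the order WriteOpCode emits them (highest non-zero byte first).
    public static void Dump(int packed)
    {
        int modRmReg = packed >> OpModRMBits;   // e.g. 0x04000083 -> /4
        int opCode   = packed & 0xffffff;       // e.g. 0x000f3a21 -> 0F 3A 21

        if ((opCode & 0xff0000) != 0) Console.Write($"{(opCode >> 16) & 0xff:X2} ");
        if ((opCode & 0x00ff00) != 0) Console.Write($"{(opCode >> 8) & 0xff:X2} ");

        Console.WriteLine($"{opCode & 0xff:X2}  (ModRM.reg = {modRmReg})");
    }
}

// PackedOpCodeDemo.Dump(0x04000083); // prints "83  (ModRM.reg = 4)"       -> 83 /4 (AND r/m, imm8)
// PackedOpCodeDemo.Dump(0x000f3a21); // prints "0F 3A 21  (ModRM.reg = 0)" -> insertps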
Add(X86Instruction.Sqrtpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Sqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex)); + Add(X86Instruction.Sqrtsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Sqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Sub, new InstInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstFlags.None)); + Add(X86Instruction.Subpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Subps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex)); + Add(X86Instruction.Subsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Subss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Test, new InstInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Unpckhpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Unpckhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex)); + Add(X86Instruction.Unpcklpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Unpcklps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex)); + Add(X86Instruction.Xor, new InstInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstFlags.None)); + Add(X86Instruction.Xorpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Xorps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex)); } private static void Add(X86Instruction inst, InstInfo info) @@ -107,11 +226,41 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, X86Instruction.Add); } + public void Addpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Addpd, source1); + } + + public void Addps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Addps, source1); + } + + public void Addsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Addsd, source1); + } + + public void Addss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Addss, source1); + } + public void And(Operand dest, Operand source) { WriteInstruction(dest, source, X86Instruction.And); } + public void Andnpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Andnpd, source1); + } + + public void Andnps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Andnps, source1); + } + public void Bsr(Operand dest, Operand source) { WriteInstruction(dest, source, X86Instruction.Bsr); @@ -155,6 +304,36 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(null, source, X86Instruction.Div); } + public void Divpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Divpd, source1); + } + + public void Divps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Divps, source1); + } + + public void Divsd(Operand 
dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Divsd, source1); + } + + public void Divss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Divss, source1); + } + + public void Haddpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Haddpd, source1); + } + + public void Haddps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Haddps, source1); + } + public void Idiv(Operand source) { WriteInstruction(null, source, X86Instruction.Idiv); @@ -202,6 +381,13 @@ namespace ARMeilleure.CodeGen.X86 } } + public void Insertps(Operand dest, Operand source, Operand source1, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Insertps, source1); + + WriteByte(imm); + } + public void Jcc(X86Condition condition, long offset) { if (ConstFitsOnS8(offset)) @@ -243,6 +429,46 @@ namespace ARMeilleure.CodeGen.X86 } } + public void Maxpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Maxpd, source1); + } + + public void Maxps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Maxps, source1); + } + + public void Maxsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Maxsd, source1); + } + + public void Maxss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Maxss, source1); + } + + public void Minpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Minpd, source1); + } + + public void Minps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Minps, source1); + } + + public void Minsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Minsd, source1); + } + + public void Minss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Minss, source1); + } + public void Mov(Operand dest, Operand source) { WriteInstruction(dest, source, X86Instruction.Mov); @@ -250,8 +476,6 @@ namespace ARMeilleure.CodeGen.X86 public void Mov16(Operand dest, Operand source) { - WriteByte(R16Prefix); - WriteInstruction(dest, source, X86Instruction.Mov16); } @@ -260,6 +484,41 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, X86Instruction.Mov8); } + public void Movd(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Movd); + } + + public void Movdqu(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Movdqu); + } + + public void Movhlps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Movhlps, source1); + } + + public void Movlhps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Movlhps, source1); + } + + public void Movq(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Movq); + } + + public void Movsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Movsd, source1); + } + + public void Movss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Movss, source1); + } + public void Movsx16(Operand dest, Operand source) { WriteInstruction(dest, source, 
X86Instruction.Movsx16); @@ -290,6 +549,26 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(null, source, X86Instruction.Mul128); } + public void Mulpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Mulpd, source1); + } + + public void Mulps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Mulps, source1); + } + + public void Mulsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Mulsd, source1); + } + + public void Mulss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Mulss, source1); + } + public void Neg(Operand dest) { WriteInstruction(dest, null, X86Instruction.Neg); @@ -305,6 +584,236 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, X86Instruction.Or); } + public void Paddb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Paddb, source1); + } + + public void Paddd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Paddd, source1); + } + + public void Paddq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Paddq, source1); + } + + public void Paddw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Paddw, source1); + } + + public void Pand(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pand, source1); + } + + public void Pandn(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pandn, source1); + } + + public void Pavgb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pavgb, source1); + } + + public void Pavgw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pavgw, source1); + } + + public void Pblendvb(Operand dest, Operand source1, Operand source2, Operand source3) + { + //TODO: Non-VEX version. 
+ WriteInstruction(dest, source2, X86Instruction.Pblendvb, source1); + + WriteByte((byte)(source3.AsByte() << 4)); + } + + public void Pcmpeqb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpeqb, source1); + } + + public void Pcmpeqd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpeqd, source1); + } + + public void Pcmpeqq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpeqq, source1); + } + + public void Pcmpeqw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpeqw, source1); + } + + public void Pcmpgtb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpgtb, source1); + } + + public void Pcmpgtd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpgtd, source1); + } + + public void Pcmpgtq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpgtq, source1); + } + + public void Pcmpgtw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pcmpgtw, source1); + } + + public void Pextrb(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pextrb); + + WriteByte(imm); + } + + public void Pextrd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pextrd); + + WriteByte(imm); + } + + public void Pextrw(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pextrw); + + WriteByte(imm); + } + + public void Pinsrb(Operand dest, Operand source, Operand source1, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pinsrb, source1); + + WriteByte(imm); + } + + public void Pinsrd(Operand dest, Operand source, Operand source1, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pinsrd, source1); + + WriteByte(imm); + } + + public void Pinsrw(Operand dest, Operand source, Operand source1, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pinsrw, source1); + + WriteByte(imm); + } + + public void Pmaxsb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmaxsb, source1); + } + + public void Pmaxsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmaxsd, source1); + } + + public void Pmaxsw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmaxsw, source1); + } + + public void Pmaxub(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmaxub, source1); + } + + public void Pmaxud(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmaxud, source1); + } + + public void Pmaxuw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmaxuw, source1); + } + + public void Pminsb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pminsb, source1); + } + + public void Pminsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pminsd, source1); + } + + public void Pminsw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pminsw, source1); + } + + 
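These vector helpers all forward an extra source operand that lands in the VEX.vvvv field once WriteOpCode (changed further down in this diff) emits a VEX prefix instead of legacy REX plus 66/F2/F3 bytes. For reference, the general two- and three-byte VEX layout from the x86 spec, as a standalone sketch rather than the PR's implementation:

static class VexPrefix
{
    // pp matches the new InstFlags values: 0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2.
    // mmmmm selects the opcode escape: 1 = 0F, 2 = 0F 38, 3 = 0F 3A.
    public static byte[] Build(bool rexR, bool rexX, bool rexB, bool rexW, int mmmmm, int vvvv, int pp)
    {
        if (!rexX && !rexB && !rexW && mmmmm == 1)
        {
            // Two-byte form: C5, then ~R | ~vvvv | L | pp (L left at 0 for 128-bit operations).
            int b = ((rexR ? 0 : 1) << 7) | ((~vvvv & 0xf) << 3) | pp;

            return new byte[] { 0xc5, (byte)b };
        }

        // Three-byte form: C4, then ~R ~X ~B | mmmmm, then W | ~vvvv | L | pp.
        int b1 = ((rexR ? 0 : 1) << 7) | ((rexX ? 0 : 1) << 6) | ((rexB ? 0 : 1) << 5) | mmmmm;
        int b2 = ((rexW ? 1 : 0) << 7) | ((~vvvv & 0xf) << 3) | pp;

        return new byte[] { 0xc4, (byte)b1, (byte)b2 };
    }
}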
public void Pminub(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pminub, source1); + } + + public void Pminud(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pminud, source1); + } + + public void Pminuw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pminuw, source1); + } + + public void Pmovsxbw(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Pmovsxbw); + } + + public void Pmovsxdq(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Pmovsxdq); + } + + public void Pmovsxwd(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Pmovsxwd); + } + + public void Pmovzxbw(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Pmovzxbw); + } + + public void Pmovzxdq(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Pmovzxdq); + } + + public void Pmovzxwd(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Pmovzxwd); + } + + public void Pmulld(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmulld, source1); + } + + public void Pmullw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pmullw, source1); + } + public void Pop(Operand dest) { if (dest.Kind == OperandKind.Register) @@ -317,6 +826,78 @@ namespace ARMeilleure.CodeGen.X86 } } + public void Popcnt(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Popcnt); + } + + public void Por(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Por, source1); + } + + public void Pshufd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Pshufd); + + WriteByte(imm); + } + + public void Psllw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psllw, dest); + } + + public void Psrad(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psrad, dest); + } + + public void Psraw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psraw, dest); + } + + public void Psrld(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psrld, dest); + } + + public void Psrlq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psrlq, dest); + } + + public void Psrldq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psrldq, dest); + } + + public void Psrlw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psrlw, dest); + } + + public void Psubb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Psubb, source1); + } + + public void Psubd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Psubd, source1); + } + + public void Psubq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Psubq, source1); + } + + public void Psubw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Psubw, source1); + } + public 
void Push(Operand source) { if (source.Kind == OperandKind.Register) @@ -329,6 +910,21 @@ namespace ARMeilleure.CodeGen.X86 } } + public void Pxor(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pxor, source1); + } + + public void Rcpps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Rcpps, source1); + } + + public void Rcpss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Rcpss, source1); + } + public void Return() { WriteByte(0xc3); @@ -339,6 +935,44 @@ namespace ARMeilleure.CodeGen.X86 WriteShiftInst(dest, source, X86Instruction.Ror); } + public void Roundpd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Roundpd); + + WriteByte(imm); + } + + public void Roundps(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Roundps); + + WriteByte(imm); + } + + public void Roundsd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Roundsd); + + WriteByte(imm); + } + + public void Roundss(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Roundss); + + WriteByte(imm); + } + + public void Rsqrtps(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Rsqrtps); + } + + public void Rsqrtss(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Rsqrtss); + } + public void Sar(Operand dest, Operand source) { WriteShiftInst(dest, source, X86Instruction.Sar); @@ -354,6 +988,20 @@ namespace ARMeilleure.CodeGen.X86 WriteShiftInst(dest, source, X86Instruction.Shr); } + public void Shufpd(Operand dest, Operand source, byte imm, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Shufpd, source1); + + WriteByte(imm); + } + + public void Shufps(Operand dest, Operand source, byte imm, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Shufps, source1); + + WriteByte(imm); + } + public void Setcc(Operand dest, X86Condition condition) { InstInfo info = _instTable[(int)X86Instruction.Setcc]; @@ -361,21 +1009,91 @@ namespace ARMeilleure.CodeGen.X86 WriteOpCode(dest, null, info.Flags, info.OpRRM | (int)condition); } + public void Sqrtpd(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Sqrtpd); + } + + public void Sqrtps(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Sqrtps); + } + + public void Sqrtsd(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Sqrtsd); + } + + public void Sqrtss(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Sqrtss); + } + public void Sub(Operand dest, Operand source) { WriteInstruction(dest, source, X86Instruction.Sub); } + public void Subpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Subpd, source1); + } + + public void Subps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Subps, source1); + } + + public void Subsd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Subsd, source1); + } + + public void Subss(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Subss, source1); + } + public void Test(Operand src1, Operand src2) { WriteInstruction(src1, src2, 
X86Instruction.Test); } + public void Unpckhpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Unpckhpd, source1); + } + + public void Unpckhps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Unpckhps, source1); + } + + public void Unpcklpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Unpcklpd, source1); + } + + public void Unpcklps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Unpcklps, source1); + } + public void Xor(Operand dest, Operand source) { WriteInstruction(dest, source, X86Instruction.Xor); } + public void Xorpd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Xorpd, source1); + } + + public void Xorps(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Xorps, source1); + } + private void WriteShiftInst(Operand dest, Operand source, X86Instruction inst) { if (source.Kind == OperandKind.Register) @@ -393,7 +1111,7 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, inst); } - private void WriteInstruction(Operand dest, Operand source, X86Instruction inst) + private void WriteInstruction(Operand dest, Operand source, X86Instruction inst, Operand source1 = null) { InstInfo info = _instTable[(int)inst]; @@ -403,25 +1121,25 @@ namespace ARMeilleure.CodeGen.X86 { if (inst == X86Instruction.Mov8) { - WriteOpCode(dest, null, info.Flags, info.OpRMImm8); + WriteOpCode(dest, null, info.Flags, info.OpRMImm8, source1); WriteByte(source.AsByte()); } else if (inst == X86Instruction.Mov16) { - WriteOpCode(dest, null, info.Flags, info.OpRMImm32); + WriteOpCode(dest, null, info.Flags, info.OpRMImm32, source1); WriteInt16(source.AsInt16()); } else if (IsImm8(source) && info.OpRMImm8 != BadOp) { - WriteOpCode(dest, null, info.Flags, info.OpRMImm8); + WriteOpCode(dest, null, info.Flags, info.OpRMImm8, source1); WriteByte(source.AsByte()); } else if (IsImm32(source) && info.OpRMImm32 != BadOp) { - WriteOpCode(dest, null, info.Flags, info.OpRMImm32); + WriteOpCode(dest, null, info.Flags, info.OpRMImm32, source1); WriteInt32(source.AsInt32()); } @@ -445,11 +1163,11 @@ namespace ARMeilleure.CodeGen.X86 } else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp) { - WriteOpCode(dest, source, info.Flags, info.OpRMR); + WriteOpCode(dest, source, info.Flags, info.OpRMR, source1); } else if (info.OpRRM != BadOp) { - WriteRRMOpCode(dest, source, info.Flags, info.OpRRM); + WriteRRMOpCode(dest, source, info.Flags, info.OpRRM, source1); } else { @@ -458,11 +1176,11 @@ namespace ARMeilleure.CodeGen.X86 } else if (info.OpRRM != BadOp) { - WriteRRMOpCode(dest, source, info.Flags, info.OpRRM); + WriteRRMOpCode(dest, source, info.Flags, info.OpRRM, source1); } else if (info.OpRMR != BadOp) { - WriteOpCode(dest, source, info.Flags, info.OpRMR); + WriteOpCode(dest, source, info.Flags, info.OpRMR, source1); } else { @@ -470,9 +1188,14 @@ namespace ARMeilleure.CodeGen.X86 } } - private void WriteRRMOpCode(Operand dest, Operand source, InstFlags flags, int opCode) + private void WriteRRMOpCode( + Operand dest, + Operand source, + InstFlags flags, + int opCode, + Operand source1 = null) { - WriteOpCode(dest, source, flags, opCode, rrm: true); + WriteOpCode(dest, source, flags, opCode, source1, rrm: true); } private void WriteOpCode( @@ -480,7 +1203,8 @@ namespace ARMeilleure.CodeGen.X86 Operand 
source, InstFlags flags, int opCode, - bool rrm = false) + Operand source1 = null, + bool rrm = false) { int rexPrefix = GetRexPrefix(dest, source, rrm); @@ -601,13 +1325,76 @@ namespace ARMeilleure.CodeGen.X86 modRM |= 0xc0; } - if (rexPrefix != 0) + if ((flags & InstFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) { - WriteByte((byte)rexPrefix); + int vexByte2 = (int)(flags & InstFlags.PrefixMask) >> (int)InstFlags.PrefixBit; + + if (source1 != null) + { + vexByte2 |= (source1.GetRegister().Index ^ 0xf) << 3; + } + else + { + vexByte2 |= 0b1111 << 3; + } + + ushort opCodeHigh = (ushort)(opCode >> 8); + + if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf) + { + //Two-byte form. + WriteByte(0xc5); + + vexByte2 |= (~rexPrefix & 4) << 5; + + WriteByte((byte)vexByte2); + } + else + { + //Three-byte form. + WriteByte(0xc4); + + int vexByte1 = (~rexPrefix & 7) << 5; + + switch (opCodeHigh) + { + case 0xf: vexByte1 |= 1; break; + case 0xf38: vexByte1 |= 2; break; + case 0xf3a: vexByte1 |= 3; break; + + default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break; + } + + vexByte2 |= (rexPrefix & 8) << 4; + + WriteByte((byte)vexByte1); + WriteByte((byte)vexByte2); + } + + opCode &= 0xff; + } + else + { + switch (flags & InstFlags.PrefixMask) + { + case InstFlags.Prefix66: WriteByte(0x66); break; + case InstFlags.PrefixF2: WriteByte(0xf2); break; + case InstFlags.PrefixF3: WriteByte(0xf3); break; + } + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } } Debug.Assert(opCode != BadOp, "Invalid opcode value."); + if ((opCode & 0xff0000) != 0) + { + WriteByte((byte)(opCode >> 16)); + } + if ((opCode & 0xff00) != 0) { WriteByte((byte)(opCode >> 8)); diff --git a/ARMeilleure/CodeGen/X86/CallingConvention.cs b/ARMeilleure/CodeGen/X86/CallingConvention.cs index 3a57c7f0dd..bd04af4a50 100644 --- a/ARMeilleure/CodeGen/X86/CallingConvention.cs +++ b/ARMeilleure/CodeGen/X86/CallingConvention.cs @@ -4,9 +4,11 @@ namespace ARMeilleure.CodeGen.X86 { static class CallingConvention { + private const int RegistersMask = 0xffff; + public static int GetIntAvailableRegisters() { - int mask = 0xffff; + int mask = RegistersMask; mask &= ~(1 << (int)X86Register.Rbp); mask &= ~(1 << (int)X86Register.Rsp); @@ -14,6 +16,11 @@ namespace ARMeilleure.CodeGen.X86 return mask; } + public static int GetVecAvailableRegisters() + { + return RegistersMask; + } + public static int GetIntCallerSavedRegisters() { return (1 << (int)X86Register.Rax) | @@ -25,6 +32,16 @@ namespace ARMeilleure.CodeGen.X86 (1 << (int)X86Register.R11); } + public static int GetVecCallerSavedRegisters() + { + return (1 << (int)X86Register.Xmm0) | + (1 << (int)X86Register.Xmm1) | + (1 << (int)X86Register.Xmm2) | + (1 << (int)X86Register.Xmm3) | + (1 << (int)X86Register.Xmm4) | + (1 << (int)X86Register.Xmm5); + } + public static int GetIntCalleeSavedRegisters() { return (1 << (int)X86Register.Rbx) | @@ -38,7 +55,12 @@ namespace ARMeilleure.CodeGen.X86 (1 << (int)X86Register.R15); } - public static int GetIntArgumentsOnRegsCount() + public static int GetVecCalleeSavedRegisters() + { + return GetVecCallerSavedRegisters() ^ RegistersMask; + } + + public static int GetArgumentsOnRegsCount() { return 4; } @@ -56,9 +78,27 @@ namespace ARMeilleure.CodeGen.X86 throw new ArgumentOutOfRangeException(nameof(index)); } + public static X86Register GetVecArgumentRegister(int index) + { + switch (index) + { + case 0: return X86Register.Xmm0; + case 1: return X86Register.Xmm1; + case 2: return X86Register.Xmm2; + case 3: return 
X86Register.Xmm3; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + public static X86Register GetIntReturnRegister() { return X86Register.Rax; } + + public static X86Register GetVecReturnRegister() + { + return X86Register.Xmm0; + } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs index 117b5945a3..7e3c90e6b2 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenContext.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -87,7 +87,7 @@ namespace ARMeilleure.CodeGen.X86 { //We need to add 8 bytes to the total size, as the call to this //function already pushed 8 bytes (the return address). - int mask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.UsedRegisters; + int mask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters; mask |= 1 << (int)X86Register.Rbp; @@ -97,7 +97,7 @@ namespace ARMeilleure.CodeGen.X86 //The ABI mandates that the space for at least 4 arguments //is reserved on the stack (this is called shadow space). - if (argsCount < 4) + if (argsCount < 4 && argsCount != 0) { argsCount = 4; } diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 05a57aa540..d874645777 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -1,10 +1,11 @@ using ARMeilleure.CodeGen.RegisterAllocators; using ARMeilleure.Common; +using ARMeilleure.Diagnostics; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Memory; -using ARMeilleure.State; using ARMeilleure.Translation; using System; +using System.Diagnostics; using System.IO; namespace ARMeilleure.CodeGen.X86 @@ -69,6 +70,116 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.Store8, GenerateStore8); Add(Instruction.StoreToContext, GenerateStoreToContext); Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + Add(Instruction.VectorInsert, GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.X86Addpd, GenerateX86Addpd); + Add(Instruction.X86Addps, GenerateX86Addps); + Add(Instruction.X86Addsd, GenerateX86Addsd); + Add(Instruction.X86Addss, GenerateX86Addss); + Add(Instruction.X86Andnpd, GenerateX86Andnpd); + Add(Instruction.X86Andnps, GenerateX86Andnps); + Add(Instruction.X86Divpd, GenerateX86Divpd); + Add(Instruction.X86Divps, GenerateX86Divps); + Add(Instruction.X86Divsd, GenerateX86Divsd); + Add(Instruction.X86Divss, GenerateX86Divss); + Add(Instruction.X86Haddpd, GenerateX86Haddpd); + Add(Instruction.X86Haddps, GenerateX86Haddps); + Add(Instruction.X86Maxpd, GenerateX86Maxpd); + Add(Instruction.X86Maxps, GenerateX86Maxps); + Add(Instruction.X86Maxsd, GenerateX86Maxsd); + Add(Instruction.X86Maxss, GenerateX86Maxss); + Add(Instruction.X86Minpd, GenerateX86Minpd); + Add(Instruction.X86Minps, GenerateX86Minps); + Add(Instruction.X86Minsd, GenerateX86Minsd); + Add(Instruction.X86Minss, GenerateX86Minss); + Add(Instruction.X86Movhlps, GenerateX86Movhlps); + Add(Instruction.X86Movlhps, GenerateX86Movlhps); + Add(Instruction.X86Mulpd, GenerateX86Mulpd); + 
Add(Instruction.X86Mulps, GenerateX86Mulps); + Add(Instruction.X86Mulsd, GenerateX86Mulsd); + Add(Instruction.X86Mulss, GenerateX86Mulss); + Add(Instruction.X86Paddb, GenerateX86Paddb); + Add(Instruction.X86Paddd, GenerateX86Paddd); + Add(Instruction.X86Paddq, GenerateX86Paddq); + Add(Instruction.X86Paddw, GenerateX86Paddw); + Add(Instruction.X86Pand, GenerateX86Pand); + Add(Instruction.X86Pandn, GenerateX86Pandn); + Add(Instruction.X86Pavgb, GenerateX86Pavgb); + Add(Instruction.X86Pavgw, GenerateX86Pavgw); + Add(Instruction.X86Pblendvb, GenerateX86Pblendvb); + Add(Instruction.X86Pcmpeqb, GenerateX86Pcmpeqb); + Add(Instruction.X86Pcmpeqd, GenerateX86Pcmpeqd); + Add(Instruction.X86Pcmpeqq, GenerateX86Pcmpeqq); + Add(Instruction.X86Pcmpeqw, GenerateX86Pcmpeqw); + Add(Instruction.X86Pcmpgtb, GenerateX86Pcmpgtb); + Add(Instruction.X86Pcmpgtd, GenerateX86Pcmpgtd); + Add(Instruction.X86Pcmpgtq, GenerateX86Pcmpgtq); + Add(Instruction.X86Pcmpgtw, GenerateX86Pcmpgtw); + Add(Instruction.X86Pmaxsb, GenerateX86Pmaxsb); + Add(Instruction.X86Pmaxsd, GenerateX86Pmaxsd); + Add(Instruction.X86Pmaxsw, GenerateX86Pmaxsw); + Add(Instruction.X86Pmaxub, GenerateX86Pmaxub); + Add(Instruction.X86Pmaxud, GenerateX86Pmaxud); + Add(Instruction.X86Pmaxuw, GenerateX86Pmaxuw); + Add(Instruction.X86Pminsb, GenerateX86Pminsb); + Add(Instruction.X86Pminsd, GenerateX86Pminsd); + Add(Instruction.X86Pminsw, GenerateX86Pminsw); + Add(Instruction.X86Pminub, GenerateX86Pminub); + Add(Instruction.X86Pminud, GenerateX86Pminud); + Add(Instruction.X86Pminuw, GenerateX86Pminuw); + Add(Instruction.X86Pmovsxbw, GenerateX86Pmovsxbw); + Add(Instruction.X86Pmovsxdq, GenerateX86Pmovsxdq); + Add(Instruction.X86Pmovsxwd, GenerateX86Pmovsxwd); + Add(Instruction.X86Pmovzxbw, GenerateX86Pmovzxbw); + Add(Instruction.X86Pmovzxdq, GenerateX86Pmovzxdq); + Add(Instruction.X86Pmovzxwd, GenerateX86Pmovzxwd); + Add(Instruction.X86Pmulld, GenerateX86Pmulld); + Add(Instruction.X86Pmullw, GenerateX86Pmullw); + Add(Instruction.X86Popcnt, GenerateX86Popcnt); + Add(Instruction.X86Por, GenerateX86Por); + Add(Instruction.X86Psllw, GenerateX86Psllw); + Add(Instruction.X86Psrad, GenerateX86Psrad); + Add(Instruction.X86Psraw, GenerateX86Psraw); + Add(Instruction.X86Psrld, GenerateX86Psrld); + Add(Instruction.X86Psrlq, GenerateX86Psrlq); + Add(Instruction.X86Psrldq, GenerateX86Psrldq); + Add(Instruction.X86Psrlw, GenerateX86Psrlw); + Add(Instruction.X86Psubb, GenerateX86Psubb); + Add(Instruction.X86Psubd, GenerateX86Psubd); + Add(Instruction.X86Psubq, GenerateX86Psubq); + Add(Instruction.X86Psubw, GenerateX86Psubw); + Add(Instruction.X86Pxor, GenerateX86Pxor); + Add(Instruction.X86Rcpps, GenerateX86Rcpps); + Add(Instruction.X86Rcpss, GenerateX86Rcpss); + Add(Instruction.X86Roundpd, GenerateX86Roundpd); + Add(Instruction.X86Roundps, GenerateX86Roundps); + Add(Instruction.X86Roundsd, GenerateX86Roundsd); + Add(Instruction.X86Roundss, GenerateX86Roundss); + Add(Instruction.X86Rsqrtps, GenerateX86Rsqrtps); + Add(Instruction.X86Rsqrtss, GenerateX86Rsqrtss); + Add(Instruction.X86Shufpd, GenerateX86Shufpd); + Add(Instruction.X86Shufps, GenerateX86Shufps); + Add(Instruction.X86Sqrtpd, GenerateX86Sqrtpd); + Add(Instruction.X86Sqrtps, GenerateX86Sqrtps); + Add(Instruction.X86Sqrtsd, GenerateX86Sqrtsd); + Add(Instruction.X86Sqrtss, GenerateX86Sqrtss); + Add(Instruction.X86Subpd, GenerateX86Subpd); + Add(Instruction.X86Subps, GenerateX86Subps); + Add(Instruction.X86Subsd, GenerateX86Subsd); + Add(Instruction.X86Subss, GenerateX86Subss); + Add(Instruction.X86Unpckhpd, 
GenerateX86Unpckhpd); + Add(Instruction.X86Unpckhps, GenerateX86Unpckhps); + Add(Instruction.X86Unpcklpd, GenerateX86Unpcklpd); + Add(Instruction.X86Unpcklps, GenerateX86Unpcklps); + Add(Instruction.X86Xorpd, GenerateX86Xorpd); + Add(Instruction.X86Xorps, GenerateX86Xorps); } private static void Add(Instruction inst, Action func) @@ -78,17 +189,28 @@ namespace ARMeilleure.CodeGen.X86 public static byte[] Generate(ControlFlowGraph cfg, MemoryManager memory) { + Logger.StartPass(PassName.PreAllocation); + PreAllocator.RunPass(cfg, memory); + Logger.EndPass(PassName.PreAllocation); + + Logger.StartPass(PassName.RegisterAllocation); + LinearScan regAlloc = new LinearScan(); RegisterMasks regMasks = new RegisterMasks( CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), CallingConvention.GetIntCallerSavedRegisters(), - CallingConvention.GetIntCalleeSavedRegisters()); + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters()); AllocationResult allocResult = regAlloc.RunPass(cfg, regMasks); + Logger.EndPass(PassName.RegisterAllocation, cfg); + using (MemoryStream stream = new MemoryStream()) { CodeGenContext context = new CodeGenContext(stream, allocResult, cfg.Blocks.Count); @@ -132,7 +254,22 @@ namespace ARMeilleure.CodeGen.X86 { ValidateDestSrc1(operation); - context.Assembler.Add(operation.Dest, operation.GetSource(1)); + Operand dest = operation.Dest; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (dest.Type.IsInteger()) + { + context.Assembler.Add(dest, src2); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Addss(dest, src2, src1); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Addsd(dest, src2, src1); + } } private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) @@ -282,6 +419,18 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Mov(dest, source); } + else if (dest.GetRegister().Type == RegisterType.Vector) + { + if (source.GetRegister().Type == RegisterType.Integer) + { + //FIXME. 
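A likely reading of the FIXME above: MOVD only transfers the low 32 bits of a general-purpose register, so a 64-bit integer source would be truncated on the way into the vector register. A minimal sketch of a width-aware copy, assuming the Assembler's Movq helper accepts a GPR source operand (illustrative only, not part of the patch):

    if (source.Type == OperandType.I64)
    {
        context.Assembler.Movq(dest, source); //66 REX.W 0F 6E /r - moves all 64 bits
    }
    else
    {
        context.Assembler.Movd(dest, source); //66 0F 6E /r - moves the low 32 bits, zeroing the rest
    }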
+                        context.Assembler.Movd(dest, source);
+                    }
+                    else
+                    {
+                        context.Assembler.Movdqu(dest, source);
+                    }
+                }
                 else
                 {
                     context.Assembler.Mov(dest, source);
@@ -318,18 +467,31 @@
         private static void GenerateDivide(CodeGenContext context, Operation operation)
         {
-            Operand divisor = operation.GetSource(1);
+            Operand dest     = operation.Dest;
+            Operand dividend = operation.GetSource(0);
+            Operand divisor  = operation.GetSource(1);
 
-            if (divisor.Type == OperandType.I32)
+            if (dest.Type.IsInteger())
             {
-                context.Assembler.Cdq();
-            }
-            else
-            {
-                context.Assembler.Cqo();
-            }
+                if (divisor.Type == OperandType.I32)
+                {
+                    context.Assembler.Cdq();
+                }
+                else
+                {
+                    context.Assembler.Cqo();
+                }
 
-            context.Assembler.Idiv(divisor);
+                context.Assembler.Idiv(divisor);
+            }
+            else if (dest.Type == OperandType.FP32)
+            {
+                context.Assembler.Divss(dest, divisor, dividend);
+            }
+            else /* if (dest.Type == OperandType.FP64) */
+            {
+                context.Assembler.Divsd(dest, divisor, dividend);
+            }
         }
 
         private static void GenerateDivideUI(CodeGenContext context, Operation operation)
@@ -378,7 +540,14 @@
             X86MemoryOperand memOp = new X86MemoryOperand(dest.Type, rbp, null, Scale.x1, offset.AsInt32());
 
-            context.Assembler.Mov(dest, memOp);
+            if (dest.GetRegister().Type == RegisterType.Vector)
+            {
+                context.Assembler.Movdqu(dest, memOp);
+            }
+            else
+            {
+                context.Assembler.Mov(dest, memOp);
+            }
         }
 
         private static void GenerateLoadSx16(CodeGenContext context, Operation operation)
@@ -412,13 +581,24 @@
             Operand src1 = operation.GetSource(0);
             Operand src2 = operation.GetSource(1);
 
-            if (src2.Kind == OperandKind.Constant)
+            if (dest.Type.IsInteger())
             {
-                context.Assembler.Imul(dest, src1, src2);
+                if (src2.Kind == OperandKind.Constant)
+                {
+                    context.Assembler.Imul(dest, src1, src2);
+                }
+                else
+                {
+                    context.Assembler.Imul(dest, src2);
+                }
             }
-            else
+            else if (dest.Type == OperandType.FP32)
             {
-                context.Assembler.Imul(dest, src2);
+                context.Assembler.Mulss(dest, src2, src1);
+            }
+            else /* if (dest.Type == OperandType.FP64) */
+            {
+                context.Assembler.Mulsd(dest, src2, src1);
            }
         }
 
@@ -554,14 +734,707 @@
             X86MemoryOperand memOp = new X86MemoryOperand(source.Type, rbp, null, Scale.x1, offset.AsInt32());
 
-            context.Assembler.Mov(memOp, source);
+            if (source.GetRegister().Type == RegisterType.Vector)
+            {
+                context.Assembler.Movdqu(memOp, source);
+            }
+            else
+            {
+                context.Assembler.Mov(memOp, source);
+            }
         }
 
         private static void GenerateSubtract(CodeGenContext context, Operation operation)
         {
             ValidateDestSrc1(operation);
 
-            context.Assembler.Sub(operation.Dest, operation.GetSource(1));
+            Operand dest = operation.Dest;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Sub(dest, src2);
+            }
+            else if (dest.Type == OperandType.FP32)
+            {
+                context.Assembler.Subss(dest, src2, src1);
+            }
+            else /* if (dest.Type == OperandType.FP64) */
+            {
+                context.Assembler.Subsd(dest, src2, src1);
+            }
+        }
+
+        private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Dest;         //Value
+            Operand src1 = operation.GetSource(0); //Vector
+            Operand src2 = operation.GetSource(1); //Index
+
+            Debug.Assert(src2.Kind == OperandKind.Constant, "Index is not constant.");
+
+            byte index = src2.AsByte();
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Pextrd(dest, src1, index);
+            }
+            else
+            {
+                //Floating-point type.
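The lane selection below is compact but non-obvious: for FP32 lanes 2-3 (or the FP64 high lane) MOVHLPS first brings the upper 64 bits into the low half, and the final PSHUFD control 0xfc | (index & 1) keeps the wanted lane in element 0 while broadcasting lane 3 into the remaining elements. A rough rendering of the same idea with .NET hardware intrinsics, purely for illustration (the generator itself works on IR operands, not managed vectors):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static class ExtractLaneSketch
    {
        //Reads FP32 lane 3 the same way the generator does for index >= 2.
        public static float ExtractLane3(Vector128<float> value)
        {
            //movhlps: low half <- high half, giving lanes {2, 3, 2, 3}.
            Vector128<float> high = Sse.MoveHighToLow(value, value);

            //Pick the odd lane (3 & 1 == 1) of the shifted vector into element 0.
            return Sse.Shuffle(high, high, 0b_11_11_11_01).ToScalar();
        }
    }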
+ if ((index >= 2 && dest.Type == OperandType.FP32) || + (index == 1 && dest.Type == OperandType.FP64)) + { + context.Assembler.Movhlps(dest, src1, dest); + context.Assembler.Movq(dest, dest); + } + else + { + context.Assembler.Movq(dest, src1); + } + + if (dest.Type == OperandType.FP32) + { + context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1))); + } + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src2.Kind == OperandKind.Constant, "Index is not constant."); + + byte index = src2.AsByte(); + + context.Assembler.Pextrw(dest, src1, index); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src2.Kind == OperandKind.Constant, "Index is not constant."); + + byte index = src2.AsByte(); + + //TODO: SSE/SSE2 version. + context.Assembler.Pextrb(dest, src1, index); + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + Debug.Assert(src3.Kind == OperandKind.Constant, "Index is not constant."); + + byte index = src3.AsByte(); + + if (src2.Type.IsInteger()) + { + //TODO: SSE/SSE2 version. + context.Assembler.Pinsrd(dest, src2, src1, index); + } + else if (src2.Type == OperandType.FP32) + { + if (index != 0) + { + //TODO: SSE/SSE2 version. + context.Assembler.Insertps(dest, src2, src1, (byte)(index << 4)); + } + else + { + context.Assembler.Movss(dest, src2, src1); + } + } + else /* if (src2.Type == OperandType.FP64) */ + { + if (index != 0) + { + context.Assembler.Movlhps(dest, src2, src1); + } + else + { + context.Assembler.Movsd(dest, src2, src1); + } + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + Debug.Assert(src3.Kind == OperandKind.Constant, "Index is not constant."); + + byte index = src3.AsByte(); + + context.Assembler.Pinsrw(dest, src2, src1, index); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + Debug.Assert(src3.Kind == OperandKind.Constant, "Index is not constant."); + + byte index = src3.AsByte(); + + //TODO: SSE/SSE2 version. 
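On the TODO above: PINSRB (like PEXTRB and INSERTPS used earlier in this group) is an SSE4.1 instruction. One possible SSE2-only fallback is to rewrite the 16-bit lane that contains the byte using PEXTRW/PINSRW plus a little integer masking. A sketch with .NET intrinsics for a fixed lane, purely illustrative:

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static class InsertByteSse2Sketch
    {
        //Insert 'value' at byte lane 5 without PINSRB: byte 5 is the high half of 16-bit lane 2.
        public static Vector128<byte> InsertLane5(Vector128<byte> vector, byte value)
        {
            Vector128<ushort> words = vector.AsUInt16();

            ushort word = Sse2.Extract(words, 2);            //pextrw: read 16-bit lane 2

            word = (ushort)((word & 0x00ff) | (value << 8)); //replace its high byte

            return Sse2.Insert(words, word, 2).AsByte();     //pinsrw: write the lane back
        }
    }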
+            context.Assembler.Pinsrb(dest, src2, src1, index);
+        }
+
+        private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Xorps(operation.Dest, operation.Dest, operation.Dest);
+        }
+
+        private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Dest;
+            Operand src1 = operation.GetSource(0);
+
+            context.Assembler.Movq(dest, src1);
+        }
+
+        private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Dest;
+            Operand src1 = operation.GetSource(0);
+
+            context.Assembler.Movq(dest, src1);
+            context.Assembler.Pshufd(dest, dest, 0xfc);
+        }
+
+        private static void GenerateX86Addpd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Addpd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Addps(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Addps(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Addsd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Addsd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Addss(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Addss(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Andnpd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Andnpd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Andnps(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Andnps(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Divpd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Divpd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Divps(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Divps(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Divsd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Divsd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Divss(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Divss(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Haddpd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Haddpd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Haddps(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Haddps(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Maxpd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Maxpd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Maxps(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Maxps(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Maxsd(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Maxsd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
+        }
+
+        private static void GenerateX86Maxss(CodeGenContext context,
Operation operation) + { + context.Assembler.Maxss(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Minpd(CodeGenContext context, Operation operation) + { + context.Assembler.Minpd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Minps(CodeGenContext context, Operation operation) + { + context.Assembler.Minps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Minsd(CodeGenContext context, Operation operation) + { + context.Assembler.Minsd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Minss(CodeGenContext context, Operation operation) + { + context.Assembler.Minss(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Movhlps(CodeGenContext context, Operation operation) + { + context.Assembler.Movhlps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Movlhps(CodeGenContext context, Operation operation) + { + context.Assembler.Movlhps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Mulpd(CodeGenContext context, Operation operation) + { + context.Assembler.Mulpd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Mulps(CodeGenContext context, Operation operation) + { + context.Assembler.Mulps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Mulsd(CodeGenContext context, Operation operation) + { + context.Assembler.Mulsd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Mulss(CodeGenContext context, Operation operation) + { + context.Assembler.Mulss(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Paddb(CodeGenContext context, Operation operation) + { + context.Assembler.Paddb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Paddd(CodeGenContext context, Operation operation) + { + context.Assembler.Paddd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Paddq(CodeGenContext context, Operation operation) + { + context.Assembler.Paddq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Paddw(CodeGenContext context, Operation operation) + { + context.Assembler.Paddw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pand(CodeGenContext context, Operation operation) + { + context.Assembler.Pand(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pandn(CodeGenContext context, Operation operation) + { + context.Assembler.Pandn(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pavgb(CodeGenContext context, Operation operation) + { + context.Assembler.Pavgb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pavgw(CodeGenContext context, Operation operation) + { + context.Assembler.Pavgw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pblendvb(CodeGenContext context, Operation operation) + { + 
context.Assembler.Pblendvb( + operation.Dest, + operation.GetSource(0), + operation.GetSource(1), + operation.GetSource(2)); + } + + private static void GenerateX86Pcmpeqb(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpeqb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpeqd(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpeqd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpeqq(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpeqq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpeqw(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpeqw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpgtb(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpgtb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpgtd(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpgtd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpgtq(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpgtq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pcmpgtw(CodeGenContext context, Operation operation) + { + context.Assembler.Pcmpgtw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmaxsb(CodeGenContext context, Operation operation) + { + context.Assembler.Pmaxsb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmaxsd(CodeGenContext context, Operation operation) + { + context.Assembler.Pmaxsd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmaxsw(CodeGenContext context, Operation operation) + { + context.Assembler.Pmaxsw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmaxub(CodeGenContext context, Operation operation) + { + context.Assembler.Pmaxub(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmaxud(CodeGenContext context, Operation operation) + { + context.Assembler.Pmaxud(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmaxuw(CodeGenContext context, Operation operation) + { + context.Assembler.Pmaxuw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pminsb(CodeGenContext context, Operation operation) + { + context.Assembler.Pminsb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pminsd(CodeGenContext context, Operation operation) + { + context.Assembler.Pminsd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pminsw(CodeGenContext context, Operation operation) + { + context.Assembler.Pminsw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pminub(CodeGenContext context, Operation operation) + { + context.Assembler.Pminub(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void 
GenerateX86Pminud(CodeGenContext context, Operation operation) + { + context.Assembler.Pminud(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pminuw(CodeGenContext context, Operation operation) + { + context.Assembler.Pminuw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmovsxbw(CodeGenContext context, Operation operation) + { + context.Assembler.Pmovsxbw(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Pmovsxdq(CodeGenContext context, Operation operation) + { + context.Assembler.Pmovsxdq(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Pmovsxwd(CodeGenContext context, Operation operation) + { + context.Assembler.Pmovsxwd(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Pmovzxbw(CodeGenContext context, Operation operation) + { + context.Assembler.Pmovzxbw(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Pmovzxdq(CodeGenContext context, Operation operation) + { + context.Assembler.Pmovzxdq(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Pmovzxwd(CodeGenContext context, Operation operation) + { + context.Assembler.Pmovzxwd(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Pmulld(CodeGenContext context, Operation operation) + { + context.Assembler.Pmulld(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pmullw(CodeGenContext context, Operation operation) + { + context.Assembler.Pmullw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Popcnt(CodeGenContext context, Operation operation) + { + context.Assembler.Popcnt(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Por(CodeGenContext context, Operation operation) + { + context.Assembler.Por(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psllw(CodeGenContext context, Operation operation) + { + context.Assembler.Psllw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psrad(CodeGenContext context, Operation operation) + { + context.Assembler.Psrad(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psraw(CodeGenContext context, Operation operation) + { + context.Assembler.Psraw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psrld(CodeGenContext context, Operation operation) + { + context.Assembler.Psrld(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psrlq(CodeGenContext context, Operation operation) + { + context.Assembler.Psrlq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psrldq(CodeGenContext context, Operation operation) + { + context.Assembler.Psrldq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psrlw(CodeGenContext context, Operation operation) + { + context.Assembler.Psrlw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psubb(CodeGenContext context, Operation operation) + { + context.Assembler.Psubb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + 
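A reading aid for the argument order repeated throughout these helpers: the three-operand Assembler methods take (dest, source, source1), and the encoder above places source1 in VEX.vvvv (the first source of the AVX form) while source becomes the ModR/M r/m operand. Passing GetSource(1) before GetSource(0) is therefore deliberate and yields dest = src1 OP src2, which matters for the non-commutative cases such as the packed subtracts:

    //VPSUBD dest, src1, src2  <=>  dest = src1 - src2 (per 32-bit lane)
    context.Assembler.Psubd(operation.Dest,          //destination register
                            operation.GetSource(1),  //second source -> ModR/M r/m
                            operation.GetSource(0)); //first source  -> VEX.vvvv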
+ private static void GenerateX86Psubd(CodeGenContext context, Operation operation) + { + context.Assembler.Psubd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psubq(CodeGenContext context, Operation operation) + { + context.Assembler.Psubq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Psubw(CodeGenContext context, Operation operation) + { + context.Assembler.Psubw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pxor(CodeGenContext context, Operation operation) + { + context.Assembler.Pxor(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Rcpps(CodeGenContext context, Operation operation) + { + context.Assembler.Rcpps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Rcpss(CodeGenContext context, Operation operation) + { + context.Assembler.Rcpss(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Roundpd(CodeGenContext context, Operation operation) + { + context.Assembler.Roundpd(operation.Dest, operation.GetSource(0), operation.GetSource(1).AsByte()); + } + + private static void GenerateX86Roundps(CodeGenContext context, Operation operation) + { + context.Assembler.Roundps(operation.Dest, operation.GetSource(0), operation.GetSource(1).AsByte()); + } + + private static void GenerateX86Roundsd(CodeGenContext context, Operation operation) + { + context.Assembler.Roundsd(operation.Dest, operation.GetSource(0), operation.GetSource(1).AsByte()); + } + + private static void GenerateX86Roundss(CodeGenContext context, Operation operation) + { + context.Assembler.Roundss(operation.Dest, operation.GetSource(0), operation.GetSource(1).AsByte()); + } + + private static void GenerateX86Rsqrtps(CodeGenContext context, Operation operation) + { + context.Assembler.Rsqrtps(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Rsqrtss(CodeGenContext context, Operation operation) + { + context.Assembler.Rsqrtss(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Shufpd(CodeGenContext context, Operation operation) + { + context.Assembler.Shufpd( + operation.Dest, + operation.GetSource(1), + operation.GetSource(2).AsByte(), + operation.GetSource(0)); + } + + private static void GenerateX86Shufps(CodeGenContext context, Operation operation) + { + context.Assembler.Shufps( + operation.Dest, + operation.GetSource(1), + operation.GetSource(2).AsByte(), + operation.GetSource(0)); + } + + private static void GenerateX86Sqrtpd(CodeGenContext context, Operation operation) + { + context.Assembler.Sqrtpd(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Sqrtps(CodeGenContext context, Operation operation) + { + context.Assembler.Sqrtps(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Sqrtsd(CodeGenContext context, Operation operation) + { + context.Assembler.Sqrtsd(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Sqrtss(CodeGenContext context, Operation operation) + { + context.Assembler.Sqrtss(operation.Dest, operation.GetSource(0)); + } + + private static void GenerateX86Subpd(CodeGenContext context, Operation operation) + { + context.Assembler.Subpd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void 
GenerateX86Subps(CodeGenContext context, Operation operation) + { + context.Assembler.Subps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Subsd(CodeGenContext context, Operation operation) + { + context.Assembler.Subsd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Subss(CodeGenContext context, Operation operation) + { + context.Assembler.Subss(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Unpckhpd(CodeGenContext context, Operation operation) + { + context.Assembler.Unpckhpd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Unpckhps(CodeGenContext context, Operation operation) + { + context.Assembler.Unpckhps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Unpcklpd(CodeGenContext context, Operation operation) + { + context.Assembler.Unpcklpd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Unpcklps(CodeGenContext context, Operation operation) + { + context.Assembler.Unpcklps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Xorpd(CodeGenContext context, Operation operation) + { + context.Assembler.Xorpd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Xorps(CodeGenContext context, Operation operation) + { + context.Assembler.Xorps(operation.Dest, operation.GetSource(1), operation.GetSource(0)); } private static void GenerateCompare(CodeGenContext context, Operation operation, X86Condition condition) @@ -604,7 +1477,7 @@ namespace ARMeilleure.CodeGen.X86 private static void WritePrologue(CodeGenContext context) { - int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.UsedRegisters; + int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; mask |= 1 << (int)X86Register.Rbp; @@ -627,7 +1500,7 @@ namespace ARMeilleure.CodeGen.X86 private static void WriteEpilogue(CodeGenContext context) { - int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.UsedRegisters; + int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; mask |= 1 << (int)X86Register.Rbp; diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs new file mode 100644 index 0000000000..be5905394e --- /dev/null +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.CodeGen.X86 +{ + static class HardwareCapabilities + { + public const bool SupportsVexEncoding = true; + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 37c89fcec1..abe299e126 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -1,7 +1,6 @@ using ARMeilleure.CodeGen.Optimizations; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Memory; -using ARMeilleure.State; using ARMeilleure.Translation; using System.Collections.Generic; @@ -118,51 +117,93 @@ namespace ARMeilleure.CodeGen.X86 private static void AddConstantCopy(LinkedListNode node, Operation operation) { - if (operation.SourcesCount == 0) + if (operation.SourcesCount == 0 || HasFixedConst(operation.Inst)) { 
                 return;
             }
 
             Instruction inst = operation.Inst;
 
+            Operand dest = operation.Dest;
             Operand src1 = operation.GetSource(0);
             Operand src2;
 
-            if (src1.Kind == OperandKind.Constant && !HasConstSrc1(inst))
+            if (src1.Type.IsInteger())
             {
-                if (IsComutative(inst))
+                //Handle integer types.
+                //Most ALU instructions accept a 32-bit immediate on the second operand.
+                //We need to ensure the following:
+                //- If the constant is on operand 1, we need to move it.
+                //-- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+                //-- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                //- If the constant is on operand 2, we check if the instruction supports it,
+                //if not, we also add a copy. 64-bit constants are usually not supported.
+                bool isVecCopy = inst == Instruction.Copy && !dest.Type.IsInteger();
+
+                if (src1.Kind == OperandKind.Constant && (!HasConstSrc1(inst) || isVecCopy))
                 {
-                    src2 = operation.GetSource(1);
+                    if (IsComutative(inst))
+                    {
+                        src2 = operation.GetSource(1);
 
-                    Operand temp = src1;
+                        Operand temp = src1;
 
-                    src1 = src2;
-                    src2 = temp;
+                        src1 = src2;
+                        src2 = temp;
+
+                        operation.SetSource(0, src1);
+                        operation.SetSource(1, src2);
+                    }
+
+                    if (src1.Kind == OperandKind.Constant)
+                    {
+                        src1 = AddCopy(node, src1);
+
+                        operation.SetSource(0, src1);
+                    }
+                }
+
+                if (operation.SourcesCount < 2)
+                {
+                    return;
+                }
+
+                src2 = operation.GetSource(1);
+
+                if (src2.Kind == OperandKind.Constant && (!HasConstSrc2(inst) || IsLongConst(src2)))
+                {
+                    src2 = AddCopy(node, src2);
 
-                    operation.SetSource(0, src1);
                     operation.SetSource(1, src2);
                 }
-
-                if (src1.Kind == OperandKind.Constant)
+            }
+            else
+            {
+                //Handle non-integer types (FP32, FP64 and V128).
+                //For instructions without an immediate operand, we do the following:
+                //- Insert a copy with the constant value (as integer) to a GPR.
+                //- Insert a copy from the GPR to a XMM register.
+                //- Replace the constant use with the XMM register.
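To make the three steps above concrete: for a hypothetical FP64 immediate 1.0, the pass re-types the constant as its raw integer bits (this is what GetIntConst further down does through AsInt64), copies that into a GPR like any other integer constant, and then copies the GPR into an XMM register, so by register allocation time no floating-point immediates remain. The bit pattern involved:

    using System;

    static class FpConstBitsExample
    {
        public static void Main()
        {
            //double 1.0 reinterpreted as the integer the copy sequence will load:
            //  copy gpr <- const 0x3FF0000000000000
            //  copy xmm <- gpr                       (lowered to movd/movq later)
            long bits = BitConverter.DoubleToInt64Bits(1.0);

            Console.WriteLine($"0x{bits:X16}");       //prints 0x3FF0000000000000
        }
    }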
+ if (src1.Kind == OperandKind.Constant && src1.Type.IsInteger()) { - src1 = AddCopy(node, src1); + src1 = AddXmmCopy(node, src1); operation.SetSource(0, src1); } - } - if (operation.SourcesCount < 2) - { - return; - } + if (operation.SourcesCount < 2) + { + return; + } - src2 = operation.GetSource(1); + src2 = operation.GetSource(1); - if (src2.Kind == OperandKind.Constant && (!HasConstSrc2(inst) || IsLongConst(src2))) - { - src2 = AddCopy(node, src2); + if (src2.Kind == OperandKind.Constant && src2.Type.IsInteger()) + { + src2 = AddXmmCopy(node, src2); - operation.SetSource(1, src2); + operation.SetSource(1, src2); + } } } @@ -248,7 +289,7 @@ namespace ARMeilleure.CodeGen.X86 { int argsCount = operation.SourcesCount; - int maxArgs = CallingConvention.GetIntArgumentsOnRegsCount(); + int maxArgs = CallingConvention.GetArgumentsOnRegsCount(); if (argsCount > maxArgs + 1) { @@ -259,7 +300,18 @@ namespace ARMeilleure.CodeGen.X86 { Operand source = operation.GetSource(index); - Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(index - 1), source.Type); + RegisterType regType = source.Type.ToRegisterType(); + + Operand argReg; + + if (regType == RegisterType.Integer) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(index - 1), source.Type); + } + else /* if (regType == RegisterType.Vector) */ + { + argReg = Xmm(CallingConvention.GetVecArgumentRegister(index - 1), source.Type); + } Operation srcCopyOp = new Operation(Instruction.Copy, argReg, source); @@ -285,7 +337,18 @@ namespace ARMeilleure.CodeGen.X86 if (dest != null) { - Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), dest.Type); + RegisterType regType = dest.Type.ToRegisterType(); + + Operand retReg; + + if (regType == RegisterType.Integer) + { + retReg = Gpr(CallingConvention.GetIntReturnRegister(), dest.Type); + } + else /* if (regType == RegisterType.Vector) */ + { + retReg = Xmm(CallingConvention.GetVecReturnRegister(), dest.Type); + } Operation destCopyOp = new Operation(Instruction.Copy, dest, retReg); @@ -312,7 +375,7 @@ namespace ARMeilleure.CodeGen.X86 //a three operand form where the second source is a immediate value. bool threeOperandForm = inst == Instruction.Multiply && operation.GetSource(1).Kind == OperandKind.Constant; - if (IsSameOperandDestSrc1(inst) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm) + if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm) { Operation copyOp = new Operation(Instruction.Copy, dest, src1); @@ -332,6 +395,17 @@ namespace ARMeilleure.CodeGen.X86 } } + private static Operand AddXmmCopy(LinkedListNode node, Operand source) + { + Operand temp = Local(source.Type); + + Operation copyOp = new Operation(Instruction.Copy, temp, AddCopy(node, GetIntConst(source))); + + node.List.AddBefore(node, copyOp); + + return temp; + } + private static Operand AddCopy(LinkedListNode node, Operand source) { Operand temp = Local(source.Type); @@ -343,6 +417,20 @@ namespace ARMeilleure.CodeGen.X86 return temp; } + private static Operand GetIntConst(Operand value) + { + if (value.Type == OperandType.FP32) + { + return Const(value.AsInt32()); + } + else if (value.Type == OperandType.FP64) + { + return Const(value.AsInt64()); + } + + return value; + } + private static bool IsLongConst(Operand operand) { long value = operand.Type == OperandType.I32 ? 
operand.AsInt32() @@ -405,9 +493,14 @@ namespace ARMeilleure.CodeGen.X86 return Register((int)register, RegisterType.Integer, type); } - private static bool IsSameOperandDestSrc1(Instruction inst) + private static Operand Xmm(X86Register register, OperandType type) { - switch (inst) + return Register((int)register, RegisterType.Vector, type); + } + + private static bool IsSameOperandDestSrc1(Operation operation) + { + switch (operation.Inst) { case Instruction.Add: case Instruction.BitwiseAnd: @@ -423,6 +516,23 @@ namespace ARMeilleure.CodeGen.X86 case Instruction.ShiftRightUI: case Instruction.Subtract: return true; + + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return !HardwareCapabilities.SupportsVexEncoding; + } + + return IsVexSameOperandDestSrc1(operation); + } + + private static bool IsVexSameOperandDestSrc1(Operation operation) + { + if (IsIntrinsic(operation.Inst)) + { + bool isUnary = operation.SourcesCount < 2; + + return !HardwareCapabilities.SupportsVexEncoding && !isUnary; } return false; @@ -464,6 +574,9 @@ namespace ARMeilleure.CodeGen.X86 case Instruction.ShiftRightSI: case Instruction.ShiftRightUI: case Instruction.Subtract: + case Instruction.VectorExtract: + case Instruction.VectorExtract16: + case Instruction.VectorExtract8: return true; } @@ -486,5 +599,29 @@ namespace ARMeilleure.CodeGen.X86 return false; } + + private static bool HasFixedConst(Instruction inst) + { + switch (inst) + { + case Instruction.LoadFromContext: + case Instruction.StoreToContext: + case Instruction.VectorExtract: + case Instruction.VectorExtract16: + case Instruction.VectorExtract8: + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return true; + } + + return IsIntrinsic(inst); + } + + private static bool IsIntrinsic(Instruction inst) + { + return inst > Instruction.X86Intrinsic_Start && + inst < Instruction.X86Intrinsic_End; + } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index 0ea9ed1ab7..93f86407e8 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -3,38 +3,152 @@ namespace ARMeilleure.CodeGen.X86 enum X86Instruction { Add, + Addpd, + Addps, + Addsd, + Addss, And, + Andnpd, + Andnps, Bsr, Bswap, Call, Cmovcc, Cmp, Div, + Divpd, + Divps, + Divsd, + Divss, + Haddpd, + Haddps, Idiv, Imul, Imul128, + Insertps, + Maxpd, + Maxps, + Maxsd, + Maxss, + Minpd, + Minps, + Minsd, + Minss, Mov, Mov16, Mov8, + Movd, + Movdqu, + Movhlps, + Movlhps, + Movq, + Movsd, + Movss, Movsx16, Movsx32, Movsx8, Movzx16, Movzx8, Mul128, + Mulpd, + Mulps, + Mulsd, + Mulss, Neg, Not, Or, + Paddb, + Paddd, + Paddq, + Paddw, + Pand, + Pandn, + Pavgb, + Pavgw, + Pblendvb, + Pcmpeqb, + Pcmpeqd, + Pcmpeqq, + Pcmpeqw, + Pcmpgtb, + Pcmpgtd, + Pcmpgtq, + Pcmpgtw, + Pextrb, + Pextrd, + Pextrw, + Pinsrb, + Pinsrd, + Pinsrw, + Pmaxsb, + Pmaxsd, + Pmaxsw, + Pmaxub, + Pmaxud, + Pmaxuw, + Pminsb, + Pminsd, + Pminsw, + Pminub, + Pminud, + Pminuw, + Pmovsxbw, + Pmovsxdq, + Pmovsxwd, + Pmovzxbw, + Pmovzxdq, + Pmovzxwd, + Pmulld, + Pmullw, Pop, + Popcnt, + Por, + Pshufd, + Psllw, + Psrad, + Psraw, + Psrld, + Psrlq, + Psrldq, + Psrlw, + Psubb, + Psubd, + Psubq, + Psubw, Push, + Pxor, + Rcpps, + Rcpss, Ror, + Roundpd, + Roundps, + Roundsd, + Roundss, + Rsqrtps, + Rsqrtss, Sar, Setcc, Shl, Shr, + Shufpd, + Shufps, + Sqrtpd, + Sqrtps, + Sqrtsd, + Sqrtss, Sub, + Subpd, + Subps, + Subsd, + 
Subss, Test, + Unpckhpd, + Unpckhps, + Unpcklpd, + Unpcklps, Xor, + Xorpd, + Xorps, Count } diff --git a/ARMeilleure/CodeGen/X86/X86Register.cs b/ARMeilleure/CodeGen/X86/X86Register.cs index 14ffaf3bc3..463f20d255 100644 --- a/ARMeilleure/CodeGen/X86/X86Register.cs +++ b/ARMeilleure/CodeGen/X86/X86Register.cs @@ -17,6 +17,23 @@ namespace ARMeilleure.CodeGen.X86 R12 = 12, R13 = 13, R14 = 14, - R15 = 15 + R15 = 15, + + Xmm0 = 0, + Xmm1 = 1, + Xmm2 = 2, + Xmm3 = 3, + Xmm4 = 4, + Xmm5 = 5, + Xmm6 = 6, + Xmm7 = 7, + Xmm8 = 8, + Xmm9 = 9, + Xmm10 = 10, + Xmm11 = 11, + Xmm12 = 12, + Xmm13 = 13, + Xmm14 = 14, + Xmm15 = 15 } } \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode.cs b/ARMeilleure/Decoders/OpCode.cs index 515066b25c..0bfc2456bc 100644 --- a/ARMeilleure/Decoders/OpCode.cs +++ b/ARMeilleure/Decoders/OpCode.cs @@ -24,6 +24,9 @@ namespace ARMeilleure.Decoders RegisterSize = RegisterSize.Int64; } + public int GetPairsCount() => GetBitsCount() / 16; + public int GetBytesCount() => GetBitsCount() / 8; + public int GetBitsCount() { switch (RegisterSize) diff --git a/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/ARMeilleure/Decoders/OpCodeSimdFmov.cs index 7f776281c8..61a3f077d9 100644 --- a/ARMeilleure/Decoders/OpCodeSimdFmov.cs +++ b/ARMeilleure/Decoders/OpCodeSimdFmov.cs @@ -2,9 +2,9 @@ namespace ARMeilleure.Decoders { class OpCodeSimdFmov : OpCode, IOpCodeSimd { - public int Rd { get; private set; } - public long Imm { get; private set; } - public int Size { get; private set; } + public int Rd { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { @@ -25,7 +25,7 @@ namespace ARMeilleure.Decoders Rd = (opCode >> 0) & 0x1f; imm = (opCode >> 13) & 0xff; - Imm = DecoderHelper.DecodeImm8Float(imm, type); + Immediate = DecoderHelper.DecodeImm8Float(imm, type); } } } \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdImm.cs b/ARMeilleure/Decoders/OpCodeSimdImm.cs index 4bed646b23..a1d684bce6 100644 --- a/ARMeilleure/Decoders/OpCodeSimdImm.cs +++ b/ARMeilleure/Decoders/OpCodeSimdImm.cs @@ -2,9 +2,9 @@ namespace ARMeilleure.Decoders { class OpCodeSimdImm : OpCode, IOpCodeSimd { - public int Rd { get; private set; } - public long Imm { get; private set; } - public int Size { get; private set; } + public int Rd { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { @@ -76,7 +76,7 @@ namespace ARMeilleure.Decoders Size = 0; } - Imm = imm; + Immediate = imm; RegisterSize = ((opCode >> 30) & 1) != 0 ? 
RegisterSize.Simd128 diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 9f49ffd344..21f2f844ed 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -190,27 +190,27 @@ namespace ARMeilleure.Decoders SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh, InstEmit.Umulh, typeof(OpCodeMul)); //FP & SIMD - SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, null, typeof(OpCodeSimd)); - SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, null, typeof(OpCodeSimdReg)); - SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, null, typeof(OpCodeSimd)); - SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, null, typeof(OpCodeSimd)); - SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, null, typeof(OpCodeSimd)); - SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, null, typeof(OpCodeSimd)); - SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, null, typeof(OpCodeSimd)); - SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, null, typeof(OpCodeSimd)); + SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, InstEmit.Abs_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, InstEmit.Abs_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, InstEmit.Add_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, InstEmit.Add_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, InstEmit.Addhn_V, typeof(OpCodeSimdReg)); + SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, InstEmit.Addp_S, typeof(OpCodeSimd)); + SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, InstEmit.Addp_V, typeof(OpCodeSimdReg)); + SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, typeof(OpCodeSimd)); + SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, InstEmit.Aesd_V, typeof(OpCodeSimd)); + SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, typeof(OpCodeSimd)); + SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, typeof(OpCodeSimd)); + SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, typeof(OpCodeSimd)); SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, null, typeof(OpCodeSimdReg)); SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, null, typeof(OpCodeSimdReg)); SetA64("0x10111100000xxx<101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, null, typeof(OpCodeSimdReg)); @@ -233,20 +233,20 @@ namespace ARMeilleure.Decoders SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, null, typeof(OpCodeSimd)); SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, null, typeof(OpCodeSimdReg)); SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, null, typeof(OpCodeSimdReg)); - SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, null, 
typeof(OpCodeSimd)); + SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, typeof(OpCodeSimd)); SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, null, typeof(OpCodeSimdIns)); SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, null, typeof(OpCodeSimdIns)); SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, null, typeof(OpCodeSimdIns)); SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, null, typeof(OpCodeSimdReg)); SetA64("0>101110000xxxxx01011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, null, typeof(OpCodeSimdReg)); - SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, null, typeof(OpCodeSimd)); - SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, null, typeof(OpCodeSimd)); - SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, null, typeof(OpCodeSimdReg)); - SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, null, typeof(OpCodeSimd)); - SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, null, typeof(OpCodeSimdReg)); + SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstName.Fabd_S, InstEmit.Fabd_S, typeof(OpCodeSimdReg)); + SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg)); + SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, typeof(OpCodeSimd)); + SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, typeof(OpCodeSimdReg)); + SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, typeof(OpCodeSimd)); + SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, typeof(OpCodeSimdReg)); SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, null, typeof(OpCodeSimdFcond)); SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, null, typeof(OpCodeSimdFcond)); SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, null, typeof(OpCodeSimdReg)); @@ -293,73 +293,73 @@ namespace ARMeilleure.Decoders SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V, null, typeof(OpCodeSimd)); SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, null, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, null, typeof(OpCodeSimdShImm)); - SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S, null, typeof(OpCodeSimdReg)); - SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, null, typeof(OpCodeSimdReg)); - SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, null, typeof(OpCodeSimdReg)); - SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, 
null, typeof(OpCodeSimdReg)); - SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, null, typeof(OpCodeSimdReg)); - SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, null, typeof(OpCodeSimdReg)); - SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, null, typeof(OpCodeSimdRegElemF)); - SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, null, typeof(OpCodeSimdReg)); - SetA64("0>00111110011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, null, typeof(OpCodeSimdReg)); - SetA64("0>00111111011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, InstEmit.Fdiv_V, typeof(OpCodeSimdReg)); + SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, InstEmit.Fmadd_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, InstEmit.Fmax_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, typeof(OpCodeSimdReg)); + SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, typeof(OpCodeSimdReg)); + SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, typeof(OpCodeSimdReg)); + SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, typeof(OpCodeSimdRegElemF)); + SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, typeof(OpCodeSimdReg)); + SetA64("0>00111110011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, typeof(OpCodeSimdReg)); + SetA64("0>00111111011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, null, typeof(OpCodeSimdReg)); - SetA64("0>00111110011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, null, typeof(OpCodeSimdReg)); - SetA64("0>10111111011101<100000111110xxxxxxxxxx", InstName.Fneg_V, null, typeof(OpCodeSimd)); - SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, null, typeof(OpCodeSimdReg)); - SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, null, typeof(OpCodeSimdReg)); - SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, null, typeof(OpCodeSimd)); - SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, null, typeof(OpCodeSimd)); - SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, null, typeof(OpCodeSimdReg)); - SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, null, typeof(OpCodeSimd)); - SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, null, typeof(OpCodeSimd)); - SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, null, typeof(OpCodeSimd)); - SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, null, typeof(OpCodeSimd)); - SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, null, typeof(OpCodeSimd)); - SetA64("000111100x100101010000xxxxxxxxxx", 
InstName.Frintm_S, null, typeof(OpCodeSimd)); - SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, null, typeof(OpCodeSimd)); - SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, null, typeof(OpCodeSimd)); - SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, null, typeof(OpCodeSimd)); - SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, null, typeof(OpCodeSimd)); - SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, null, typeof(OpCodeSimd)); - SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, null, typeof(OpCodeSimd)); - SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, null, typeof(OpCodeSimd)); - SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, null, typeof(OpCodeSimd)); - SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, null, typeof(OpCodeSimd)); - SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, null, typeof(OpCodeSimd)); - SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, null, typeof(OpCodeSimd)); - SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, null, typeof(OpCodeSimdReg)); - SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, null, typeof(OpCodeSimd)); - SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, null, typeof(OpCodeSimd)); - SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, null, typeof(OpCodeSimdReg)); - SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, null, typeof(OpCodeSimdReg)); + SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", InstName.Fmsub_S, InstEmit.Fmsub_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx000010xxxxxxxxxx", InstName.Fmul_S, InstEmit.Fmul_S, typeof(OpCodeSimdReg)); + SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Se, InstEmit.Fmul_Se, typeof(OpCodeSimdRegElemF)); + SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, InstEmit.Fmul_V, typeof(OpCodeSimdReg)); + SetA64("0>00111110011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, InstEmit.Fmulx_V, typeof(OpCodeSimdReg)); + SetA64("0>10111111011101<100000111110xxxxxxxxxx", InstName.Fneg_V, InstEmit.Fneg_V, typeof(OpCodeSimd)); + SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, InstEmit.Fnmadd_S, typeof(OpCodeSimdReg)); + SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, InstEmit.Fnmsub_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, InstEmit.Fnmul_S, typeof(OpCodeSimdReg)); + SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, InstEmit.Frecpe_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, InstEmit.Frecpe_V, typeof(OpCodeSimd)); + SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, InstEmit.Frecps_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, InstEmit.Frecps_V, typeof(OpCodeSimdReg)); + SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, InstEmit.Frecpx_S, typeof(OpCodeSimd)); + SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, InstEmit.Frinta_S, typeof(OpCodeSimd)); + SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, InstEmit.Frinta_V, typeof(OpCodeSimd)); + SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, InstEmit.Frinti_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, InstEmit.Frinti_V, typeof(OpCodeSimd)); + SetA64("000111100x100101010000xxxxxxxxxx", 
InstName.Frintm_S, InstEmit.Frintm_S, typeof(OpCodeSimd)); + SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, InstEmit.Frintm_V, typeof(OpCodeSimd)); + SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, InstEmit.Frintn_S, typeof(OpCodeSimd)); + SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, InstEmit.Frintn_V, typeof(OpCodeSimd)); + SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, InstEmit.Frintp_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, InstEmit.Frintp_V, typeof(OpCodeSimd)); + SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, InstEmit.Frintx_S, typeof(OpCodeSimd)); + SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, InstEmit.Frintx_V, typeof(OpCodeSimd)); + SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, InstEmit.Frintz_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, InstEmit.Frintz_V, typeof(OpCodeSimd)); + SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, InstEmit.Frsqrte_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, InstEmit.Frsqrte_V, typeof(OpCodeSimd)); + SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, InstEmit.Frsqrts_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, InstEmit.Frsqrts_V, typeof(OpCodeSimdReg)); + SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, InstEmit.Fsqrt_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, typeof(OpCodeSimd)); + SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, typeof(OpCodeSimdReg)); SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, null, typeof(OpCodeSimdIns)); SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, null, typeof(OpCodeSimdIns)); SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, null, typeof(OpCodeSimdMemMs)); @@ -373,40 +373,40 @@ namespace ARMeilleure.Decoders SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, null, typeof(OpCodeSimdMemImm)); SetA64("xx111100x11xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, null, typeof(OpCodeSimdMemReg)); SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, null, typeof(OpCodeSimdMemLit)); - SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, null, typeof(OpCodeSimdRegElem)); - SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, null, typeof(OpCodeSimdRegElem)); + SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, InstEmit.Mla_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); SetA64("0xx0111100000xxx111001xxxxxxxxxx", 
InstName.Movi_V, null, typeof(OpCodeSimdImm)); - SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, null, typeof(OpCodeSimdRegElem)); + SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm)); SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm)); SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm)); - SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, null, typeof(OpCodeSimd)); - SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, null, typeof(OpCodeSimd)); + SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, typeof(OpCodeSimd)); SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, null, typeof(OpCodeSimd)); SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, null, typeof(OpCodeSimdReg)); SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, null, typeof(OpCodeSimdReg)); SetA64("0x00111100000xxx<>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, null, typeof(OpCodeSimdShImm)); - SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, null, typeof(OpCodeSimd)); - SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, null, typeof(OpCodeSimd)); - SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, null, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, InstEmit.Sabd_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, InstEmit.Sabdl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, typeof(OpCodeSimdReg)); SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, null, typeof(OpCodeSimdCvt)); SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, null, typeof(OpCodeSimdCvt)); SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S, null, typeof(OpCodeSimd)); @@ -421,39 +421,43 @@ namespace ARMeilleure.Decoders 
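Each SetA64 row in this table pairs a 32-character encoding string with an instruction name, an emitter callback and an opcode decoder type. The patch never shows how SetA64 parses those strings, so the following is only a rough, assumption-laden sketch of the general idea: '0' and '1' are taken as fixed bits, 'x' as don't-care, and the '<' and '>' characters (which appear to mark fields with restricted legal values) are simply ignored here. The MaskValue tuple, ToMaskValue and Matches names are illustrative, not part of the patch.

using System;

class EncodingPatternSketch
{
    // Folds the fixed '0'/'1' bits of a 32-character pattern (leftmost
    // character assumed to be bit 31) into a mask/value pair; every other
    // character is treated as a don't-care bit in this sketch.
    static (uint Mask, uint Value) ToMaskValue(string pattern)
    {
        if (pattern.Length != 32)
        {
            throw new ArgumentException("Expected a 32-character encoding pattern.");
        }

        uint mask = 0, value = 0;

        for (int i = 0; i < 32; i++)
        {
            uint bit = 1u << (31 - i);

            if (pattern[i] == '0')
            {
                mask |= bit;
            }
            else if (pattern[i] == '1')
            {
                mask |= bit;
                value |= bit;
            }
        }

        return (mask, value);
    }

    static bool Matches(uint opCode, (uint Mask, uint Value) enc)
    {
        return (opCode & enc.Mask) == enc.Value;
    }

    static void Main()
    {
        // Pattern copied from the Neg_S row above; the opcode below is a
        // hypothetical scalar NEG encoding with the register fields filled in.
        var negS = ToMaskValue("0111111011100000101110xxxxxxxxxx");

        Console.WriteLine($"mask=0x{negS.Mask:x8} value=0x{negS.Value:x8}");
        Console.WriteLine(Matches(0x7EE0BBE0, negS)); // True
    }
}

A real decoder also has to honour the constrained fields and disambiguate overlapping patterns, so the sketch above is only an approximation of what OpCodeTable does with these strings.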
SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, null, typeof(OpCodeSimdReg)); SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, null, typeof(OpCodeSimd)); SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, null, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, typeof(OpCodeSimdReg)); SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, null, typeof(OpCodeSimdShImm)); SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, null, typeof(OpCodeSimdShImm)); SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, null, typeof(OpCodeSimdShImm)); SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, null, typeof(OpCodeSimd)); SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, null, typeof(OpCodeSimdShImm)); - SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, null, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, typeof(OpCodeSimdReg)); SetA64("0x1011110>>>>xxx010101xxxxxxxxxx", InstName.Sli_V, null, typeof(OpCodeSimdShImm)); - SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, null, typeof(OpCodeSimdRegElem)); - SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, null, typeof(OpCodeSimdRegElem)); + SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, typeof(OpCodeSimdReg)); + SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, InstEmit.Smin_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, InstEmit.Sminp_V, typeof(OpCodeSimdReg)); + SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, InstEmit.Smlal_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, null, typeof(OpCodeSimdIns)); - SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, null, typeof(OpCodeSimdRegElem)); - SetA64("01011110xx100000011110xxxxxxxxxx", 
InstName.Sqabs_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, null, typeof(OpCodeSimd)); - SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, null, typeof(OpCodeSimdReg)); - SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, null, typeof(OpCodeSimdReg)); - SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, null, typeof(OpCodeSimdReg)); - SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, null, typeof(OpCodeSimdReg)); - SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, null, typeof(OpCodeSimd)); - SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, null, typeof(OpCodeSimd)); - SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, null, typeof(OpCodeSimdReg)); - SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, null, typeof(OpCodeSimdReg)); - SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, null, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, typeof(OpCodeSimdRegElem)); + SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, InstEmit.Sqabs_V, typeof(OpCodeSimd)); + SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, InstEmit.Sqadd_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, InstEmit.Sqadd_V, typeof(OpCodeSimdReg)); + SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, typeof(OpCodeSimdReg)); + SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, typeof(OpCodeSimdReg)); + SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, typeof(OpCodeSimdReg)); + SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, typeof(OpCodeSimdReg)); + SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, InstEmit.Sqneg_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, InstEmit.Sqneg_V, typeof(OpCodeSimd)); + SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg)); + SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg)); + SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg)); + SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg)); SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, null, typeof(OpCodeSimdReg)); SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, null, typeof(OpCodeSimdShImm)); SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, null, typeof(OpCodeSimdShImm)); @@ -464,13 +468,13 @@ namespace ARMeilleure.Decoders SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, null, typeof(OpCodeSimdShImm)); SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, null, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx100001xxxxxxxxxx", 
InstName.Sqshrun_V, null, typeof(OpCodeSimdShImm)); - SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, null, typeof(OpCodeSimdReg)); - SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, null, typeof(OpCodeSimd)); - SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, null, typeof(OpCodeSimd)); - SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, null, typeof(OpCodeSimd)); - SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, null, typeof(OpCodeSimd)); - SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, null, typeof(OpCodeSimdReg)); + SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, typeof(OpCodeSimdReg)); + SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, typeof(OpCodeSimd)); + SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, InstEmit.Sqxtn_V, typeof(OpCodeSimd)); + SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, typeof(OpCodeSimd)); + SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, typeof(OpCodeSimdReg)); SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, null, typeof(OpCodeSimdReg)); SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, null, typeof(OpCodeSimdShImm)); SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, null, typeof(OpCodeSimdShImm)); @@ -486,8 +490,8 @@ namespace ARMeilleure.Decoders SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, null, typeof(OpCodeSimdShImm)); SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, null, typeof(OpCodeSimdShImm)); SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, null, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, typeof(OpCodeSimdReg)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, null, typeof(OpCodeSimdMemMs)); SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms, null, typeof(OpCodeSimdMemMs)); SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss, null, typeof(OpCodeSimdMemSs)); @@ -498,54 +502,58 @@ namespace ARMeilleure.Decoders SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str, null, typeof(OpCodeSimdMemImm)); SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, null, typeof(OpCodeSimdMemImm)); SetA64("xx111100x01xxxxxxxxx10xxxxxxxxxx", InstName.Str, null, typeof(OpCodeSimdMemReg)); - SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, null, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, null, typeof(OpCodeSimdReg)); - SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, null, typeof(OpCodeSimd)); + SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, 
InstEmit.Sub_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, InstEmit.Sub_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, typeof(OpCodeSimdReg)); + SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, typeof(OpCodeSimd)); SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, null, typeof(OpCodeSimdTbl)); SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, null, typeof(OpCodeSimdReg)); SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, null, typeof(OpCodeSimd)); - SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, null, typeof(OpCodeSimd)); - SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, null, typeof(OpCodeSimd)); - SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, null, typeof(OpCodeSimd)); - SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, null, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, InstEmit.Uabdl_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, InstEmit.Uadalp_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, InstEmit.Uaddl_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, InstEmit.Uaddlp_V, typeof(OpCodeSimd)); + SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, typeof(OpCodeSimd)); + SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, InstEmit.Uaddw_V, typeof(OpCodeSimdReg)); SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp, null, typeof(OpCodeSimdCvt)); SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed, null, typeof(OpCodeSimdCvt)); SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S, null, typeof(OpCodeSimd)); SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V, null, typeof(OpCodeSimd)); - SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, null, typeof(OpCodeSimdReg)); - 
SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, null, typeof(OpCodeSimdRegElem)); - SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, null, typeof(OpCodeSimdRegElem)); + SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, InstEmit.Uhadd_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, InstEmit.Uhsub_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, InstEmit.Umax_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, InstEmit.Umaxp_V, typeof(OpCodeSimdReg)); + SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, typeof(OpCodeSimd)); + SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, InstEmit.Umin_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, InstEmit.Uminp_V, typeof(OpCodeSimdReg)); + SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, typeof(OpCodeSimd)); + SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, InstEmit.Umlal_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, null, typeof(OpCodeSimdIns)); - SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, null, typeof(OpCodeSimdRegElem)); - SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, null, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, null, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, typeof(OpCodeSimdRegElem)); + SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, InstEmit.Uqadd_V, typeof(OpCodeSimdReg)); SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, null, typeof(OpCodeSimdReg)); SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, null, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, null, typeof(OpCodeSimdShImm)); SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, null, typeof(OpCodeSimdReg)); SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, null, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, null, typeof(OpCodeSimdShImm)); - SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, null, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, null, 
typeof(OpCodeSimdReg)); - SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, null, typeof(OpCodeSimd)); - SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, null, typeof(OpCodeSimd)); - SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, null, typeof(OpCodeSimdReg)); + SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, typeof(OpCodeSimdReg)); + SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, typeof(OpCodeSimd)); + SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, typeof(OpCodeSimdReg)); SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, null, typeof(OpCodeSimdReg)); SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, null, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, null, typeof(OpCodeSimdShImm)); @@ -558,13 +566,13 @@ namespace ARMeilleure.Decoders SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, null, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, null, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, null, typeof(OpCodeSimdShImm)); - SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, null, typeof(OpCodeSimd)); - SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, null, typeof(OpCodeSimd)); + SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, typeof(OpCodeSimd)); SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, null, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, null, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, null, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, typeof(OpCodeSimdReg)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, null, typeof(OpCodeSimdReg)); SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, null, typeof(OpCodeSimdReg)); SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, null, typeof(OpCodeSimd)); diff --git a/ARMeilleure/Diagnostics/Logger.cs b/ARMeilleure/Diagnostics/Logger.cs index 109822beb2..9833564253 100644 --- a/ARMeilleure/Diagnostics/Logger.cs +++ b/ARMeilleure/Diagnostics/Logger.cs @@ -3,16 +3,16 @@ using System; namespace ARMeilleure.Diagnostics { - class Logger + static class Logger { - public void StartPass(PassName name) + public static void StartPass(PassName name) { #if DEBUG WriteOutput(name + " pass started..."); #endif } - public void EndPass(PassName name, ControlFlowGraph cfg) + public static void EndPass(PassName name, ControlFlowGraph cfg) { #if DEBUG EndPass(name); @@ -23,14 +23,14 @@ namespace ARMeilleure.Diagnostics #endif } - public void EndPass(PassName name) + public static void EndPass(PassName name) { #if DEBUG WriteOutput(name + " pass ended..."); 
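The CryptoHelper.cs file added a little further below embeds large precomputed GF(2^8) multiplication tables (_gfMul02, _gfMul03, _gfMul09, _gfMul0B, _gfMul0D and _gfMul0E) that AesMixColumns and AesInvMixColumns index into. The patch only ships the finished tables, so purely as a point of reference, here is a minimal self-contained sketch of how such a table can be regenerated from the AES xtime operation over the reduction polynomial x^8 + x^4 + x^3 + x + 1; the Xtime and GMul names are illustrative, not part of the patch.

using System;

class GfTableSketch
{
    // Multiply by x (i.e. by 2) in GF(2^8), reducing by the AES polynomial
    // 0x11b whenever the shifted value overflows eight bits.
    static byte Xtime(byte b)
    {
        return (byte)((b << 1) ^ (((b & 0x80) != 0) ? 0x1b : 0));
    }

    // General GF(2^8) multiplication built on Xtime; table[i] = GMul(c, i)
    // reproduces the _gfMulXX table for the constant c.
    static byte GMul(byte a, byte b)
    {
        byte result = 0;

        while (a != 0)
        {
            if ((a & 1) != 0)
            {
                result ^= b;
            }

            a >>= 1;
            b = Xtime(b);
        }

        return result;
    }

    static void Main()
    {
        // First entries of the multiply-by-3 table: 0x00, 0x03, 0x06, 0x05, ...
        for (int i = 0; i < 8; i++)
        {
            Console.Write($"0x{GMul(0x03, (byte)i):x2} ");
        }

        Console.WriteLine();
    }
}

The same arithmetic explains the MixColumns helpers themselves: each output byte is the XOR of the four column bytes weighted by 1, 2 and 3 (or by 9, 11, 13 and 14 in the inverse transform), which is exactly the per-byte work the table lookups avoid redoing.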
#endif } - private void WriteOutput(string text) + private static void WriteOutput(string text) { Console.WriteLine(text); } diff --git a/ARMeilleure/Diagnostics/PassName.cs b/ARMeilleure/Diagnostics/PassName.cs index c9d1e1b160..ac0f246e53 100644 --- a/ARMeilleure/Diagnostics/PassName.cs +++ b/ARMeilleure/Diagnostics/PassName.cs @@ -3,6 +3,8 @@ namespace ARMeilleure.Diagnostics enum PassName { Translation, - SsaConstruction + SsaConstruction, + PreAllocation, + RegisterAllocation } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/CryptoHelper.cs b/ARMeilleure/Instructions/CryptoHelper.cs new file mode 100644 index 0000000000..b6b4a62d36 --- /dev/null +++ b/ARMeilleure/Instructions/CryptoHelper.cs @@ -0,0 +1,279 @@ +// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf + +using ARMeilleure.State; + +namespace ARMeilleure.Instructions +{ + static class CryptoHelper + { +#region "LookUp Tables" + private static readonly byte[] _sBox = new byte[] + { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + private static readonly byte[] _invSBox = new byte[] + { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + private static readonly byte[] _gfMul02 = new byte[] + { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 + }; + + private static readonly byte[] _gfMul03 = new byte[] + { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 
0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a + }; + + private static readonly byte[] _gfMul09 = new byte[] + { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, + 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 + }; + + private static readonly byte[] _gfMul0B = new byte[] + { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 
0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 + }; + + private static readonly byte[] _gfMul0D = new byte[] + { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 + }; + + private static readonly byte[] _gfMul0E = new byte[] + { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d + }; + + private static readonly byte[] _srPerm = new byte[] + { + 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 + }; + + private static readonly byte[] _isrPerm = new 
byte[] + { + 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 + }; +#endregion + + public static V128 AesInvMixColumns(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int columns = 0; columns <= 3; columns++) + { + int idx = columns << 2; + + byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3] + byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3] + byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3] + byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3] + + outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]); + outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]); + outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]); + outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]); + } + + return new V128(outState); + } + + public static V128 AesInvShiftRows(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[_isrPerm[idx]] = inState[idx]; + } + + return new V128(outState); + } + + public static V128 AesInvSubBytes(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[idx] = _invSBox[inState[idx]]; + } + + return new V128(outState); + } + + public static V128 AesMixColumns(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int columns = 0; columns <= 3; columns++) + { + int idx = columns << 2; + + byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3] + byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3] + byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3] + byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3] + + outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3); + outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3); + outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]); + outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]); + } + + return new V128(outState); + } + + public static V128 AesShiftRows(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[_srPerm[idx]] = inState[idx]; + } + + return new V128(outState); + } + + public static V128 AesSubBytes(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[idx] = _sBox[inState[idx]]; + } + + return new V128(outState); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs index c3e7ad631c..79f8224d29 100644 --- a/ARMeilleure/Instructions/InstEmitAlu.cs +++ b/ARMeilleure/Instructions/InstEmitAlu.cs @@ -19,7 +19,7 @@ namespace ARMeilleure.Instructions Operand n = GetAluN(context); Operand m = GetAluM(context); - Operand d = context.IAdd(n, m); + Operand d = context.Add(n, m); Operand carry = GetFlag(PState.CFlag); @@ -28,7 +28,7 @@ namespace ARMeilleure.Instructions carry = context.Copy(Local(OperandType.I64), carry); } - d = context.IAdd(d, carry); + d = context.Add(d, carry); if (setFlags) { @@ -43,7 +43,7 @@ namespace ARMeilleure.Instructions public static void Add(EmitterContext context) { - SetAluD(context, 
context.IAdd(GetAluN(context), GetAluM(context))); + SetAluD(context, context.Add(GetAluN(context), GetAluM(context))); } public static void Adds(EmitterContext context) @@ -51,7 +51,7 @@ namespace ARMeilleure.Instructions Operand n = GetAluN(context); Operand m = GetAluM(context); - Operand d = context.IAdd(n, m); + Operand d = context.Add(n, m); EmitNZFlagsCheck(context, d); @@ -119,7 +119,7 @@ namespace ARMeilleure.Instructions Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow)); - res = context.ISubtract(res, Const(res.Type, 1)); + res = context.Subtract(res, Const(res.Type, 1)); SetAluDOrZR(context, res); } @@ -195,7 +195,7 @@ namespace ARMeilleure.Instructions Operand n = GetAluN(context); Operand m = GetAluM(context); - Operand d = context.ISubtract(n, m); + Operand d = context.Subtract(n, m); Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); @@ -204,7 +204,7 @@ namespace ARMeilleure.Instructions borrow = context.Copy(Local(OperandType.I64), borrow); } - d = context.ISubtract(d, borrow); + d = context.Subtract(d, borrow); if (setFlags) { @@ -219,7 +219,7 @@ namespace ARMeilleure.Instructions public static void Sub(EmitterContext context) { - SetAluD(context, context.ISubtract(GetAluN(context), GetAluM(context))); + SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context))); } public static void Subs(EmitterContext context) @@ -227,7 +227,7 @@ namespace ARMeilleure.Instructions Operand n = GetAluN(context); Operand m = GetAluM(context); - Operand d = context.ISubtract(n, m); + Operand d = context.Subtract(n, m); EmitNZFlagsCheck(context, d); diff --git a/ARMeilleure/Instructions/InstEmitCcmp.cs b/ARMeilleure/Instructions/InstEmitCcmp.cs index 58a393262d..9bdca49e06 100644 --- a/ARMeilleure/Instructions/InstEmitCcmp.cs +++ b/ARMeilleure/Instructions/InstEmitCcmp.cs @@ -38,7 +38,7 @@ namespace ARMeilleure.Instructions if (isNegated) { - Operand d = context.IAdd(n, m); + Operand d = context.Add(n, m); EmitNZFlagsCheck(context, d); @@ -47,7 +47,7 @@ namespace ARMeilleure.Instructions } else { - Operand d = context.ISubtract(n, m); + Operand d = context.Subtract(n, m); EmitNZFlagsCheck(context, d); diff --git a/ARMeilleure/Instructions/InstEmitCrypto.cs b/ARMeilleure/Instructions/InstEmitCrypto.cs new file mode 100644 index 0000000000..03f00ed13f --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitCrypto.cs @@ -0,0 +1,58 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Aesd_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)); + + context.Copy(d, context.Call(info, d, n)); + } + + public static void Aese_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)); + + context.Copy(d, context.Call(info, d, n)); + } + + public static void Aesimc_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)); + + 
context.Copy(GetVec(op.Rd), context.Call(info, n)); + } + + public static void Aesmc_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)); + + context.Copy(GetVec(op.Rd), context.Call(info, n)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitCsel.cs b/ARMeilleure/Instructions/InstEmitCsel.cs index cdea6fa7ac..a894e69bd3 100644 --- a/ARMeilleure/Instructions/InstEmitCsel.cs +++ b/ARMeilleure/Instructions/InstEmitCsel.cs @@ -32,7 +32,7 @@ namespace ARMeilleure.Instructions if (cselOp == CselOperation.Increment) { - m = context.IAdd(m, Const(m.Type, 1)); + m = context.Add(m, Const(m.Type, 1)); } else if (cselOp == CselOperation.Invert) { @@ -40,7 +40,7 @@ namespace ARMeilleure.Instructions } else if (cselOp == CselOperation.Negate) { - m = context.INegate(m); + m = context.Negate(m); } Operand condTrue = GetCondTrue(context, op.Cond); diff --git a/ARMeilleure/Instructions/InstEmitDiv.cs b/ARMeilleure/Instructions/InstEmitDiv.cs index 30cfcb2730..82b2f9e3a4 100644 --- a/ARMeilleure/Instructions/InstEmitDiv.cs +++ b/ARMeilleure/Instructions/InstEmitDiv.cs @@ -50,8 +50,8 @@ namespace ARMeilleure.Instructions } Operand d = unsigned - ? context.IDivideUI(n, m) - : context.IDivide (n, m); + ? context.DivideUI(n, m) + : context.Divide (n, m); SetAluDOrZR(context, d); diff --git a/ARMeilleure/Instructions/InstEmitMemory.cs b/ARMeilleure/Instructions/InstEmitMemory.cs index 5ae4e8e37b..b58d0748d2 100644 --- a/ARMeilleure/Instructions/InstEmitMemory.cs +++ b/ARMeilleure/Instructions/InstEmitMemory.cs @@ -90,7 +90,7 @@ namespace ARMeilleure.Instructions Operand address = GetAddress(context); - Operand address2 = context.IAdd(address, Const(1L << op.Size)); + Operand address2 = context.Add(address, Const(1L << op.Size)); EmitLoad(op.Rt, address); EmitLoad(op.Rt2, address2); @@ -117,7 +117,7 @@ namespace ARMeilleure.Instructions Operand address = GetAddress(context); - Operand address2 = context.IAdd(address, Const(1L << op.Size)); + Operand address2 = context.Add(address, Const(1L << op.Size)); Operand t = GetT(context, op.Rt); Operand t2 = GetT(context, op.Rt2); @@ -141,7 +141,7 @@ namespace ARMeilleure.Instructions //Pre-indexing. if (!op.PostIdx) { - address = context.IAdd(address, Const(op.Immediate)); + address = context.Add(address, Const(op.Immediate)); } break; @@ -158,7 +158,7 @@ namespace ARMeilleure.Instructions m = context.ShiftLeft(m, Const(op.Size)); } - address = context.IAdd(n, m); + address = context.Add(n, m); break; } @@ -174,7 +174,7 @@ namespace ARMeilleure.Instructions { if (op.PostIdx) { - address = context.IAdd(address, Const(op.Immediate)); + address = context.Add(address, Const(op.Immediate)); } context.Copy(GetIntOrSP(op, op.Rn), address); diff --git a/ARMeilleure/Instructions/InstEmitMul.cs b/ARMeilleure/Instructions/InstEmitMul.cs index 4fce120273..99ad0a7851 100644 --- a/ARMeilleure/Instructions/InstEmitMul.cs +++ b/ARMeilleure/Instructions/InstEmitMul.cs @@ -4,7 +4,6 @@ using ARMeilleure.Translation; using System; using static ARMeilleure.Instructions.InstEmitHelper; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions { @@ -21,9 +20,9 @@ namespace ARMeilleure.Instructions Operand n = GetIntOrZR(op, op.Rn); Operand m = GetIntOrZR(op, op.Rm); - Operand res = context.IMultiply(n, m); + Operand res = context.Multiply(n, m); - res = isAdd ? 
context.IAdd(a, res) : context.ISubtract(a, res); + res = isAdd ? context.Add(a, res) : context.Subtract(a, res); SetIntOrZR(context, op.Rd, res); } @@ -67,9 +66,9 @@ namespace ARMeilleure.Instructions Operand n = GetExtendedRegister32(op.Rn); Operand m = GetExtendedRegister32(op.Rm); - Operand res = context.IMultiply(n, m); + Operand res = context.Multiply(n, m); - res = (flags & MullFlags.Add) != 0 ? context.IAdd(a, res) : context.ISubtract(a, res); + res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res); SetIntOrZR(context, op.Rd, res); } } diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs new file mode 100644 index 0000000000..bebf96d0bb --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -0,0 +1,3153 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h +// https://www.agner.org/optimize/#vectorclass @ vectori128.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + + static partial class InstEmit + { + public static void Abs_S(EmitterContext context) + { + EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Abs_V(EmitterContext context) + { + EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Add_S(EmitterContext context) + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Add_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction addInst = X86PaddInstruction[op.Size]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Addhn_V(EmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false); + } + + public static void Addp_S(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); + + Operand res = context.Add(ne0, ne1); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + } + + public static void Addp_V(EmitterContext context) + { + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Addv_V(EmitterContext context) + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Cls_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)); + + Operand de = context.Call(info, ne, Const(eSize)); + + res = EmitVectorInsert(context, 
res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Clz_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de; + + if (eSize == 64) + { + de = context.CountLeadingZeros(ne); + } + else + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)); + + de = context.Call(info, ne, Const(eSize)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Cnt_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + Operand de; + + if (Optimizations.UsePopCnt) + { + de = context.AddIntrinsic(Instruction.X86Popcnt, ne); + } + else + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountSetBits8)); + + de = context.Call(info, ne); + } + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Fabd_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Instruction.X86Subss, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetScalar(context, -0f); + + res = context.AddIntrinsic(Instruction.X86Andnps, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Instruction.X86Subsd, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetScalar(context, -0d); + + res = context.AddIntrinsic(Instruction.X86Andnpd, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + + return EmitUnaryMathCall(context, nameof(Math.Abs), res); + }); + } + } + + public static void Fabd_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Instruction.X86Subps, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetAllElements(context, -0f); + + res = context.AddIntrinsic(Instruction.X86Andnps, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Instruction.X86Subpd, GetVec(op.Rn), GetVec(op.Rm)); + + Operand mask = X86GetAllElements(context, -0d); + + res = context.AddIntrinsic(Instruction.X86Andnpd, mask, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + + return EmitUnaryMathCall(context, nameof(Math.Abs), res); + }); + } + } + + public static void
Fabs_S(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand mask = X86GetScalar(context, -0f); + + Operand res = context.AddIntrinsic(Instruction.X86Andnps, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand mask = X86GetScalar(context, -0d); + + Operand res = context.AddIntrinsic(Instruction.X86Andnpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Abs), op1); + }); + } + } + + public static void Fabs_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(Instruction.X86Andnps, mask, GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(Instruction.X86Andnpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Abs), op1); + }); + } + } + + public static void Fadd_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Instruction.X86Addss, Instruction.X86Addsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Fadd_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Instruction.X86Addps, Instruction.X86Addpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Faddp_S(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse3) + { + EmitScalarBinaryOpF(context, Instruction.X86Haddps, Instruction.X86Haddpd); + } + else + { + OperandType type = sizeF != 0 ? 
OperandType.FP64 + : OperandType.FP32; + + Operand ne0 = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand ne1 = context.VectorExtract(GetVec(op.Rn), Local(type), 1); + + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), ne0, ne1); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Faddp_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorPairwiseOpF(context, Instruction.X86Addps, Instruction.X86Addpd); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Fdiv_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Instruction.X86Divss, Instruction.X86Divsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); + }); + } + } + + public static void Fdiv_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Instruction.X86Divps, Instruction.X86Divpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); + }); + } + } + + public static void Fmadd_S(EmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.Size == 0) + { + Operand res = context.AddIntrinsic(Instruction.X86Mulss, n, m); + + res = context.AddIntrinsic(Instruction.X86Addss, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand res = context.AddIntrinsic(Instruction.X86Mulsd, n, m); + + res = context.AddIntrinsic(Instruction.X86Addsd, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Fmax_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Instruction.X86Maxss, Instruction.X86Maxsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmax_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Instruction.X86Maxps, Instruction.X86Maxpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmaxnm_S(EmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + + public static void Fmaxnm_V(EmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + + public static void Fmaxp_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorPairwiseOpF(context, Instruction.X86Maxps, Instruction.X86Maxpd); + 
} + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmin_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Instruction.X86Minss, Instruction.X86Minsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fmin_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Instruction.X86Minps, Instruction.X86Minpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fminnm_S(EmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + + public static void Fminnm_V(EmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + + public static void Fminp_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorPairwiseOpF(context, Instruction.X86Minps, Instruction.X86Minpd); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fmla_Se(EmitterContext context) // Fused. + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Fmla_V(EmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Instruction.X86Mulps, n, m); + + res = context.AddIntrinsic(Instruction.X86Addps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Instruction.X86Mulpd, n, m); + + res = context.AddIntrinsic(Instruction.X86Addpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Fmla_Ve(EmitterContext context) // Fused. 
+ { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Instruction.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Instruction.X86Mulps, n, res); + res = context.AddIntrinsic(Instruction.X86Addps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Instruction.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Instruction.X86Mulpd, n, res); + res = context.AddIntrinsic(Instruction.X86Addpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Fmls_Se(EmitterContext context) // Fused. + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Fmls_V(EmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Instruction.X86Mulps, n, m); + + res = context.AddIntrinsic(Instruction.X86Subps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Instruction.X86Mulpd, n, m); + + res = context.AddIntrinsic(Instruction.X86Subpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Fmls_Ve(EmitterContext context) // Fused. 
+ { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Instruction.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Instruction.X86Mulps, n, res); + res = context.AddIntrinsic(Instruction.X86Subps, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Instruction.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Instruction.X86Mulpd, n, res); + res = context.AddIntrinsic(Instruction.X86Subpd, d, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Fmsub_S(EmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.Size == 0) + { + Operand res = context.AddIntrinsic(Instruction.X86Mulss, n, m); + + res = context.AddIntrinsic(Instruction.X86Subss, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand res = context.AddIntrinsic(Instruction.X86Mulsd, n, m); + + res = context.AddIntrinsic(Instruction.X86Subsd, a, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Fmul_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Instruction.X86Mulss, Instruction.X86Mulsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Fmul_Se(EmitterContext context) + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Fmul_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Instruction.X86Mulps, Instruction.X86Mulpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Fmul_Ve(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Instruction.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Instruction.X86Mulps, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + 
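+ // res now holds n * m[index] broadcast across the lanes (via the shuffle above); write it back to Rd.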
context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Instruction.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Instruction.X86Mulpd, n, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Fmulx_S(EmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + + public static void Fmulx_Se(EmitterContext context) + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + + public static void Fmulx_V(EmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + + public static void Fmulx_Ve(EmitterContext context) + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + + public static void Fneg_S(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand mask = X86GetScalar(context, -0f); + + Operand res = context.AddIntrinsic(Instruction.X86Xorps, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand mask = X86GetScalar(context, -0d); + + Operand res = context.AddIntrinsic(Instruction.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fneg_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(Instruction.X86Xorps, mask, GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(Instruction.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fnmadd_S(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 + : OperandType.FP32; + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), 0); + Operand ae = context.VectorExtract(GetVec(op.Ra), Local(type), 0); + + Operand res = context.Subtract(context.Multiply(context.Negate(ne), me), ae); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Fnmsub_S(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 + : OperandType.FP32; + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), 0); + Operand ae = context.VectorExtract(GetVec(op.Ra), Local(type), 0); + + Operand res = context.Subtract(context.Multiply(ne, me), ae); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Fnmul_S(EmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + + public static void Frecpe_S(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + Operand n = GetVec(op.Rn); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(Instruction.X86Rcpss, n)); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1); + }); + } + } + + public static void Frecpe_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + Operand n = GetVec(op.Rn); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(Instruction.X86Rcpps, n)); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1); + }); + } + } + + public static void Frecps_S(EmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetScalar(context, 2f); + + Operand res = context.AddIntrinsic(Instruction.X86Mulss, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subss, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetScalar(context, 2d); + + Operand res = context.AddIntrinsic(Instruction.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subsd, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2); + }); + } + } + + public static void Frecps_V(EmitterContext context) // Fused. 
+ { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, 2f); + + Operand res = context.AddIntrinsic(Instruction.X86Mulps, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subps, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, 2d); + + Operand res = context.AddIntrinsic(Instruction.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subpd, mask, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2); + }); + } + } + + public static void Frecpx_S(EmitterContext context) + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); + }); + } + + public static void Frinta_S(EmitterContext context) + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + + public static void Frinta_V(EmitterContext context) + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + + public static void Frinti_S(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + EmitScalarUnaryOpF(context, (op1) => + { + if (op.Size == 0) + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.RoundF)); + + return context.Call(info, op1); + } + else /* if (op.Size == 1) */ + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Round)); + + return context.Call(info, op1); + } + }); + } + + public static void Frinti_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, (op1) => + { + if (sizeF == 0) + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.RoundF)); + + return context.Call(info, op1); + } + else /* if (sizeF == 1) */ + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Round)); + + return context.Call(info, op1); + } + }); + } + + public static void Frintm_S(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Floor), op1); + }); + } + } + + public static void Frintm_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Floor), op1); + }); + } + } + + public static void Frintn_S(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintn_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + 
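+ // FRINTN rounds to nearest with ties to even, which MidpointRounding.ToEven matches.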
return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintp_S(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1); + }); + } + } + + public static void Frintp_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1); + }); + } + } + + public static void Frintx_S(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + EmitScalarUnaryOpF(context, (op1) => + { + if (op.Size == 0) + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.RoundF)); + + return context.Call(info, op1); + } + else /* if (op.Size == 1) */ + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Round)); + + return context.Call(info, op1); + } + }); + } + + public static void Frintx_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, (op1) => + { + if (sizeF == 0) + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.RoundF)); + + return context.Call(info, op1); + } + else /* if (sizeF == 1) */ + { + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Round)); + + return context.Call(info, op1); + } + }); + } + + public static void Frintz_S(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Truncate), op1); + }); + } + } + + public static void Frintz_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Truncate), op1); + }); + } + } + + public static void Frsqrte_S(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitScalarUnaryOpF(context, Instruction.X86Rsqrtss, 0); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1); + }); + } + } + + public static void Frsqrte_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitVectorUnaryOpF(context, Instruction.X86Rsqrtps, 0); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1); + }); + } + } + + public static void Frsqrts_S(EmitterContext context) // Fused. 
+ { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand maskHalf = X86GetScalar(context, 0.5f); + Operand maskThree = X86GetScalar(context, 3f); + + Operand res = context.AddIntrinsic(Instruction.X86Mulss, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subss, maskThree, res); + res = context.AddIntrinsic(Instruction.X86Mulss, maskHalf, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetScalar(context, 0.5d); + Operand maskThree = X86GetScalar(context, 3d); + + Operand res = context.AddIntrinsic(Instruction.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subsd, maskThree, res); + res = context.AddIntrinsic(Instruction.X86Mulsd, maskHalf, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2); + }); + } + } + + public static void Frsqrts_V(EmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand maskHalf = X86GetAllElements(context, 0.5f); + Operand maskThree = X86GetAllElements(context, 3f); + + Operand res = context.AddIntrinsic(Instruction.X86Mulps, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subps, maskThree, res); + res = context.AddIntrinsic(Instruction.X86Mulps, maskHalf, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetAllElements(context, 0.5d); + Operand maskThree = X86GetAllElements(context, 3d); + + Operand res = context.AddIntrinsic(Instruction.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Instruction.X86Subpd, maskThree, res); + res = context.AddIntrinsic(Instruction.X86Mulpd, maskHalf, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2); + }); + } + } + + public static void Fsqrt_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpF(context, Instruction.X86Sqrtss, Instruction.X86Sqrtsd); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); + }); + } + } + + public static void Fsqrt_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorUnaryOpF(context, Instruction.X86Sqrtps, Instruction.X86Sqrtpd); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); + }); + } + } + + public static void Fsub_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Instruction.X86Subss, Instruction.X86Subsd); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + }); + } + } + + public static void Fsub_V(EmitterContext context) + { + if (Optimizations.FastFP && 
Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Instruction.X86Subps, Instruction.X86Subpd); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + }); + } + } + + public static void Mla_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.Add); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mla_Ve(EmitterContext context) + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Mls_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.Subtract); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mls_Ve(EmitterContext context) + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Mul_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.None); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Mul_Ve(EmitterContext context) + { + EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Neg_S(EmitterContext context) + { + EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Neg_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Instruction subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Raddhn_V(EmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + } + + public static void Rsubhn_V(EmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + } + + public static void Saba_V(EmitterContext context) + { + EmitVectorTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Sabal_V(EmitterContext context) + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Sabd_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41Sabd(context, op, n, m, op.Size); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sabdl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = 
context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = op.Size == 0 + ? Instruction.X86Pmovsxbw + : Instruction.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41Sabd(context, op, n, m, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sadalp_V(EmitterContext context) + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } + + public static void Saddl_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Saddlp_V(EmitterContext context) + { + EmitAddLongPairwise(context, signed: true, accumulate: false); + } + + public static void Saddw_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Shadd_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Instruction.X86Pxor, n, m); + + Instruction shiftInst = op.Size == 1 ? Instruction.X86Psraw + : Instruction.X86Psrad; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Shsub_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 
0x80808080u : 0x80008000u)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + Operand nPlusMask = context.AddIntrinsic(addInst, n, mask); + Operand mPlusMask = context.AddIntrinsic(addInst, m, mask); + + Instruction avgInst = op.Size == 0 ? Instruction.X86Pavgb + : Instruction.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask); + + Instruction subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, nPlusMask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Smax_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction maxInst = X86PmaxsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(info, op1, op2)); + } + } + + public static void Smaxp_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Smaxv_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Smin_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction minInst = X86PminsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(info, op1, op2)); + } + } + + public static void Sminp_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Sminv_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Smlal_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = 
context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction mullInst = op.Size == 0 ? Instruction.X86Pmullw + : Instruction.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlal_Ve(EmitterContext context) + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Smlsl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = op.Size == 0 + ? Instruction.X86Pmovsxbw + : Instruction.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction mullInst = op.Size == 0 ? Instruction.X86Pmullw + : Instruction.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Instruction subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlsl_Ve(EmitterContext context) + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Smull_V(EmitterContext context) + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Smull_Ve(EmitterContext context) + { + EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Sqabs_S(EmitterContext context) + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Sqabs_V(EmitterContext context) + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Sqadd_S(EmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqadd_V(EmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqdmulh_S(EmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.ScalarSx); + } + + public static void Sqdmulh_V(EmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.VectorSx); + } + + public static void Sqneg_S(EmitterContext context) + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Sqneg_V(EmitterContext context) + { + 
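+ // SQNEG negates each element with saturation, so negating the most negative value clamps to the maximum instead of overflowing.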
EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Sqrdmulh_S(EmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.ScalarSx); + } + + public static void Sqrdmulh_V(EmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true), SaturatingFlags.VectorSx); + } + + public static void Sqsub_S(EmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqsub_V(EmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub); + } + + public static void Sqxtn_S(EmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqxtn_V(EmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqxtun_S(EmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqxtun_V(EmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } + + public static void Srhadd_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); + + Instruction subInst = X86PsubInstruction[op.Size]; + + Operand nMinusMask = context.AddIntrinsic(subInst, n, mask); + Operand mMinusMask = context.AddIntrinsic(subInst, m, mask); + + Instruction avgInst = op.Size == 0 ? 
Instruction.X86Pavgb + : Instruction.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightSI(res, Const(1)); + }); + } + } + + public static void Ssubl_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Ssubw_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Instruction subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Sub_S(EmitterContext context) + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void Sub_V(EmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction subInst = X86PsubInstruction[op.Size]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Subhn_V(EmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + } + + public static void Suqadd_S(EmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Suqadd_V(EmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Uaba_V(EmitterContext context) + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Uabal_V(EmitterContext context) + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Uabd_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + 
Operand m = GetVec(op.Rm); + + EmitSse41Uabd(context, op, n, m, op.Size); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uabdl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = op.Size == 0 + ? Instruction.X86Pmovzxbw + : Instruction.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41Uabd(context, op, n, m, op.Size + 1); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uadalp_V(EmitterContext context) + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } + + public static void Uaddl_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uaddlp_V(EmitterContext context) + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } + + public static void Uaddlv_V(EmitterContext context) + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Uaddw_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uhadd_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Instruction.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Instruction.X86Pxor, n, m); + + Instruction shiftInst = op.Size == 1 ? 
Instruction.X86Psrlw + : Instruction.X86Psrld; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Uhsub_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction avgInst = op.Size == 0 ? Instruction.X86Pavgb + : Instruction.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + Instruction subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Umax_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(info, op1, op2)); + } + } + + public static void Umaxp_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Umaxv_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Umin_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction minInst = X86PminuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(info, op1, op2)); + } + } + + public static void Uminp_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Uminv_V(EmitterContext context) + { + Type[] types = new Type[] { typeof(ulong), 
typeof(ulong) }; + + MethodInfo info = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(info, op1, op2)); + } + + public static void Umlal_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction mullInst = op.Size == 0 ? Instruction.X86Pmullw + : Instruction.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlal_Ve(EmitterContext context) + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Umlsl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = op.Size == 0 + ? Instruction.X86Pmovzxbw + : Instruction.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction mullInst = op.Size == 0 ? 
Instruction.X86Pmullw + : Instruction.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Instruction subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlsl_Ve(EmitterContext context) + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Umull_V(EmitterContext context) + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Umull_Ve(EmitterContext context) + { + EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Uqadd_S(EmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqadd_V(EmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqsub_S(EmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqsub_V(EmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqxtn_S(EmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqxtn_V(EmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } + + public static void Urhadd_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction avgInst = op.Size == 0 ? 
Instruction.X86Pavgb + : Instruction.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightUI(res, Const(1)); + }); + } + } + + public static void Usqadd_S(EmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usqadd_V(EmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usubl_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Instruction subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Usubw_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Instruction.X86Psrldq, m, Const(8)); + } + + Instruction movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Instruction subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + private static Operand EmitAbs(EmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + private static void EmitAddLongPairwise(EmitterContext context, bool signed, bool accumulate) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand e = context.Add(ne0, ne1); + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + + e = context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitDoublingMultiplyHighHalf( + EmitterContext context, + Operand n, + Operand m, + bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand res = context.Multiply(n, m); + + if (!round) + { + res = context.ShiftRightSI(res, Const(eSize - 1)); + } + else + { + long roundConst = 1L << (eSize - 1); + + res = 
context.ShiftLeft(res, Const(1)); + + res = context.Add(res, Const(roundConst)); + + res = context.ShiftRightSI(res, Const(eSize)); + + Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue)); + + res = context.ConditionalSelect(isIntMin, context.Negate(res), res); + } + + return res; + } + + private static void EmitHighNarrow(EmitterContext context, Func2I emit, bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int elems = 8 >> op.Size; + int eSize = 8 << op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + long roundConst = 1L << (eSize - 1); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); + + Operand de = emit(ne, me); + + if (round) + { + de = context.Add(de, Const(roundConst)); + } + + de = context.ShiftRightUI(de, Const(eSize)); + + res = EmitVectorInsert(context, res, de, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarRoundOpF(EmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Instruction inst = (op.Size & 1) != 0 ? Instruction.X86Roundsd + : Instruction.X86Roundss; + + Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorRoundOpF(EmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Instruction inst = (op.Size & 1) != 0 ? 
Instruction.X86Roundpd + : Instruction.X86Roundps; + + Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private enum AddSub + { + None, + Add, + Subtract + } + + private static void EmitSse41Mul_AddSub(EmitterContext context, AddSub addSub) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = null; + + if (op.Size == 0) + { + Operand ns8 = context.AddIntrinsic(Instruction.X86Psrlw, n, Const(8)); + Operand ms8 = context.AddIntrinsic(Instruction.X86Psrlw, m, Const(8)); + + res = context.AddIntrinsic(Instruction.X86Pmullw, ns8, ms8); + + res = context.AddIntrinsic(Instruction.X86Psllw, res, Const(8)); + + Operand res2 = context.AddIntrinsic(Instruction.X86Pmullw, n, m); + + Operand mask = X86GetAllElements(context, 0x00FF00FF); + + res = context.AddIntrinsic(Instruction.X86Pblendvb, res, res2, mask); + } + else if (op.Size == 1) + { + res = context.AddIntrinsic(Instruction.X86Pmullw, n, m); + } + else + { + res = context.AddIntrinsic(Instruction.X86Pmulld, n, m); + } + + Operand d = GetVec(op.Rd); + + if (addSub == AddSub.Add) + { + switch (op.Size) + { + case 0: res = context.AddIntrinsic(Instruction.X86Paddb, d, res); break; + case 1: res = context.AddIntrinsic(Instruction.X86Paddw, d, res); break; + case 2: res = context.AddIntrinsic(Instruction.X86Paddd, d, res); break; + case 3: res = context.AddIntrinsic(Instruction.X86Paddq, d, res); break; + } + } + else if (addSub == AddSub.Subtract) + { + switch (op.Size) + { + case 0: res = context.AddIntrinsic(Instruction.X86Psubb, d, res); break; + case 1: res = context.AddIntrinsic(Instruction.X86Psubw, d, res); break; + case 2: res = context.AddIntrinsic(Instruction.X86Psubd, d, res); break; + case 3: res = context.AddIntrinsic(Instruction.X86Psubq, d, res); break; + } + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + + private static void EmitSse41Sabd( + EmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + int size) + { + Instruction cmpgtInst = X86PcmpgtInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); + + Instruction subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + res = context.AddIntrinsic(Instruction.X86Pand, cmpMask, res); + + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res2 = context.AddIntrinsic(Instruction.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Instruction.X86Por, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse41Uabd( + EmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + int size) + { + Instruction maxInst = X86PmaxuInstruction[size]; + + Operand max = context.AddIntrinsic(maxInst, m, n); + + Instruction cmpeqInst = X86PcmpeqInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m); + + Operand onesMask = X86GetAllElements(context, -1L); + + cmpMask = context.AddIntrinsic(Instruction.X86Pandn, cmpMask, onesMask); + + Instruction subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res = 
context.AddIntrinsic(Instruction.X86Pand, cmpMask, res); + res2 = context.AddIntrinsic(Instruction.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Instruction.X86Por, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs new file mode 100644 index 0000000000..b989fb4ff2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -0,0 +1,1413 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper + { + public static readonly Instruction[] X86PaddInstruction = new Instruction[] + { + Instruction.X86Paddb, + Instruction.X86Paddw, + Instruction.X86Paddd, + Instruction.X86Paddq + }; + + public static readonly Instruction[] X86PcmpeqInstruction = new Instruction[] + { + Instruction.X86Pcmpeqb, + Instruction.X86Pcmpeqw, + Instruction.X86Pcmpeqd, + Instruction.X86Pcmpeqq + }; + + public static readonly Instruction[] X86PcmpgtInstruction = new Instruction[] + { + Instruction.X86Pcmpgtb, + Instruction.X86Pcmpgtw, + Instruction.X86Pcmpgtd, + Instruction.X86Pcmpgtq + }; + + public static readonly Instruction[] X86PmaxsInstruction = new Instruction[] + { + Instruction.X86Pmaxsb, + Instruction.X86Pmaxsw, + Instruction.X86Pmaxsd + }; + + public static readonly Instruction[] X86PmaxuInstruction = new Instruction[] + { + Instruction.X86Pmaxub, + Instruction.X86Pmaxuw, + Instruction.X86Pmaxud + }; + + public static readonly Instruction[] X86PminsInstruction = new Instruction[] + { + Instruction.X86Pminsb, + Instruction.X86Pminsw, + Instruction.X86Pminsd + }; + + public static readonly Instruction[] X86PminuInstruction = new Instruction[] + { + Instruction.X86Pminub, + Instruction.X86Pminuw, + Instruction.X86Pminud + }; + + public static readonly Instruction[] X86PmovsxInstruction = new Instruction[] + { + Instruction.X86Pmovsxbw, + Instruction.X86Pmovsxwd, + Instruction.X86Pmovsxdq + }; + + public static readonly Instruction[] X86PmovzxInstruction = new Instruction[] + { + Instruction.X86Pmovzxbw, + Instruction.X86Pmovzxwd, + Instruction.X86Pmovzxdq + }; + + public static readonly Instruction[] X86PsubInstruction = new Instruction[] + { + Instruction.X86Psubb, + Instruction.X86Psubw, + Instruction.X86Psubd, + Instruction.X86Psubq + }; + + public static int GetImmShl(OpCodeSimdShImm op) + { + return op.Imm - (8 << op.Size); + } + + public static int GetImmShr(OpCodeSimdShImm op) + { + return (8 << (op.Size + 1)) - op.Imm; + } + + public static Operand X86GetScalar(EmitterContext context, float value) + { + int imm = BitConverter.SingleToInt32Bits(value); + + return context.Copy(Local(OperandType.V128), Const(imm)); + } + + public static Operand X86GetScalar(EmitterContext context, double value) + { + long imm = BitConverter.DoubleToInt64Bits(value); + + return context.Copy(Local(OperandType.V128), Const(imm)); + } + + public static Operand X86GetAllElements(EmitterContext context, float value) + { + return X86GetAllElements(context, 
BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetAllElements(EmitterContext context, double value) + { + return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetAllElements(EmitterContext context, int value) + { + Operand vector = context.Copy(Local(OperandType.V128), Const(value)); + + vector = context.AddIntrinsic(Instruction.X86Shufps, vector, vector, Const(0)); + + return vector; + } + + public static Operand X86GetAllElements(EmitterContext context, long value) + { + Operand vector = context.Copy(Local(OperandType.V128), Const(value)); + + vector = context.AddIntrinsic(Instruction.X86Movlhps, vector, vector); + + return vector; + } + + public static int X86GetRoundControl(FPRoundingMode roundMode) + { + switch (roundMode) + { + case FPRoundingMode.ToNearest: return 8 | 0; + case FPRoundingMode.TowardsPlusInfinity: return 8 | 2; + case FPRoundingMode.TowardsMinusInfinity: return 8 | 1; + case FPRoundingMode.TowardsZero: return 8 | 3; + } + + throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."); + } + + public static void EmitScalarUnaryOpF( + EmitterContext context, + Instruction inst32, + Instruction inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Instruction inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarBinaryOpF( + EmitterContext context, + Instruction inst32, + Instruction inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpF( + EmitterContext context, + Instruction inst32, + Instruction inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Instruction inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF( + EmitterContext context, + Instruction inst32, + Instruction inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction inst = (op.Size & 1) != 0 ? 
inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static Operand EmitUnaryMathCall(EmitterContext context, string name, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo info; + + if (sizeF == 0) + { + info = typeof(MathF).GetMethod(name, new Type[] { typeof(float) }); + } + else /* if (sizeF == 1) */ + { + info = typeof(Math).GetMethod(name, new Type[] { typeof(double) }); + } + + return context.Call(info, n); + } + + public static Operand EmitBinaryMathCall(EmitterContext context, string name, Operand n, Operand m) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo info; + + if (sizeF == 0) + { + info = typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) }); + } + else /* if (sizeF == 1) */ + { + info = typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) }); + } + + return context.Call(info, n, m); + } + + public static Operand EmitRoundMathCall(EmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + MethodInfo info; + + if (sizeF == 0) + { + info = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }); + } + else /* if (sizeF == 1) */ + { + info = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }); + } + + return context.Call(info, n, Const((int)roundMode)); + } + + public static Operand EmitSoftFloatCall(EmitterContext context, string name, params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + Type type = sizeF == 0 ? typeof(SoftFloat32) + : typeof(SoftFloat64); + + return context.Call(type.GetMethod(name), callArgs); + } + + public static void EmitScalarBinaryOpByElemF(EmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand m = context.VectorExtract(GetVec(op.Rm), Local(type), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryOpByElemF(EmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand d = context.VectorExtract(GetVec(op.Rd), Local(type), 0); + Operand n = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand m = context.VectorExtract(GetVec(op.Rm), Local(type), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0)); + } + + public static void EmitScalarUnaryOpSx(EmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpSx(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpZx(EmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpZx(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarTernaryOpZx(EmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size); + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpF(EmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0)); + } + + public static void EmitScalarBinaryOpF(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand m = context.VectorExtract(GetVec(op.Rm), Local(type), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryRaOpF(EmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand a = context.VectorExtract(GetVec(op.Ra), Local(type), 0); + Operand n = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand m = context.VectorExtract(GetVec(op.Rm), Local(type), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0)); + } + + public static void EmitVectorUnaryOpF(EmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), index); + + res = context.VectorInsert(res, emit(ne), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), index); + Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpF(EmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(GetVec(op.Rd), Local(type), index); + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), index); + Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemF(EmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), index); + Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), op.Index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemF(EmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(GetVec(op.Rd), Local(type), index); + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), index); + Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), op.Index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpSx(EmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpSx(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpSx(EmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpZx(EmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpZx(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpZx(EmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, 
ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemSx(EmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemZx(EmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemZx(EmitterContext context, Func3I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmUnaryOp(EmitterContext context, Func1I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, emit(imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmBinaryOp(EmitterContext context, Func2I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRmBinaryOpSx(EmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRmBinaryOpZx(EmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: false); + } + + public static void EmitVectorWidenRmBinaryOp(EmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmBinaryOpSx(EmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmBinaryOpZx(EmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmBinaryOp(EmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmTernaryOpSx(EmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmTernaryOpZx(EmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmTernaryOp(EmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenBinaryOpByElemSx(EmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenBinaryOpByElemZx(EmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenBinaryOpByElem(EmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenTernaryOpByElemSx(EmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenTernaryOpByElemZx(EmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenTernaryOpByElem(EmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpSx(EmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: true); + } + + public static void EmitVectorPairwiseOpZx(EmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: false); + } + + public static void EmitVectorPairwiseOp(EmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed); + Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size); + res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorAcrossVectorOpSx(EmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, true); + } + + public static void EmitVectorAcrossVectorOpZx(EmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, false); + } + + public static void EmitVectorAcrossVectorOp(EmitterContext context, Func2I emit, bool signed) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + + Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed); + + for (int index = 1; index < elems; index++) + { + Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + res = emit(res, n); + } + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + } + + public static void EmitVectorPairwiseOpF(EmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int pairs = op.GetPairsCount() >> sizeF + 2; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = context.VectorExtract(GetVec(op.Rn), Local(type), pairIndex); + Operand n1 = context.VectorExtract(GetVec(op.Rn), Local(type), pairIndex + 1); + + Operand m0 = context.VectorExtract(GetVec(op.Rm), Local(type), pairIndex); + Operand m1 = context.VectorExtract(GetVec(op.Rm), Local(type), pairIndex + 1); + + res = context.VectorInsert(res, emit(n0, n1), index); + res = context.VectorInsert(res, emit(m0, m1), pairs + index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpF(EmitterContext context, Instruction inst32, Instruction inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand unpck = context.AddIntrinsic(Instruction.X86Unpcklps, n, m); + + Operand zero = context.VectorZero(); + + Operand part0 = context.AddIntrinsic(Instruction.X86Movlhps, unpck, zero); + Operand part1 = context.AddIntrinsic(Instruction.X86Movhlps, zero, unpck); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + } + else /* if (op.RegisterSize == RegisterSize.Simd128) */ + { + const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; + + Operand part0 = context.AddIntrinsic(Instruction.X86Shufps, n, m, Const(sm0)); + Operand part1 = context.AddIntrinsic(Instruction.X86Shufps, n, m, Const(sm1)); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + } + } + else /* if (sizeF == 1) */ + { + Operand part0 = context.AddIntrinsic(Instruction.X86Unpcklpd, n, m); + Operand part1 = context.AddIntrinsic(Instruction.X86Unpckhpd, n, m); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1)); + } + } + + + [Flags] + public enum SaturatingFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Add = 1 << 2, + Sub = 1 << 3, + + Accumulate = 1 << 4, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + public static void EmitScalarSaturatingUnaryOpSx(EmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx); + } + + public static void EmitVectorSaturatingUnaryOpSx(EmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx); + } + + public static void EmitSaturatingUnaryOpSx(EmitterContext context, Func1I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand de; + + if (op.Size <= 2) + { + de = EmitSatQ(context, emit(ne), op.Size, signedSrc: true, signedDst: true); + } + else /* if (op.Size == 3) */ + { + de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarSaturatingBinaryOpSx(EmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags); + } + + public static void EmitScalarSaturatingBinaryOpZx(EmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags); + } + + public static void EmitVectorSaturatingBinaryOpSx(EmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags); + } + + public static void EmitVectorSaturatingBinaryOpZx(EmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags); + } + + public static void EmitSaturatingBinaryOp(EmitterContext context, Func2I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + bool signed = (flags & SaturatingFlags.Signed) != 0; + + bool add = (flags & SaturatingFlags.Add) != 0; + bool sub = (flags & SaturatingFlags.Sub) != 0; + + bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + if (add || sub) + { + OpCodeSimdReg opReg = (OpCodeSimdReg)op; + + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = add ? 
context.Add (ne, me) + : context.Subtract(ne, me); + + de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed); + } + else if (add) /* if (op.Size == 3) */ + { + de = EmitBinarySatQAdd(context, ne, me, signed); + } + else /* if (sub) */ + { + de = EmitBinarySatQSub(context, ne, me, signed); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else if (accumulate) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed); + Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = context.Add(ne, me); + + de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + de = EmitBinarySatQAccumulate(context, ne, me, signed); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else + { + OpCodeSimdReg opReg = (OpCodeSimdReg)op; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed); + + Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + public enum SaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + public static void EmitSaturatingNarrowOp(EmitterContext context, SaturatingNarrowFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + Operand temp = EmitSatQ(context, ne, op.Size, signedSrc, signedDst); + + res = EmitVectorInsert(context, res, temp, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). + public static Operand EmitSatQ(EmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst) + { + if ((uint)sizeDst > 2u) + { + throw new ArgumentOutOfRangeException(nameof(sizeDst)); + } + + string name; + + if (signedSrc) + { + name = signedDst ? nameof(SoftFallback.SignedSrcSignedDstSatQ) + : nameof(SoftFallback.SignedSrcUnsignedDstSatQ); + } + else + { + name = signedDst ? nameof(SoftFallback.UnsignedSrcSignedDstSatQ) + : nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ); + } + + MethodInfo info = typeof(SoftFallback).GetMethod(name); + + return context.Call(info, op, Const(sizeDst)); + } + + // TSrc (64bit) == TDst (64bit); signed. 
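+ // Reached only for 64-bit elements (op.Size == 3): the wrapped abs/neg of long.MinValue is the single overflow case, saturated to long.MaxValue with FPSR.Qc set.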
+ public static Operand EmitUnarySignedSatQAbsOrNeg(EmitterContext context, Operand op) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnarySignedSatQAbsOrNeg)), op); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQAdd(EmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + string name = signed ? nameof(SoftFallback.BinarySignedSatQAdd) + : nameof(SoftFallback.BinaryUnsignedSatQAdd); + + return context.Call(typeof(SoftFallback).GetMethod(name), op1, op2); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQSub(EmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + string name = signed ? nameof(SoftFallback.BinarySignedSatQSub) + : nameof(SoftFallback.BinaryUnsignedSatQSub); + + return context.Call(typeof(SoftFallback).GetMethod(name), op1, op2); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQAccumulate(EmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + string name = signed ? nameof(SoftFallback.BinarySignedSatQAcc) + : nameof(SoftFallback.BinaryUnsignedSatQAcc); + + return context.Call(typeof(SoftFallback).GetMethod(name), op1, op2); + } + + public static Operand EmitVectorExtractSx(EmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx(EmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract(EmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = Local(size == 3 ? 
OperandType.I64 + : OperandType.I32); + + switch (size) + { + case 0: context.VectorExtract8 (GetVec(reg), res, index); break; + case 1: context.VectorExtract16(GetVec(reg), res, index); break; + case 2: context.VectorExtract (GetVec(reg), res, index); break; + case 3: context.VectorExtract (GetVec(reg), res, index); break; + } + + res = context.Copy(Local(OperandType.I64), res); + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8 (res); break; + case 1: res = context.SignExtend16(res); break; + case 2: res = context.SignExtend32(res); break; + } + } + + return res; + } + + public static Operand EmitVectorInsert(EmitterContext context, Operand vector, Operand value, int index, int size) + { + ThrowIfInvalid(index, size); + + if (size < 3) + { + value = context.Copy(Local(OperandType.I32), value); + } + + switch (size) + { + case 0: vector = context.VectorInsert8 (vector, value, index); break; + case 1: vector = context.VectorInsert16(vector, value, index); break; + case 2: vector = context.VectorInsert (vector, value, index); break; + case 3: vector = context.VectorInsert (vector, value, index); break; + } + + return vector; + } + + private static void ThrowIfInvalid(int index, int size) + { + if ((uint)size > 3u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 16u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs new file mode 100644 index 0000000000..e3ad19d957 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs @@ -0,0 +1,35 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Fmov_Si(EmitterContext context) + { + OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp; + + Operand imm; + + if (op.Size != 0) + { + imm = Const(op.Immediate); + } + else + { + imm = Const((int)op.Immediate); + } + + context.Copy(GetVec(op.Rd), imm); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 1501a292da..6e8f29a194 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -314,8 +314,10 @@ namespace ARMeilleure.Instructions Sli_V, Smax_V, Smaxp_V, + Smaxv_V, Smin_V, Sminp_V, + Sminv_V, Smlal_V, Smlal_Ve, Smlsl_V, @@ -390,8 +392,10 @@ namespace ARMeilleure.Instructions Uhsub_V, Umax_V, Umaxp_V, + Umaxv_V, Umin_V, Uminp_V, + Uminv_V, Umlal_V, Umlal_Ve, Umlsl_V, diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index 6594739ad7..f8fe826c40 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -39,7 +39,7 @@ namespace ARMeilleure.Instructions GetContext().OnUndefined(address, opCode); } - private static ExecutionContext GetContext() + public static ExecutionContext GetContext() { return _contexts[Thread.CurrentThread]; } diff --git a/ARMeilleure/Instructions/SoftFallback.cs 
b/ARMeilleure/Instructions/SoftFallback.cs index bc63270685..4581b92c7e 100644 --- a/ARMeilleure/Instructions/SoftFallback.cs +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -1,3 +1,4 @@ +using ARMeilleure.State; using System; namespace ARMeilleure.Instructions @@ -12,7 +13,395 @@ namespace ARMeilleure.Instructions #endregion +#region "Rounding" + public static double Round(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + FPRoundingMode roundMode = context.Fpcr.GetRoundingMode(); + + if (roundMode == FPRoundingMode.ToNearest) + { + return Math.Round(value); // even + } + else if (roundMode == FPRoundingMode.TowardsPlusInfinity) + { + return Math.Ceiling(value); + } + else if (roundMode == FPRoundingMode.TowardsMinusInfinity) + { + return Math.Floor(value); + } + else /* if (roundMode == FPRoundingMode.TowardsZero) */ + { + return Math.Truncate(value); + } + } + + public static float RoundF(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + FPRoundingMode roundMode = context.Fpcr.GetRoundingMode(); + + if (roundMode == FPRoundingMode.ToNearest) + { + return MathF.Round(value); // even + } + else if (roundMode == FPRoundingMode.TowardsPlusInfinity) + { + return MathF.Ceiling(value); + } + else if (roundMode == FPRoundingMode.TowardsMinusInfinity) + { + return MathF.Floor(value); + } + else /* if (roundMode == FPRoundingMode.TowardsZero) */ + { + return MathF.Truncate(value); + } + } +#endregion + +#region "Saturating" + public static long SignedSrcSignedDstSatQ(long op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + long tMaxValue = (1L << (eSize - 1)) - 1L; + long tMinValue = -(1L << (eSize - 1)); + + if (op > tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else if (op < tMinValue) + { + context.Fpsr |= FPSR.Qc; + + return tMinValue; + } + else + { + return op; + } + } + + public static ulong SignedSrcUnsignedDstSatQ(long op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + ulong tMaxValue = (1UL << eSize) - 1UL; + ulong tMinValue = 0UL; + + if (op > (long)tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else if (op < (long)tMinValue) + { + context.Fpsr |= FPSR.Qc; + + return tMinValue; + } + else + { + return (ulong)op; + } + } + + public static long UnsignedSrcSignedDstSatQ(ulong op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + long tMaxValue = (1L << (eSize - 1)) - 1L; + + if (op > (ulong)tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else + { + return (long)op; + } + } + + public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + ulong tMaxValue = (1UL << eSize) - 1UL; + + if (op > tMaxValue) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else + { + return op; + } + } + + public static long UnarySignedSatQAbsOrNeg(long op) + { + ExecutionContext context = NativeInterface.GetContext(); + + if (op == long.MinValue) + { + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + return op; + } + } + + public static long BinarySignedSatQAdd(long op1, long op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + long add = op1 + op2; + + if ((~(op1 ^ op2) & (op1 ^ add)) < 0L) + { + context.Fpsr |= FPSR.Qc; + + if (op1 < 0L) + { + return long.MinValue; + } + 
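+ // Overflow with a non-negative op1 can only be in the positive direction, so clamp to long.MaxValue.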
else + { + return long.MaxValue; + } + } + else + { + return add; + } + } + + public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + ulong add = op1 + op2; + + if ((add < op1) && (add < op2)) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MaxValue; + } + else + { + return add; + } + } + + public static long BinarySignedSatQSub(long op1, long op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + long sub = op1 - op2; + + if (((op1 ^ op2) & (op1 ^ sub)) < 0L) + { + context.Fpsr |= FPSR.Qc; + + if (op1 < 0L) + { + return long.MinValue; + } + else + { + return long.MaxValue; + } + } + else + { + return sub; + } + } + + public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + ulong sub = op1 - op2; + + if (op1 < op2) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MinValue; + } + else + { + return sub; + } + } + + public static long BinarySignedSatQAcc(ulong op1, long op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + if (op1 <= (ulong)long.MaxValue) + { + // op1 from ulong.MinValue to (ulong)long.MaxValue + // op2 from long.MinValue to long.MaxValue + + long add = (long)op1 + op2; + + if ((~op2 & add) < 0L) + { + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + return add; + } + } + else if (op2 >= 0L) + { + // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // op2 from (long)ulong.MinValue to long.MaxValue + + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + // op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + // op2 from long.MinValue to (long)ulong.MinValue - 1L + + ulong add = op1 + (ulong)op2; + + if (add > (ulong)long.MaxValue) + { + context.Fpsr |= FPSR.Qc; + + return long.MaxValue; + } + else + { + return (long)add; + } + } + } + + public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2) + { + ExecutionContext context = NativeInterface.GetContext(); + + if (op1 >= 0L) + { + // op1 from (long)ulong.MinValue to long.MaxValue + // op2 from ulong.MinValue to ulong.MaxValue + + ulong add = (ulong)op1 + op2; + + if ((add < (ulong)op1) && (add < op2)) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MaxValue; + } + else + { + return add; + } + } + else if (op2 > (ulong)long.MaxValue) + { + // op1 from long.MinValue to (long)ulong.MinValue - 1L + // op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue + + return (ulong)op1 + op2; + } + else + { + // op1 from long.MinValue to (long)ulong.MinValue - 1L + // op2 from ulong.MinValue to (ulong)long.MaxValue + + long add = op1 + (long)op2; + + if (add < (long)ulong.MinValue) + { + context.Fpsr |= FPSR.Qc; + + return ulong.MinValue; + } + else + { + return (ulong)add; + } + } + } +#endregion + #region "Count" + public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). + { + value ^= value >> 1; + + int highBit = size - 2; + + for (int bit = highBit; bit >= 0; bit--) + { + if (((int)(value >> bit) & 0b1) != 0) + { + return (ulong)(highBit - bit); + } + } + + return (ulong)(size - 1); + } + + private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). 
+ { + if (value == 0ul) + { + return (ulong)size; + } + + int nibbleIdx = size; + int preCount, count = 0; + + do + { + nibbleIdx -= 4; + preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111]; + count += preCount; + } + while (preCount == 4); + + return (ulong)count; + } + public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.). { value = ((value >> 1) & 0x55ul) + (value & 0x55ul); @@ -26,15 +415,15 @@ namespace ARMeilleure.Instructions private const uint Crc32RevPoly = 0xedb88320; private const uint Crc32cRevPoly = 0x82f63b78; - public static uint Crc32b(uint crc, byte val) => Crc32 (crc, Crc32RevPoly, val); - public static uint Crc32h(uint crc, ushort val) => Crc32h(crc, Crc32RevPoly, val); - public static uint Crc32w(uint crc, uint val) => Crc32w(crc, Crc32RevPoly, val); - public static uint Crc32x(uint crc, ulong val) => Crc32x(crc, Crc32RevPoly, val); + public static uint Crc32b(uint crc, byte value) => Crc32 (crc, Crc32RevPoly, value); + public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value); + public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value); + public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value); - public static uint Crc32cb(uint crc, byte val) => Crc32 (crc, Crc32cRevPoly, val); - public static uint Crc32ch(uint crc, ushort val) => Crc32h(crc, Crc32cRevPoly, val); - public static uint Crc32cw(uint crc, uint val) => Crc32w(crc, Crc32cRevPoly, val); - public static uint Crc32cx(uint crc, ulong val) => Crc32x(crc, Crc32cRevPoly, val); + public static uint Crc32cb(uint crc, byte value) => Crc32 (crc, Crc32cRevPoly, value); + public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value); + public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value); + public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value); private static uint Crc32h(uint crc, uint poly, ushort val) { @@ -84,7 +473,25 @@ namespace ARMeilleure.Instructions #endregion #region "Aes" + public static V128 Decrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey)); + } + public static V128 Encrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey)); + } + + public static V128 InverseMixColumns(V128 value) + { + return CryptoHelper.AesInvMixColumns(value); + } + + public static V128 MixColumns(V128 value) + { + return CryptoHelper.AesMixColumns(value); + } #endregion #region "Sha1" diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs new file mode 100644 index 0000000000..13b9a18089 --- /dev/null +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -0,0 +1,2741 @@ +using ARMeilleure.State; +using System; +using System.Diagnostics; + +namespace ARMeilleure.Instructions +{ + static class SoftFloat + { + static SoftFloat() + { + RecipEstimateTable = BuildRecipEstimateTable(); + RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable(); + } + + internal static readonly byte[] RecipEstimateTable; + internal static readonly byte[] RecipSqrtEstimateTable; + + private static byte[] BuildRecipEstimateTable() + { + byte[] tbl = new byte[256]; + + for (int idx = 0; idx < 256; idx++) + { + uint src = (uint)idx + 256u; + + Debug.Assert(256u <= src && src < 512u); + + src = (src << 1) + 1u; + + uint aux = (1u << 19) / src; + + uint dst = (aux + 1u) >> 1; + + Debug.Assert(256u <= dst && 
dst < 512u); + + tbl[idx] = (byte)(dst - 256u); + } + + return tbl; + } + + private static byte[] BuildRecipSqrtEstimateTable() + { + byte[] tbl = new byte[384]; + + for (int idx = 0; idx < 384; idx++) + { + uint src = (uint)idx + 128u; + + Debug.Assert(128u <= src && src < 512u); + + if (src < 256u) + { + src = (src << 1) + 1u; + } + else + { + src = (src >> 1) << 1; + src = (src + 1u) << 1; + } + + uint aux = 512u; + + while (src * (aux + 1u) * (aux + 1u) < (1u << 28)) + { + aux = aux + 1u; + } + + uint dst = (aux + 1u) >> 1; + + Debug.Assert(256u <= dst && dst < 512u); + + tbl[idx] = (byte)(dst - 256u); + } + + return tbl; + } + } + + static class SoftFloat16_32 + { + public static float FPConvert(ushort valueBits) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if ((context.Fpcr & FPCR.Dn) != 0) + { + result = FPDefaultNaN(); + } + else + { + result = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + result = FPInfinity(sign); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else + { + result = FPRoundCv(real, context); + } + + return result; + } + + private static float FPDefaultNaN() + { + return -float.NaN; + } + + private static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static double FPUnpackCv( + this ushort valueBits, + out FPType type, + out bool sign, + ExecutionContext context) + { + sign = (~(uint)valueBits & 0x8000u) == 0u; + + uint exp16 = ((uint)valueBits & 0x7C00u) >> 10; + uint frac16 = (uint)valueBits & 0x03FFu; + + double real; + + if (exp16 == 0u) + { + if (frac16 == 0u) + { + type = FPType.Zero; + real = 0d; + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10)); + } + } + else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0) + { + if (frac16 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10)); + } + + return sign ? 
-real : real; + } + + private static float FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -126; + + const int e = 8; + const int f = 23; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + float result; + + if (biasedExp >= (1u << e) - 1u) + { + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu))); + } + + if (error != 0d) + { + FPProcessException(FPException.Inexact, context); + } + + return result; + } + + private static float FPConvertNaN(ushort valueBits) + { + return BitConverter.Int32BitsToSingle( + (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13)); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat32_16 + { + public static ushort FPConvert(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context); + + bool altHp = (context.Fpcr & FPCR.Ahp) != 0; + + ushort resultBits; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if (altHp) + { + resultBits = FPZero(sign); + } + else if ((context.Fpcr & FPCR.Dn) != 0) + { + resultBits = FPDefaultNaN(); + } + else + { + resultBits = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN || altHp) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + if (altHp) + { + resultBits = (ushort)((sign ? 
1u : 0u) << 15 | 0x7FFFu); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + resultBits = FPInfinity(sign); + } + } + else if (type == FPType.Zero) + { + resultBits = FPZero(sign); + } + else + { + resultBits = FPRoundCv(real, context); + } + + return resultBits; + } + + private static ushort FPDefaultNaN() + { + return (ushort)0x7E00u; + } + + private static ushort FPInfinity(bool sign) + { + return sign ? (ushort)0xFC00u : (ushort)0x7C00u; + } + + private static ushort FPZero(bool sign) + { + return sign ? (ushort)0x8000u : (ushort)0x0000u; + } + + private static ushort FPMaxNormal(bool sign) + { + return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; + } + + private static double FPUnpackCv( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + uint exp32 = (valueBits & 0x7F800000u) >> 23; + uint frac32 = valueBits & 0x007FFFFFu; + + double real; + + if (exp32 == 0u) + { + if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + real = 0d; + + if (frac32 != 0u) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23)); + } + } + else if (exp32 == 0xFFu) + { + if (frac32 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23)); + } + + return sign ? -real : real; + } + + private static ushort FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -14; + + const int e = 5; + const int f = 10; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + ushort resultBits; + + if ((context.Fpcr & FPCR.Ahp) == 0) + { + if (biasedExp >= (1u << e) - 1u) + { + resultBits = overflowToInf ? 
FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + else + { + if (biasedExp >= 1u << e) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + FPProcessException(FPException.InvalidOp, context); + + error = 0d; + } + else + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + + if (error != 0d) + { + FPProcessException(FPException.Inexact, context); + } + + return resultBits; + } + + private static ushort FPConvertNaN(uint valueBits) + { + return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat32 + { + public static float FPAdd(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static int FPCompare(float value1, float value2, bool signalNaNs, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static float FPCompareEQ(float value1, float value2, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + 
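+ // Comparison mask result: all ones when the operands compare equal, all zeros otherwise.
+ // NaN operands compare unequal, and only signaling NaNs raise InvalidOp.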
return result; + } + + public static float FPCompareGE(float value1, float value2, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static float FPCompareGT(float value1, float value2, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static float FPCompareLE(float value1, float value2, ExecutionContext context) + { + return FPCompareGE(value2, value1, context); + } + + public static float FPCompareLT(float value1, float value2, ExecutionContext context) + { + return FPCompareGT(value2, value1, context); + } + + public static float FPDiv(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + FPProcessException(FPException.DivideByZero, context); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMax(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + } + + return result; + } + + public static float FPMaxNum(float value1, float value2) + { + ExecutionContext context = 
NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2); + } + + public static float FPMin(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + } + + return result; + } + + public static float FPMinNum(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2); + } + + public static float FPMul(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMulAdd(float valueA, float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && 
inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMulSub(float valueA, float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static float FPMulX(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRecipEstimate(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (MathF.Abs(value) < MathF.Pow(2f, -128)) + { + bool overflowToInf; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? 
FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + FPProcessException(FPException.Inexact, context); + } + else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 253u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu)); + } + + return result; + } + + public static float FPRecipStepFused(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2f + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRecpX(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else + { + uint notExp = (~op >> 23) & 0xFFu; + uint maxExp = 0xFEu; + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? 
maxExp : notExp) << 23)); + } + + return result; + } + + public static float FPRSqrtEstimate(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (380u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15)); + } + + return result; + } + + public static float FPRSqrtStepFused(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3f + (value1 * value2)) / 2f; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPSqrt(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value = value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = MathF.Sqrt(value); + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + + return result; + } + + public static float FPSub(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); + value2 = 
value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + private static float FPDefaultNaN() + { + return -float.NaN; + } + + private static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static float FPTwo(bool sign) + { + return sign ? -2f : +2f; + } + + private static float FPOnePointFive(bool sign) + { + return sign ? -1.5f : +1.5f; + } + + private static float FPNeg(this float value) + { + return -value; + } + + private static float ZerosOrOnes(bool ones) + { + return BitConverter.Int32BitsToSingle(ones ? -1 : 0); + } + + private static float FPUnpack( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + if ((valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x007FFFFFu) != 0u) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x00400000u) == 0u ? 
FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static float FPProcessNaNs( + FPType type1, + FPType type2, + uint op1, + uint op2, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + uint op1, + uint op2, + uint op3, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context) + { + if (type == FPType.SNaN) + { + op |= 1u << 22; + + FPProcessException(FPException.InvalidOp, context); + } + + if ((context.Fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int32BitsToSingle((int)op); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat64 + { + public static double FPAdd(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static int FPCompare(double value1, double value2, bool signalNaNs, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN 
|| type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static double FPCompareEQ(double value1, double value2, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + return result; + } + + public static double FPCompareGE(double value1, double value2, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static double FPCompareGT(double value1, double value2, ExecutionContext context) + { + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static double FPCompareLE(double value1, double value2, ExecutionContext context) + { + return FPCompareGE(value2, value1, context); + } + + public static double FPCompareLT(double value1, double value2, ExecutionContext context) + { + return FPCompareGT(value2, value1, context); + } + + public static double FPDiv(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + FPProcessException(FPException.DivideByZero, context); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMax(double value1, double value2) + { + ExecutionContext context = 
NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMaxNum(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2); + } + + public static double FPMin(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMinNum(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2); + } + + public static double FPMul(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + 
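+ // No infinities involved: a zero operand yields a zero whose sign is the exclusive OR of the operand signs.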
else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulAdd(double valueA, double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulSub(double valueA, double value1, double value2) + { + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static double FPMulX(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecipEstimate(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Infinity) + { + result = 
FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (Math.Abs(value) < Math.Pow(2d, -1024)) + { + bool overflowToInf; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + FPProcessException(FPException.Inexact, context); + } + else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 2045u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul))); + } + + return result; + } + + public static double FPRecipStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2d + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecpX(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else + { + ulong notExp = (~op >> 52) & 0x7FFul; + ulong maxExp = 0x7FEul; + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? 
maxExp : notExp) << 52)); + } + + return result; + } + + public static double FPRSqrtEstimate(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (3068u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44)); + } + + return result; + } + + public static double FPRSqrtStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3d + (value1 * value2)) / 2d; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPSqrt(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = Math.Sqrt(value); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + + return result; + } + + public static double FPSub(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong 
op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + private static double FPDefaultNaN() + { + return -double.NaN; + } + + private static double FPInfinity(bool sign) + { + return sign ? double.NegativeInfinity : double.PositiveInfinity; + } + + private static double FPZero(bool sign) + { + return sign ? -0d : +0d; + } + + private static double FPMaxNormal(bool sign) + { + return sign ? double.MinValue : double.MaxValue; + } + + private static double FPTwo(bool sign) + { + return sign ? -2d : +2d; + } + + private static double FPOnePointFive(bool sign) + { + return sign ? -1.5d : +1.5d; + } + + private static double FPNeg(this double value) + { + return -value; + } + + private static double ZerosOrOnes(bool ones) + { + return BitConverter.Int64BitsToDouble(ones ? -1L : 0L); + } + + private static double FPUnpack( + this double value, + out FPType type, + out bool sign, + out ulong valueBits, + ExecutionContext context) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0ul; + + if ((valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x0008000000000000ul) == 0ul ? 
FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static double FPProcessNaNs( + FPType type1, + FPType type2, + ulong op1, + ulong op2, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + ulong op1, + ulong op2, + ulong op3, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context) + { + if (type == FPType.SNaN) + { + op |= 1ul << 51; + + FPProcessException(FPException.InvalidOp, context); + } + + if ((context.Fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int64BitsToDouble((long)op); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } +} diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs index c2a0b9aec5..9cde061ab3 100644 --- a/ARMeilleure/IntermediateRepresentation/Instruction.cs +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -54,6 +54,120 @@ namespace ARMeilleure.IntermediateRepresentation Store8, StoreToContext, Subtract, + VectorExtract, + VectorExtract16, + VectorExtract8, + VectorInsert, + VectorInsert16, + VectorInsert8, + VectorZero, + VectorZeroUpper64, + VectorZeroUpper96, + + //Intrinsics + X86Intrinsic_Start, + X86Addpd, + X86Addps, + X86Addsd, + X86Addss, + X86Andnpd, + X86Andnps, + X86Divpd, + X86Divps, + X86Divsd, + X86Divss, + X86Haddpd, + X86Haddps, + X86Maxpd, + X86Maxps, + X86Maxsd, + X86Maxss, + X86Minpd, + X86Minps, + X86Minsd, + X86Minss, + X86Movhlps, + X86Movlhps, + X86Mulpd, + X86Mulps, + X86Mulsd, + X86Mulss, + X86Paddb, + X86Paddd, + X86Paddq, + X86Paddw, + X86Pand, + X86Pandn, + X86Pavgb, + X86Pavgw, + X86Pblendvb, + X86Pcmpeqb, + X86Pcmpeqd, + X86Pcmpeqq, + X86Pcmpeqw, + X86Pcmpgtb, + X86Pcmpgtd, + X86Pcmpgtq, + X86Pcmpgtw, + X86Pmaxsb, + X86Pmaxsd, + X86Pmaxsw, + X86Pmaxub, + X86Pmaxud, + X86Pmaxuw, + X86Pminsb, + X86Pminsd, + X86Pminsw, + X86Pminub, + X86Pminud, + X86Pminuw, + X86Pmovsxbw, + X86Pmovsxdq, + X86Pmovsxwd, + X86Pmovzxbw, + X86Pmovzxdq, + X86Pmovzxwd, + X86Pmulld, + X86Pmullw, + X86Popcnt, + X86Por, + X86Psllw, + X86Psrad, + X86Psraw, + X86Psrld, + X86Psrlq, + X86Psrldq, 
+ X86Psrlw, + X86Psubb, + X86Psubd, + X86Psubq, + X86Psubw, + X86Pxor, + X86Rcpps, + X86Rcpss, + X86Roundpd, + X86Roundps, + X86Roundsd, + X86Roundss, + X86Rsqrtps, + X86Rsqrtss, + X86Shufpd, + X86Shufps, + X86Sqrtpd, + X86Sqrtps, + X86Sqrtsd, + X86Sqrtss, + X86Subpd, + X86Subps, + X86Subsd, + X86Subss, + X86Unpckhpd, + X86Unpckhps, + X86Unpcklpd, + X86Unpcklps, + X86Xorpd, + X86Xorps, + X86Intrinsic_End, Count } diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs index b57b9f08a0..04948464a1 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs @@ -35,6 +35,11 @@ namespace ARMeilleure.IntermediateRepresentation return new Operand(BitConverter.SingleToInt32Bits(value)); } + public static Operand ConstF(double value) + { + return new Operand(BitConverter.DoubleToInt64Bits(value)); + } + public static Operand Label() { return new Operand(OperandKind.Label); diff --git a/ARMeilleure/IntermediateRepresentation/OperandType.cs b/ARMeilleure/IntermediateRepresentation/OperandType.cs index 63402ba5bc..764d5eb8e4 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandType.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandType.cs @@ -1,3 +1,5 @@ +using System; + namespace ARMeilleure.IntermediateRepresentation { enum OperandType @@ -17,5 +19,19 @@ namespace ARMeilleure.IntermediateRepresentation return type == OperandType.I32 || type == OperandType.I64; } + + public static RegisterType ToRegisterType(this OperandType type) + { + switch (type) + { + case OperandType.FP32: return RegisterType.Vector; + case OperandType.FP64: return RegisterType.Vector; + case OperandType.I32: return RegisterType.Integer; + case OperandType.I64: return RegisterType.Integer; + case OperandType.V128: return RegisterType.Vector; + } + + throw new InvalidOperationException($"Invalid operand type \"{type}\"."); + } } } \ No newline at end of file diff --git a/ARMeilleure/State/Register.cs b/ARMeilleure/IntermediateRepresentation/Register.cs similarity index 94% rename from ARMeilleure/State/Register.cs rename to ARMeilleure/IntermediateRepresentation/Register.cs index 47e72c4a75..745b315382 100644 --- a/ARMeilleure/State/Register.cs +++ b/ARMeilleure/IntermediateRepresentation/Register.cs @@ -1,6 +1,6 @@ using System; -namespace ARMeilleure.State +namespace ARMeilleure.IntermediateRepresentation { struct Register : IEquatable { diff --git a/ARMeilleure/State/RegisterType.cs b/ARMeilleure/IntermediateRepresentation/RegisterType.cs similarity index 62% rename from ARMeilleure/State/RegisterType.cs rename to ARMeilleure/IntermediateRepresentation/RegisterType.cs index 13028f2c58..e71795cb94 100644 --- a/ARMeilleure/State/RegisterType.cs +++ b/ARMeilleure/IntermediateRepresentation/RegisterType.cs @@ -1,4 +1,4 @@ -namespace ARMeilleure.State +namespace ARMeilleure.IntermediateRepresentation { enum RegisterType { diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs index 87afd25138..44aeeeb80c 100644 --- a/ARMeilleure/Optimizations.cs +++ b/ARMeilleure/Optimizations.cs @@ -4,13 +4,22 @@ namespace ARMeilleure { public static bool AssumeStrictAbiCompliance { get; set; } - public static bool FastFP { get; set; } = true; + public static bool FastFP { get; set; } = false; - public static bool UseSseIfAvailable { get; set; } - public static bool UseSse2IfAvailable { get; set; } - public static bool UseSse3IfAvailable { get; set; } - public static bool 
UseSsse3IfAvailable { get; set; } + public static bool UseSseIfAvailable { get; set; } + public static bool UseSse2IfAvailable { get; set; } + public static bool UseSse3IfAvailable { get; set; } + public static bool UseSsse3IfAvailable { get; set; } public static bool UseSse41IfAvailable { get; set; } - public static bool UseSse42IfAvailable { get; set; } + public static bool UseSse42IfAvailable { get; set; } + public static bool UsePopCntIfAvailable { get; set; } + + internal static bool UseSse { get; set; } = true; + internal static bool UseSse2 { get; set; } = true; + internal static bool UseSse3 { get; set; } = true; + internal static bool UseSsse3 { get; set; } = true; + internal static bool UseSse41 { get; set; } = true; + internal static bool UseSse42 { get; set; } + internal static bool UsePopCnt { get; set; } } } \ No newline at end of file diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs index 33cfd66776..5a618d1237 100644 --- a/ARMeilleure/State/ExecutionContext.cs +++ b/ARMeilleure/State/ExecutionContext.cs @@ -8,6 +8,9 @@ namespace ARMeilleure.State internal IntPtr NativeContextPtr => _nativeContext.BasePtr; + public FPCR Fpcr { get; set; } + public FPSR Fpsr { get; set; } + public event EventHandler Break; public event EventHandler SupervisorCall; public event EventHandler Undefined; diff --git a/ARMeilleure/State/FPCR.cs b/ARMeilleure/State/FPCR.cs new file mode 100644 index 0000000000..511681fa94 --- /dev/null +++ b/ARMeilleure/State/FPCR.cs @@ -0,0 +1,23 @@ +using System; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPCR + { + Ufe = 1 << 11, + Fz = 1 << 24, + Dn = 1 << 25, + Ahp = 1 << 26 + } + + public static class FPCRExtensions + { + private const int RModeShift = 22; + + public static FPRoundingMode GetRoundingMode(this FPCR fpcr) + { + return (FPRoundingMode)(((int)fpcr >> RModeShift) & 3); + } + } +} diff --git a/ARMeilleure/State/FPException.cs b/ARMeilleure/State/FPException.cs new file mode 100644 index 0000000000..e24e07af18 --- /dev/null +++ b/ARMeilleure/State/FPException.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.State +{ + enum FPException + { + InvalidOp = 0, + DivideByZero = 1, + Overflow = 2, + Underflow = 3, + Inexact = 4, + InputDenorm = 7 + } +} diff --git a/ARMeilleure/State/FPRoundingMode.cs b/ARMeilleure/State/FPRoundingMode.cs new file mode 100644 index 0000000000..ee4f876686 --- /dev/null +++ b/ARMeilleure/State/FPRoundingMode.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.State +{ + public enum FPRoundingMode + { + ToNearest = 0, + TowardsPlusInfinity = 1, + TowardsMinusInfinity = 2, + TowardsZero = 3 + } +} diff --git a/ARMeilleure/State/FPSR.cs b/ARMeilleure/State/FPSR.cs new file mode 100644 index 0000000000..c20dc43930 --- /dev/null +++ b/ARMeilleure/State/FPSR.cs @@ -0,0 +1,11 @@ +using System; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPSR + { + Ufc = 1 << 3, + Qc = 1 << 27 + } +} diff --git a/ARMeilleure/State/FPType.cs b/ARMeilleure/State/FPType.cs new file mode 100644 index 0000000000..84e0db8da2 --- /dev/null +++ b/ARMeilleure/State/FPType.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.State +{ + enum FPType + { + Nonzero, + Zero, + Infinity, + QNaN, + SNaN + } +} diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs index fe77f2bee5..963818c1b8 100644 --- a/ARMeilleure/State/NativeContext.cs +++ b/ARMeilleure/State/NativeContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Memory; using System; using 
System.Runtime.InteropServices; diff --git a/ARMeilleure/State/V128.cs b/ARMeilleure/State/V128.cs index 8706b0f590..00b20fa1d2 100644 --- a/ARMeilleure/State/V128.cs +++ b/ARMeilleure/State/V128.cs @@ -7,19 +7,45 @@ namespace ARMeilleure.State private ulong _e0; private ulong _e1; - public V128(float value) + public V128(float value) : this(value, value, value, value) { } + + public V128(double value) : this(value, value) { } + + public V128(float e0, float e1, float e2, float e3) { - _e0 = (uint)BitConverter.SingleToInt32Bits(value); - _e1 = 0; + _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0; + _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32; + _e1 = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0; + _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32; } - public V128(double value) + public V128(double e0, double e1) { - _e0 = (ulong)BitConverter.DoubleToInt64Bits(value); - _e1 = 0; + _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0); + _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1); } - public V128(long e0, long e1) : this((ulong)e0, (ulong)e1) { } + public V128(int e0, int e1, int e2, int e3) + { + _e0 = (ulong)(uint)e0 << 0; + _e0 |= (ulong)(uint)e1 << 32; + _e1 = (ulong)(uint)e2 << 0; + _e1 |= (ulong)(uint)e3 << 32; + } + + public V128(uint e0, uint e1, uint e2, uint e3) + { + _e0 = (ulong)e0 << 0; + _e0 |= (ulong)e1 << 32; + _e1 = (ulong)e2 << 0; + _e1 |= (ulong)e3 << 32; + } + + public V128(long e0, long e1) + { + _e0 = (ulong)e0; + _e1 = (ulong)e1; + } public V128(ulong e0, ulong e1) { @@ -27,6 +53,12 @@ namespace ARMeilleure.State _e1 = e1; } + public V128(byte[] data) + { + _e0 = (ulong)BitConverter.ToInt64(data, 0); + _e1 = (ulong)BitConverter.ToInt64(data, 8); + } + public float AsFloat() { return GetFloat(0); @@ -47,7 +79,8 @@ namespace ARMeilleure.State return BitConverter.Int64BitsToDouble(GetInt64(index)); } - public int GetInt32(int index) => (int)GetUInt32(index); + public int GetInt32(int index) => (int)GetUInt32(index); + public long GetInt64(int index) => (long)GetUInt64(index); public uint GetUInt32(int index) { @@ -56,11 +89,9 @@ namespace ARMeilleure.State throw new ArgumentOutOfRangeException(nameof(index)); } - return (uint)((((index & 2) != 0) ? 
_e1 : _e0) >> (index & 1)); + return (uint)(GetUInt64(index >> 1) >> (index & 1)); } - public long GetInt64(int index) => (long)GetUInt64(index); - public ulong GetUInt64(int index) { switch (index) @@ -72,11 +103,44 @@ namespace ARMeilleure.State throw new ArgumentOutOfRangeException(nameof(index)); } + public byte[] ToArray() + { + byte[] e0Data = BitConverter.GetBytes(_e0); + byte[] e1Data = BitConverter.GetBytes(_e1); + + byte[] data = new byte[16]; + + Buffer.BlockCopy(e0Data, 0, data, 0, 8); + Buffer.BlockCopy(e1Data, 0, data, 8, 8); + + return data; + } + public override int GetHashCode() { return HashCode.Combine(_e0, _e1); } + public static V128 operator ~(V128 x) + { + return new V128(~x._e0, ~x._e1); + } + + public static V128 operator &(V128 x, V128 y) + { + return new V128(x._e0 & y._e0, x._e1 & y._e1); + } + + public static V128 operator |(V128 x, V128 y) + { + return new V128(x._e0 | y._e0, x._e1 | y._e1); + } + + public static V128 operator ^(V128 x, V128 y) + { + return new V128(x._e0 ^ y._e0, x._e1 ^ y._e1); + } + public static bool operator ==(V128 x, V128 y) { return x.Equals(y); @@ -96,5 +160,10 @@ namespace ARMeilleure.State { return other._e0 == _e0 && other._e1 == _e1; } + + public override string ToString() + { + return $"0x{_e1:X16}{_e0:X16}"; + } } } \ No newline at end of file diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index 3291ff60bb..6114fda484 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -37,6 +37,11 @@ namespace ARMeilleure.Translation _needsNewBlock = true; } + public Operand Add(Operand a, Operand b) + { + return Add(Instruction.Add, Local(a.Type), a, b); + } + public Operand BitwiseAnd(Operand a, Operand b) { return Add(Instruction.BitwiseAnd, Local(a.Type), a, b); @@ -158,9 +163,14 @@ namespace ARMeilleure.Translation return Add(Instruction.CountLeadingZeros, Local(a.Type), a); } - public Operand IAdd(Operand a, Operand b) + public Operand Divide(Operand a, Operand b) { - return Add(Instruction.Add, Local(a.Type), a, b); + return Add(Instruction.Divide, Local(a.Type), a, b); + } + + public Operand DivideUI(Operand a, Operand b) + { + return Add(Instruction.DivideUI, Local(a.Type), a, b); } public Operand ICompareEqual(Operand a, Operand b) @@ -213,31 +223,6 @@ namespace ARMeilleure.Translation return Add(Instruction.CompareNotEqual, Local(OperandType.I32), a, b); } - public Operand IDivide(Operand a, Operand b) - { - return Add(Instruction.Divide, Local(a.Type), a, b); - } - - public Operand IDivideUI(Operand a, Operand b) - { - return Add(Instruction.DivideUI, Local(a.Type), a, b); - } - - public Operand IMultiply(Operand a, Operand b) - { - return Add(Instruction.Multiply, Local(a.Type), a, b); - } - - public Operand INegate(Operand a) - { - return Add(Instruction.Negate, Local(a.Type), a); - } - - public Operand ISubtract(Operand a, Operand b) - { - return Add(Instruction.Subtract, Local(a.Type), a, b); - } - public Operand Load(Operand value, Operand address) { return Add(Instruction.Load, value, address); @@ -268,6 +253,11 @@ namespace ARMeilleure.Translation return Add(Instruction.LoadZx8, value, address); } + public Operand Multiply(Operand a, Operand b) + { + return Add(Instruction.Multiply, Local(a.Type), a, b); + } + public Operand Multiply64HighSI(Operand a, Operand b) { return Add(Instruction.Multiply64HighSI, Local(OperandType.I64), a, b); @@ -278,6 +268,11 @@ namespace ARMeilleure.Translation return 
Add(Instruction.Multiply64HighUI, Local(OperandType.I64), a, b); } + public Operand Negate(Operand a) + { + return Add(Instruction.Negate, Local(a.Type), a); + } + public Operand Return() { return Add(Instruction.Return); @@ -338,6 +333,61 @@ namespace ARMeilleure.Translation Add(Instruction.Store8, null, address, value); } + public Operand Subtract(Operand a, Operand b) + { + return Add(Instruction.Subtract, Local(a.Type), a, b); + } + + public Operand VectorExtract(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorExtract, value, vector, Const(index)); + } + + public Operand VectorExtract16(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorExtract16, value, vector, Const(index)); + } + + public Operand VectorExtract8(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorExtract8, value, vector, Const(index)); + } + + public Operand VectorInsert(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorInsert16(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorInsert8(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorZero() + { + return Add(Instruction.VectorZero, Local(OperandType.V128)); + } + + public Operand VectorZeroUpper64(Operand vector) + { + return Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector); + } + + public Operand VectorZeroUpper96(Operand vector) + { + return Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector); + } + + public Operand AddIntrinsic(Instruction inst, params Operand[] args) + { + return Add(inst, Local(OperandType.V128), args); + } + private Operand Add(Instruction inst, Operand dest = null, params Operand[] sources) { if (_needsNewBlock) diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index a4e8827e90..8e1343e29a 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -41,8 +41,6 @@ namespace ARMeilleure.Translation private TranslatedFunction Translate(ulong address, ExecutionMode mode) { - Logger logger = new Logger(); - EmitterContext context = new EmitterContext(); Block[] blocks = Decoder.DecodeFunction(_memory, address, ExecutionMode.Aarch64); @@ -55,11 +53,11 @@ namespace ARMeilleure.Translation Dominance.FindDominanceFrontiers(cfg); - logger.StartPass(PassName.SsaConstruction); + Logger.StartPass(PassName.SsaConstruction); Ssa.Rename(cfg); - logger.EndPass(PassName.SsaConstruction, cfg); + Logger.EndPass(PassName.SsaConstruction, cfg); byte[] code = CodeGenerator.Generate(cfg, _memory); diff --git a/Ryujinx.Tests.Unicorn/SimdValue.cs b/Ryujinx.Tests.Unicorn/SimdValue.cs index 2d96741988..7d85df7df1 100644 --- a/Ryujinx.Tests.Unicorn/SimdValue.cs +++ b/Ryujinx.Tests.Unicorn/SimdValue.cs @@ -7,6 +7,46 @@ namespace Ryujinx.Tests.Unicorn private ulong _e0; private ulong _e1; + public SimdValue(float value) : this(value, value, value, value) { } + + public SimdValue(double value) : this(value, value) { } + + public SimdValue(float e0, float e1, float e2, float e3) + { + _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0; + _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32; + _e1 = 
(ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0; + _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32; + } + + public SimdValue(double e0, double e1) + { + _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0); + _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1); + } + + public SimdValue(int e0, int e1, int e2, int e3) + { + _e0 = (ulong)(uint)e0 << 0; + _e0 |= (ulong)(uint)e1 << 32; + _e1 = (ulong)(uint)e2 << 0; + _e1 |= (ulong)(uint)e3 << 32; + } + + public SimdValue(uint e0, uint e1, uint e2, uint e3) + { + _e0 = (ulong)e0 << 0; + _e0 |= (ulong)e1 << 32; + _e1 = (ulong)e2 << 0; + _e1 |= (ulong)e3 << 32; + } + + public SimdValue(long e0, long e1) + { + _e0 = (ulong)e0; + _e1 = (ulong)e1; + } + public SimdValue(ulong e0, ulong e1) { _e0 = e0; @@ -19,19 +59,6 @@ namespace Ryujinx.Tests.Unicorn _e1 = (ulong)BitConverter.ToInt64(data, 8); } - public byte[] ToArray() - { - byte[] e0Data = BitConverter.GetBytes(_e0); - byte[] e1Data = BitConverter.GetBytes(_e1); - - byte[] data = new byte[16]; - - Buffer.BlockCopy(e0Data, 0, data, 0, 8); - Buffer.BlockCopy(e1Data, 0, data, 8, 8); - - return data; - } - public float AsFloat() { return GetFloat(0); @@ -52,7 +79,8 @@ namespace Ryujinx.Tests.Unicorn return BitConverter.Int64BitsToDouble(GetInt64(index)); } - public int GetInt32(int index) => (int)GetUInt32(index); + public int GetInt32(int index) => (int)GetUInt32(index); + public long GetInt64(int index) => (long)GetUInt64(index); public uint GetUInt32(int index) { @@ -61,11 +89,9 @@ namespace Ryujinx.Tests.Unicorn throw new ArgumentOutOfRangeException(nameof(index)); } - return (uint)((((index & 2) != 0) ? _e1 : _e0) >> (index & 1)); + return (uint)(GetUInt64(index >> 1) >> (index & 1)); } - public long GetInt64(int index) => (long)GetUInt64(index); - public ulong GetUInt64(int index) { switch (index) @@ -77,6 +103,19 @@ namespace Ryujinx.Tests.Unicorn throw new ArgumentOutOfRangeException(nameof(index)); } + public byte[] ToArray() + { + byte[] e0Data = BitConverter.GetBytes(_e0); + byte[] e1Data = BitConverter.GetBytes(_e1); + + byte[] data = new byte[16]; + + Buffer.BlockCopy(e0Data, 0, data, 0, 8); + Buffer.BlockCopy(e1Data, 0, data, 8, 8); + + return data; + } + public override int GetHashCode() { return HashCode.Combine(_e0, _e1); @@ -94,12 +133,17 @@ namespace Ryujinx.Tests.Unicorn public override bool Equals(object obj) { - return obj is SimdValue simdValue && Equals(simdValue); + return obj is SimdValue vector && Equals(vector); } public bool Equals(SimdValue other) { return other._e0 == _e0 && other._e1 == _e1; } + + public override string ToString() + { + return $"0x{_e1:X16}{_e0:X16}"; + } } } \ No newline at end of file diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index f0f864eb97..048082f019 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -135,8 +135,8 @@ namespace Ryujinx.Tests.Cpu _context.SetPstateFlag(PState.ZFlag, zero); _context.SetPstateFlag(PState.NFlag, negative); - //_thread.ThreadState.Fpcr = fpcr; - //_thread.ThreadState.Fpsr = fpsr; + _context.Fpcr = (FPCR)fpcr; + _context.Fpsr = (FPSR)fpsr; if (_unicornAvailable) { @@ -361,8 +361,8 @@ namespace Ryujinx.Tests.Cpu Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30])); Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31])); - //Assert.That(_thread.ThreadState.Fpcr, Is.EqualTo(_unicornEmu.Fpcr)); - //Assert.That(_thread.ThreadState.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask)); + 
Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr)); + Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask)); Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag)); Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag)); diff --git a/Ryujinx.Tests/Cpu/CpuTestMisc.cs b/Ryujinx.Tests/Cpu/CpuTestMisc.cs index ea8f479761..6d2440c183 100644 --- a/Ryujinx.Tests/Cpu/CpuTestMisc.cs +++ b/Ryujinx.Tests/Cpu/CpuTestMisc.cs @@ -190,7 +190,7 @@ namespace Ryujinx.Tests.Cpu Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(GetVectorE0(GetContext().GetV(0)), Is.EqualTo(16f)); + Assert.That(GetContext().GetV(0).AsFloat(), Is.EqualTo(16f)); } [Explicit]
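Note on the Math.FusedMultiplyAdd TODOs (FPMulAdd, FPRecipStepFused, FPRSqrtStepFused): the snippet below is an illustrative sketch, not part of this diff, and assumes a runtime that ships System.Math.FusedMultiplyAdd (it landed in .NET Core 3.0). It shows how the doubly-rounded valueA + (value1 * value2) fallback could later be swapped for a single-rounding fused operation.

    // Sketch only: single-rounding fused multiply-add, (value1 * value2) + valueA.
    // The negated variants (FPMulSub, FPRecipStepFused, FPRSqrtStepFused) would
    // still call FPNeg on value1 first, exactly as the diff does today.
    public static double FPMulAddFused(double valueA, double value1, double value2)
    {
        return Math.FusedMultiplyAdd(value1, value2, valueA);
    }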
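The FPRecipEstimate path looks up SoftFloat.RecipEstimateTable[scaled - 256] and adds 256 back, so each entry appears to hold the low eight bits of a 9-bit estimate in [256, 511]. The table itself is outside this hunk; the generator below is only a sketch of how such a table can be derived from the ARM ARM RecipEstimate pseudocode, and the element type and exact construction are assumptions.

    // Assumed shape: 256 entries, input index = scaled - 256, output = estimate - 256.
    private static byte[] BuildRecipEstimateTable()
    {
        byte[] table = new byte[256];

        for (ulong index = 0; index < 256; index++)
        {
            ulong a = ((index + 256ul) << 1) + 1ul;  // odd midpoint of the scaled interval
            ulong b = ((0x80000ul / a) + 1ul) >> 1;  // round(2^19 / a), lands in [256, 511]

            table[index] = (byte)(b & 0xFFul);       // store estimate - 256
        }

        return table;
    }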
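FPProcessNaNs and FPProcessNaNs3 propagate a signaling NaN ahead of any quiet NaN (earlier operands win ties), and FPProcessNaN quiets the SNaN by forcing fraction bit 51 before raising InvalidOp. A small self-contained illustration of that quieting step on raw double bit patterns (sketch only, not part of the diff):

    // Example patterns: exponent all ones, non-zero fraction.
    const ulong SignalingNaNBits = 0x7FF0000000000001ul; // bit 51 clear -> SNaN
    const ulong QuietNaNBits     = 0x7FF8000000000001ul; // bit 51 set   -> QNaN

    // Mirrors the `op |= 1ul << 51` step: the payload is preserved, only the
    // quiet bit is forced on, so Quiet(SignalingNaNBits) == QuietNaNBits here.
    static double Quiet(ulong nanBits)
    {
        return BitConverter.Int64BitsToDouble((long)(nanBits | (1ul << 51)));
    }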
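FPCRExtensions.GetRoundingMode reads RMode from FPCR bits 23:22 (RModeShift = 22, two-bit mask). Usage sketch only:

    FPCR fpcr = (FPCR)(0b10 << 22);               // RMode field = 2
    FPRoundingMode mode = fpcr.GetRoundingMode(); // FPRoundingMode.TowardsMinusInfinity

    // FPRecipEstimate uses this to pick FPInfinity(sign) or FPMaxNormal(sign)
    // when the true result would overflow.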
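The new V128 and SimdValue constructors put element 0 in the least significant 32 bits of the low quadword and element 3 in the most significant 32 bits of the high quadword; ToString prints the high quadword first. A quick layout sketch (illustration only):

    V128 v = new V128(1f, 2f, 3f, 4f);

    ulong low  = v.GetUInt64(0); // 1f bits in [31:0], 2f bits in [63:32]
    ulong high = v.GetUInt64(1); // 3f bits in [31:0], 4f bits in [63:32]

    // v.ToString() => "0x" + high.ToString("X16") + low.ToString("X16")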
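EmitterContext.AddIntrinsic always allocates a fresh V128 local as the destination for the new X86_* opcodes (which sit between X86Intrinsic_Start and X86Intrinsic_End). A hypothetical emission sequence; the operand names are made up for illustration, and the VectorZeroUpper* reading is inferred from the opcode names rather than stated in the diff:

    // opN and opM are assumed to be existing V128 operands inside an emitter.
    Operand res = context.AddIntrinsic(Instruction.X86Addps, opN, opM);

    // Narrow to a 64-bit (2S) result by clearing the upper 64 bits.
    res = context.VectorZeroUpper64(res);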