From 9d9aca1c11dd8987499efefea413d08178d30465 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 12 Jun 2019 19:33:49 -0300 Subject: [PATCH] Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes --- ARMeilleure/CodeGen/X86/Assembler.cs | 360 +++++---- ARMeilleure/CodeGen/X86/CodeGenerator.cs | 60 ++ ARMeilleure/CodeGen/X86/PreAllocator.cs | 98 +-- ARMeilleure/CodeGen/X86/X86Instruction.cs | 10 + ARMeilleure/Decoders/OpCodeTable.cs | 60 +- .../Instructions/InstEmitSimdArithmetic.cs | 25 +- .../Instructions/InstEmitSimdHelper.cs | 70 +- ARMeilleure/Instructions/InstEmitSimdMove.cs | 754 +++++++++++++++++- ARMeilleure/Instructions/InstName.cs | 1 + ARMeilleure/Instructions/SoftFallback.cs | 67 ++ .../IntermediateRepresentation/Instruction.cs | 10 + 11 files changed, 1247 insertions(+), 268 deletions(-) diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 82ecee9f9b..ff81530b62 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -61,154 +61,164 @@ namespace ARMeilleure.CodeGen.X86 { _instTable = new InstInfo[(int)X86Instruction.Count]; - // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags - Add(X86Instruction.Add, new InstInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstFlags.None)); - Add(X86Instruction.Addpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Addps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex)); - Add(X86Instruction.Addsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Addss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.And, new InstInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstFlags.None)); - Add(X86Instruction.Andnpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex | InstFlags.Prefix66)); - 
Add(X86Instruction.Andnps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex)); - Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstFlags.None)); - Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc8, InstFlags.RegOnly)); - Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstFlags.None)); - Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None)); - Add(X86Instruction.Div, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstFlags.None)); - Add(X86Instruction.Divpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Divps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex)); - Add(X86Instruction.Divsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Divss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Haddpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Haddps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstFlags.None)); - Add(X86Instruction.Imul, new InstInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstFlags.None)); - Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstFlags.None)); - Add(X86Instruction.Insertps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Maxpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Maxps, new InstInfo(BadOp, 
BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex)); - Add(X86Instruction.Maxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Maxss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Minpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Minps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex)); - Add(X86Instruction.Minsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Minss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Mov, new InstInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstFlags.None)); - Add(X86Instruction.Mov16, new InstInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstFlags.Prefix66)); - Add(X86Instruction.Mov8, new InstInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstFlags.None)); - Add(X86Instruction.Movd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6e, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Movdqu, new InstInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Movhlps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstFlags.Vex)); - Add(X86Instruction.Movlhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstFlags.Vex)); - Add(X86Instruction.Movq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Movsd, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Movss, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Movsx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstFlags.None)); - 
Add(X86Instruction.Movsx32, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstFlags.None)); - Add(X86Instruction.Movsx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstFlags.None)); - Add(X86Instruction.Movzx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstFlags.None)); - Add(X86Instruction.Movzx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstFlags.None)); - Add(X86Instruction.Mul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstFlags.None)); - Add(X86Instruction.Mulpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Mulps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex)); - Add(X86Instruction.Mulsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Mulss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Neg, new InstInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Not, new InstInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Or, new InstInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstFlags.None)); - Add(X86Instruction.Paddb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Paddd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Paddq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Paddw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pand, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pandn, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstFlags.Vex | InstFlags.Prefix66)); - 
Add(X86Instruction.Pavgb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pavgw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pblendvb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpeqb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpeqd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpeqq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpeqw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpgtb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpgtd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpgtq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pcmpgtw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pextrb, new InstInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pextrd, new InstInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pextrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pinsrb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pinsrd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pinsrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000fc4, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmaxsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmaxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmaxsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmaxub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmaxud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmaxuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pminsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pminsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pminsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pminub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pminud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pminuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmovsxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmovsxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmovsxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmovzxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstFlags.Vex | InstFlags.Prefix66)); - 
Add(X86Instruction.Pmovzxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmovzxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmulld, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pmullw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pop, new InstInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Popcnt, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstFlags.PrefixF3)); - Add(X86Instruction.Por, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Pshufd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psllw, new InstInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psrad, new InstInfo(BadOp, 0x00000f72, BadOp, BadOp, 0x00000fe2, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psraw, new InstInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psrld, new InstInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psrlq, new InstInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psrldq, new InstInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psrlw, new InstInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psubb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psubd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstFlags.Vex | 
InstFlags.Prefix66)); - Add(X86Instruction.Psubq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Psubw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Push, new InstInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstFlags.None)); - Add(X86Instruction.Pxor, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Rcpps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex)); - Add(X86Instruction.Rcpss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Ror, new InstInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Roundpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Roundps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex)); - Add(X86Instruction.Roundsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Roundss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f3a, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Rsqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex)); - Add(X86Instruction.Rsqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Sar, new InstInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Setcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstFlags.Reg8)); - Add(X86Instruction.Shl, new InstInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Shr, new InstInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Shufpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | 
InstFlags.Prefix66)); - Add(X86Instruction.Shufps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex)); - Add(X86Instruction.Sqrtpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Sqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex)); - Add(X86Instruction.Sqrtsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Sqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Sub, new InstInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstFlags.None)); - Add(X86Instruction.Subpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Subps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex)); - Add(X86Instruction.Subsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF2)); - Add(X86Instruction.Subss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF3)); - Add(X86Instruction.Test, new InstInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstFlags.None)); - Add(X86Instruction.Unpckhpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Unpckhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex)); - Add(X86Instruction.Unpcklpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Unpcklps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex)); - Add(X86Instruction.Xor, new InstInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstFlags.None)); - Add(X86Instruction.Xorpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex | InstFlags.Prefix66)); - Add(X86Instruction.Xorps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000f57, InstFlags.Vex)); + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + Add(X86Instruction.Add, new InstInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstFlags.None)); + Add(X86Instruction.Addpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Addps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex)); + Add(X86Instruction.Addsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Addss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.And, new InstInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstFlags.None)); + Add(X86Instruction.Andnpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Andnps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex)); + Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstFlags.None)); + Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc8, InstFlags.RegOnly)); + Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstFlags.None)); + Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None)); + Add(X86Instruction.Div, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstFlags.None)); + Add(X86Instruction.Divpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Divps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex)); + Add(X86Instruction.Divsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Divss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, 
InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Haddpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Haddps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstFlags.None)); + Add(X86Instruction.Imul, new InstInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstFlags.None)); + Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstFlags.None)); + Add(X86Instruction.Insertps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Maxpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Maxps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex)); + Add(X86Instruction.Maxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Maxss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Minpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Minps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex)); + Add(X86Instruction.Minsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Minss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Mov, new InstInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstFlags.None)); + Add(X86Instruction.Mov16, new InstInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstFlags.Prefix66)); + Add(X86Instruction.Mov8, new InstInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstFlags.None)); + Add(X86Instruction.Movd, new InstInfo(BadOp, 
BadOp, BadOp, BadOp, 0x00000f6e, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Movdqu, new InstInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Movhlps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstFlags.Vex)); + Add(X86Instruction.Movlhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstFlags.Vex)); + Add(X86Instruction.Movq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Movsd, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Movss, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Movsx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstFlags.None)); + Add(X86Instruction.Movsx32, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstFlags.None)); + Add(X86Instruction.Movsx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstFlags.None)); + Add(X86Instruction.Movzx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstFlags.None)); + Add(X86Instruction.Movzx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstFlags.None)); + Add(X86Instruction.Mul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstFlags.None)); + Add(X86Instruction.Mulpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Mulps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex)); + Add(X86Instruction.Mulsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Mulss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Neg, new InstInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Not, new InstInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, 
InstFlags.None)); + Add(X86Instruction.Or, new InstInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstFlags.None)); + Add(X86Instruction.Paddb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Paddd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Paddq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Paddw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pand, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pandn, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pavgb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pavgw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpeqw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 
0x000f3837, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pcmpgtw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pextrb, new InstInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pextrd, new InstInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pextrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pinsrb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pinsrd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pinsrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmaxuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminub, 
new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pminuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovsxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovsxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovsxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovzxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovzxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmovzxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmulld, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pmullw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pop, new InstInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Popcnt, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstFlags.PrefixF3)); + Add(X86Instruction.Por, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pshufb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pshufd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Pslldq, new InstInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + 
Add(X86Instruction.Psllw, new InstInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrad, new InstInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psraw, new InstInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrld, new InstInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrlq, new InstInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrldq, new InstInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psrlw, new InstInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psubw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpckhbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpckhdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpckhqdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpckhwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpcklbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpckldq, new 
InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpcklqdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Punpcklwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Push, new InstInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstFlags.None)); + Add(X86Instruction.Pxor, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Rcpps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex)); + Add(X86Instruction.Rcpss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Ror, new InstInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Roundpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Roundps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Roundsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Roundss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Rsqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex)); + Add(X86Instruction.Rsqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Sar, new InstInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Setcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstFlags.Reg8)); + Add(X86Instruction.Shl, new InstInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Shr, new InstInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Shufpd, 
new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Shufps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex)); + Add(X86Instruction.Sqrtpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Sqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex)); + Add(X86Instruction.Sqrtsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Sqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Sub, new InstInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstFlags.None)); + Add(X86Instruction.Subpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Subps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex)); + Add(X86Instruction.Subsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Subss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Test, new InstInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstFlags.None)); + Add(X86Instruction.Unpckhpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Unpckhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex)); + Add(X86Instruction.Unpcklpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Unpcklps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex)); + Add(X86Instruction.Xor, new InstInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstFlags.None)); + Add(X86Instruction.Xorpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex | InstFlags.Prefix66)); + 
Add(X86Instruction.Xorps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex)); } private static void Add(X86Instruction inst, InstInfo info) @@ -836,6 +846,11 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, X86Instruction.Por, source1); } + public void Pshufb(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Pshufb, source1); + } + public void Pshufd(Operand dest, Operand source, byte imm) { WriteInstruction(dest, source, X86Instruction.Pshufd); @@ -843,6 +858,11 @@ namespace ARMeilleure.CodeGen.X86 WriteByte(imm); } + public void Pslldq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Pslldq, dest); + } + public void Psllw(Operand dest, Operand source, Operand source1) { WriteInstruction(source1, source, X86Instruction.Psllw, dest); @@ -898,6 +918,46 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, X86Instruction.Psubw, source1); } + public void Punpckhbw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpckhbw, source1); + } + + public void Punpckhdq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpckhdq, source1); + } + + public void Punpckhqdq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpckhqdq, source1); + } + + public void Punpckhwd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpckhwd, source1); + } + + public void Punpcklbw(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpcklbw, source1); + } + + public void Punpckldq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpckldq, source1); + } + + public void Punpcklqdq(Operand dest, Operand source, Operand source1) + { + 
WriteInstruction(dest, source, X86Instruction.Punpcklqdq, source1); + } + + public void Punpcklwd(Operand dest, Operand source, Operand source1) + { + WriteInstruction(dest, source, X86Instruction.Punpcklwd, source1); + } + public void Push(Operand source) { if (source.Kind == OperandKind.Register) @@ -1325,6 +1385,8 @@ namespace ARMeilleure.CodeGen.X86 modRM |= 0xc0; } + Debug.Assert(opCode != BadOp, "Invalid opcode value."); + if ((flags & InstFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) { int vexByte2 = (int)(flags & InstFlags.PrefixMask) >> (int)InstFlags.PrefixBit; @@ -1388,8 +1450,6 @@ namespace ARMeilleure.CodeGen.X86 } } - Debug.Assert(opCode != BadOp, "Invalid opcode value."); - if ((opCode & 0xff0000) != 0) { WriteByte((byte)(opCode >> 16)); diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 13a518695c..006d0c85da 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -144,6 +144,8 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.X86Pmullw, GenerateX86Pmullw); Add(Instruction.X86Popcnt, GenerateX86Popcnt); Add(Instruction.X86Por, GenerateX86Por); + Add(Instruction.X86Pshufb, GenerateX86Pshufb); + Add(Instruction.X86Pslldq, GenerateX86Pslldq); Add(Instruction.X86Psllw, GenerateX86Psllw); Add(Instruction.X86Psrad, GenerateX86Psrad); Add(Instruction.X86Psraw, GenerateX86Psraw); @@ -155,6 +157,14 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.X86Psubd, GenerateX86Psubd); Add(Instruction.X86Psubq, GenerateX86Psubq); Add(Instruction.X86Psubw, GenerateX86Psubw); + Add(Instruction.X86Punpckhbw, GenerateX86Punpckhbw); + Add(Instruction.X86Punpckhdq, GenerateX86Punpckhdq); + Add(Instruction.X86Punpckhqdq, GenerateX86Punpckhqdq); + Add(Instruction.X86Punpckhwd, GenerateX86Punpckhwd); + Add(Instruction.X86Punpcklbw, GenerateX86Punpcklbw); + Add(Instruction.X86Punpckldq, GenerateX86Punpckldq); + Add(Instruction.X86Punpcklqdq, 
GenerateX86Punpcklqdq); + Add(Instruction.X86Punpcklwd, GenerateX86Punpcklwd); Add(Instruction.X86Pxor, GenerateX86Pxor); Add(Instruction.X86Rcpps, GenerateX86Rcpps); Add(Instruction.X86Rcpss, GenerateX86Rcpss); @@ -1249,6 +1259,16 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Por(operation.Dest, operation.GetSource(1), operation.GetSource(0)); } + private static void GenerateX86Pshufb(CodeGenContext context, Operation operation) + { + context.Assembler.Pshufb(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Pslldq(CodeGenContext context, Operation operation) + { + context.Assembler.Pslldq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + private static void GenerateX86Psllw(CodeGenContext context, Operation operation) { context.Assembler.Psllw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); @@ -1304,6 +1324,46 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Psubw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); } + private static void GenerateX86Punpckhbw(CodeGenContext context, Operation operation) + { + context.Assembler.Punpckhbw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Punpckhdq(CodeGenContext context, Operation operation) + { + context.Assembler.Punpckhdq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Punpckhqdq(CodeGenContext context, Operation operation) + { + context.Assembler.Punpckhqdq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Punpckhwd(CodeGenContext context, Operation operation) + { + context.Assembler.Punpckhwd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Punpcklbw(CodeGenContext context, Operation operation) + { + context.Assembler.Punpcklbw(operation.Dest, operation.GetSource(1), 
operation.GetSource(0)); + } + + private static void GenerateX86Punpckldq(CodeGenContext context, Operation operation) + { + context.Assembler.Punpckldq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Punpcklqdq(CodeGenContext context, Operation operation) + { + context.Assembler.Punpcklqdq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + + private static void GenerateX86Punpcklwd(CodeGenContext context, Operation operation) + { + context.Assembler.Punpcklwd(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + private static void GenerateX86Pxor(CodeGenContext context, Operation operation) { context.Assembler.Pxor(operation.Dest, operation.GetSource(1), operation.GetSource(0)); diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 879fa62b90..709293cd9f 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -117,7 +117,7 @@ namespace ARMeilleure.CodeGen.X86 private static void AddConstantCopy(LinkedListNode node, Operation operation) { - if (operation.SourcesCount == 0 || HasFixedConst(operation.Inst)) + if (operation.SourcesCount == 0 || IsIntrinsic(operation.Inst)) { return; } @@ -128,20 +128,31 @@ namespace ARMeilleure.CodeGen.X86 Operand src1 = operation.GetSource(0); Operand src2; - if (src1.Type.IsInteger()) + if (src1.Kind == OperandKind.Constant) { - //Handle integer types. - //Most ALU instructions accepts a 32-bits immediate on the second operand. - //We need to ensure the following: - //- If the constant is on operand 1, we need to move it. - //-- But first, we try to swap operand 1 and 2 if the instruction is commutative. - //-- Doing so may allow us to encode the constant as operand 2 and avoid a copy. - //- If the constant is on operand 2, we check if the instruction supports it, - //if not, we also add a copy. 64-bits constants are usually not supported. 
bool isVecCopy = inst == Instruction.Copy && !dest.Type.IsInteger(); - if (src1.Kind == OperandKind.Constant && (!HasConstSrc1(inst) || isVecCopy)) + if (!src1.Type.IsInteger()) { + //Handle non-integer types (FP32, FP64 and V128). + //For instructions without an immediate operand, we do the following: + //- Insert a copy with the constant value (as integer) to a GPR. + //- Insert a copy from the GPR to a XMM register. + //- Replace the constant use with the XMM register. + src1 = AddXmmCopy(node, src1); + + operation.SetSource(0, src1); + } + else if (!HasConstSrc1(inst) || isVecCopy) + { + //Handle integer types. + //Most ALU instructions accepts a 32-bits immediate on the second operand. + //We need to ensure the following: + //- If the constant is on operand 1, we need to move it. + //-- But first, we try to swap operand 1 and 2 if the instruction is commutative. + //-- Doing so may allow us to encode the constant as operand 2 and avoid a copy. + //- If the constant is on operand 2, we check if the instruction supports it, + //if not, we also add a copy. 64-bits constants are usually not supported. if (IsCommutative(inst)) { src2 = operation.GetSource(1); @@ -162,45 +173,26 @@ namespace ARMeilleure.CodeGen.X86 operation.SetSource(0, src1); } } + } - if (operation.SourcesCount < 2) + if (operation.SourcesCount < 2) + { + return; + } + + src2 = operation.GetSource(1); + + if (src2.Kind == OperandKind.Constant) + { + if (!src2.Type.IsInteger()) { - return; - } - - src2 = operation.GetSource(1); - - if (src2.Kind == OperandKind.Constant && (!HasConstSrc2(inst) || IsLongConst(src2))) - { - src2 = AddCopy(node, src2); + src2 = AddXmmCopy(node, src2); operation.SetSource(1, src2); } - } - else - { - //Handle non-integer types (FP32, FP64 and V128). - //For instructions without an immediate operand, we do the following: - //- Insert a copy with the constant value (as integer) to a GPR. - //- Insert a copy from the GPR to a XMM register. 
- //- Replace the constant use with the XMM register. - if (src1.Kind == OperandKind.Constant && src1.Type.IsInteger()) + else if (!HasConstSrc2(inst) || IsLongConst(src2)) { - src1 = AddXmmCopy(node, src1); - - operation.SetSource(0, src1); - } - - if (operation.SourcesCount < 2) - { - return; - } - - src2 = operation.GetSource(1); - - if (src2.Kind == OperandKind.Constant && src2.Type.IsInteger()) - { - src2 = AddXmmCopy(node, src2); + src2 = AddCopy(node, src2); operation.SetSource(1, src2); } @@ -600,24 +592,6 @@ namespace ARMeilleure.CodeGen.X86 return false; } - private static bool HasFixedConst(Instruction inst) - { - switch (inst) - { - case Instruction.LoadFromContext: - case Instruction.StoreToContext: - case Instruction.VectorExtract: - case Instruction.VectorExtract16: - case Instruction.VectorExtract8: - case Instruction.VectorInsert: - case Instruction.VectorInsert16: - case Instruction.VectorInsert8: - return true; - } - - return IsIntrinsic(inst); - } - private static bool IsIntrinsic(Instruction inst) { return inst > Instruction.X86Intrinsic_Start && diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index 93f86407e8..45d2295a37 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -103,7 +103,9 @@ namespace ARMeilleure.CodeGen.X86 Pop, Popcnt, Por, + Pshufb, Pshufd, + Pslldq, Psllw, Psrad, Psraw, @@ -115,6 +117,14 @@ namespace ARMeilleure.CodeGen.X86 Psubd, Psubq, Psubw, + Punpckhbw, + Punpckhdq, + Punpckhqdq, + Punpckhwd, + Punpcklbw, + Punpckldq, + Punpcklqdq, + Punpcklwd, Push, Pxor, Rcpps, diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 21f2f844ed..34ea2e1848 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -234,11 +234,11 @@ namespace ARMeilleure.Decoders SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, null, typeof(OpCodeSimdReg)); 
SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, null, typeof(OpCodeSimdReg)); SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, typeof(OpCodeSimd)); - SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, null, typeof(OpCodeSimdIns)); - SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, null, typeof(OpCodeSimdIns)); - SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, null, typeof(OpCodeSimdIns)); + SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, typeof(OpCodeSimdIns)); + SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, typeof(OpCodeSimdIns)); + SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, typeof(OpCodeSimdIns)); SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, null, typeof(OpCodeSimdReg)); - SetA64("0>101110000xxxxx0101110000xxxxx01011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg)); SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd)); @@ -267,7 +267,7 @@ namespace ARMeilleure.Decoders SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, null, typeof(OpCodeSimd)); SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, null, typeof(OpCodeSimdReg)); SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, null, typeof(OpCodeSimdFcond)); + SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, typeof(OpCodeSimdFcond)); SetA64("000111100x10001xx10000xxxxxxxxxx", InstName.Fcvt_S, null, typeof(OpCodeSimd)); SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, null, typeof(OpCodeSimdCvt)); SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, null, typeof(OpCodeSimdCvt)); @@ -312,13 +312,13 @@ namespace ARMeilleure.Decoders SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Se, 
InstEmit.Fmls_Se, typeof(OpCodeSimdRegElemF)); SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, typeof(OpCodeSimdReg)); SetA64("0>00111111011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, typeof(OpCodeSimd)); SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, typeof(OpCodeSimdReg)); SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, typeof(OpCodeSimdReg)); - SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, null, typeof(OpCodeSimdIns)); - SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, null, typeof(OpCodeSimdIns)); + SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, typeof(OpCodeSimdIns)); + SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, typeof(OpCodeSimdIns)); SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, null, typeof(OpCodeSimdMemMs)); SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, null, typeof(OpCodeSimdMemMs)); SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, null, typeof(OpCodeSimdMemSs)); @@ -377,15 +377,15 @@ namespace ARMeilleure.Decoders SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, typeof(OpCodeSimdReg)); SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, typeof(OpCodeSimdRegElem)); - SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); - SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); - SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); - SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm)); + SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + 
SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, typeof(OpCodeSimdReg)); SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, typeof(OpCodeSimdRegElem)); - SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm)); - SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm)); - SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm)); + SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); + SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); + SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, typeof(OpCodeSimd)); SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, typeof(OpCodeSimd)); SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, null, typeof(OpCodeSimd)); @@ -406,6 +406,8 @@ namespace ARMeilleure.Decoders SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, typeof(OpCodeSimd)); SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, typeof(OpCodeSimd)); + SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd)); SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", 
InstName.Saddw_V, InstEmit.Saddw_V, typeof(OpCodeSimdReg)); SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, null, typeof(OpCodeSimdCvt)); SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, null, typeof(OpCodeSimdCvt)); @@ -441,7 +443,7 @@ namespace ARMeilleure.Decoders SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, typeof(OpCodeSimdRegElem)); SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, typeof(OpCodeSimdReg)); SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, typeof(OpCodeSimdRegElem)); - SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, null, typeof(OpCodeSimdIns)); + SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, typeof(OpCodeSimdIns)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, typeof(OpCodeSimdReg)); SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, typeof(OpCodeSimdRegElem)); SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, typeof(OpCodeSimd)); @@ -507,9 +509,9 @@ namespace ARMeilleure.Decoders SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, typeof(OpCodeSimdReg)); SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, typeof(OpCodeSimd)); SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, typeof(OpCodeSimd)); - SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, null, typeof(OpCodeSimdTbl)); - SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, null, typeof(OpCodeSimdReg)); + SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, typeof(OpCodeSimdTbl)); + SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, typeof(OpCodeSimdReg)); + 
SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, typeof(OpCodeSimdReg)); SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, typeof(OpCodeSimdReg)); SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, typeof(OpCodeSimdReg)); SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, typeof(OpCodeSimdReg)); @@ -573,11 +575,11 @@ namespace ARMeilleure.Decoders SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, null, typeof(OpCodeSimdShImm)); SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, typeof(OpCodeSimdReg)); SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, typeof(OpCodeSimdReg)); - SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, null, typeof(OpCodeSimdReg)); - SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, null, typeof(OpCodeSimd)); - SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, null, typeof(OpCodeSimdReg)); + SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, typeof(OpCodeSimd)); + SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, typeof(OpCodeSimdReg)); #endregion #region "OpCode Table (AArch32)" diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index dd25eb56af..e0a24e514d 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ 
b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -45,7 +45,14 @@ namespace ARMeilleure.Instructions Instruction addInst = X86PaddInstruction[op.Size]; - context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + Operand res = context.AddIntrinsic(addInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); } else { @@ -1788,6 +1795,11 @@ namespace ARMeilleure.Instructions EmitAddLongPairwise(context, signed: true, accumulate: false); } + public static void Saddlv_V(EmitterContext context) + { + EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + public static void Saddw_V(EmitterContext context) { if (Optimizations.UseSse41) @@ -2295,7 +2307,14 @@ namespace ARMeilleure.Instructions Instruction subInst = X86PsubInstruction[op.Size]; - context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + Operand res = context.AddIntrinsic(subInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); } else { @@ -2429,7 +2448,7 @@ namespace ARMeilleure.Instructions public static void Uaddlv_V(EmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); } public static void Uaddw_V(EmitterContext context) diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index b989fb4ff2..65b95414b7 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -17,6 +17,7 @@ namespace ARMeilleure.Instructions static class InstEmitSimdHelper { +#region "X86 SSE Instructions" public static readonly Instruction[] X86PaddInstruction = new Instruction[] { Instruction.X86Paddb, @@ -91,6 +92,23 @@ namespace ARMeilleure.Instructions 
Instruction.X86Psubq }; + public static readonly Instruction[] X86PunpckhInstruction = new Instruction[] + { + Instruction.X86Punpckhbw, + Instruction.X86Punpckhwd, + Instruction.X86Punpckhdq, + Instruction.X86Punpckhqdq + }; + + public static readonly Instruction[] X86PunpcklInstruction = new Instruction[] + { + Instruction.X86Punpcklbw, + Instruction.X86Punpcklwd, + Instruction.X86Punpckldq, + Instruction.X86Punpcklqdq + }; +#endregion + public static int GetImmShl(OpCodeSimdShImm op) { return op.Imm - (8 << op.Size); @@ -103,16 +121,22 @@ namespace ARMeilleure.Instructions public static Operand X86GetScalar(EmitterContext context, float value) { - int imm = BitConverter.SingleToInt32Bits(value); - - return context.Copy(Local(OperandType.V128), Const(imm)); + return X86GetScalar(context, BitConverter.SingleToInt32Bits(value)); } public static Operand X86GetScalar(EmitterContext context, double value) { - long imm = BitConverter.DoubleToInt64Bits(value); + return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value)); + } - return context.Copy(Local(OperandType.V128), Const(imm)); + public static Operand X86GetScalar(EmitterContext context, int value) + { + return context.Copy(Local(OperandType.V128), Const(value)); + } + + public static Operand X86GetScalar(EmitterContext context, long value) + { + return context.Copy(Local(OperandType.V128), Const(value)); } public static Operand X86GetAllElements(EmitterContext context, float value) @@ -570,7 +594,7 @@ namespace ARMeilleure.Instructions { Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); - res = EmitVectorInsert(context, res, ne, index, op.Size); + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); } context.Copy(GetVec(op.Rd), res); @@ -627,7 +651,7 @@ namespace ARMeilleure.Instructions { Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - res = EmitVectorInsert(context, res, ne, index, op.Size); + res = EmitVectorInsert(context, res, emit(ne), index, 
op.Size); } context.Copy(GetVec(op.Rd), res); @@ -781,7 +805,7 @@ namespace ARMeilleure.Instructions EmitVectorWidenRmBinaryOp(context, emit, signed: false); } - public static void EmitVectorWidenRmBinaryOp(EmitterContext context, Func2I emit, bool signed) + private static void EmitVectorWidenRmBinaryOp(EmitterContext context, Func2I emit, bool signed) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -940,7 +964,7 @@ namespace ARMeilleure.Instructions EmitVectorPairwiseOp(context, emit, signed: false); } - public static void EmitVectorPairwiseOp(EmitterContext context, Func2I emit, bool signed) + private static void EmitVectorPairwiseOp(EmitterContext context, Func2I emit, bool signed) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -967,15 +991,29 @@ namespace ARMeilleure.Instructions public static void EmitVectorAcrossVectorOpSx(EmitterContext context, Func2I emit) { - EmitVectorAcrossVectorOp(context, emit, true); + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); } public static void EmitVectorAcrossVectorOpZx(EmitterContext context, Func2I emit) { - EmitVectorAcrossVectorOp(context, emit, false); + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); } - public static void EmitVectorAcrossVectorOp(EmitterContext context, Func2I emit, bool signed) + public static void EmitVectorLongAcrossVectorOpSx(EmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); + } + + public static void EmitVectorLongAcrossVectorOpZx(EmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); + } + + private static void EmitVectorAcrossVectorOp( + EmitterContext context, + Func2I emit, + bool signed, + bool isLong) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -990,7 +1028,11 @@ namespace ARMeilleure.Instructions res = emit(res, n); } - context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 
op.Size)); + int size = isLong ? op.Size + 1 : op.Size; + + Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size); + + context.Copy(GetVec(op.Rd), d); } public static void EmitVectorPairwiseOpF(EmitterContext context, Func2I emit) @@ -1093,7 +1135,7 @@ namespace ARMeilleure.Instructions EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx); } - public static void EmitSaturatingUnaryOpSx(EmitterContext context, Func1I emit, SaturatingFlags flags) + private static void EmitSaturatingUnaryOpSx(EmitterContext context, Func1I emit, SaturatingFlags flags) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs index e3ad19d957..0cfbbe49a4 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs @@ -1,12 +1,8 @@ using ARMeilleure.Decoders; using ARMeilleure.IntermediateRepresentation; -using ARMeilleure.State; using ARMeilleure.Translation; -using System; -using System.Reflection; using static ARMeilleure.Instructions.InstEmitHelper; -using static ARMeilleure.Instructions.InstEmitMemoryHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -14,22 +10,760 @@ namespace ARMeilleure.Instructions { static partial class InstEmit { +#region "Masks" + private static readonly long[] _masksE0_TrnUzpXtn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_TrnUzp = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L 
<< 16 | 03L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 + }; + + private static readonly long[] _masksE0_Uzp = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_Uzp = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; +#endregion + + public static void Dup_Gp(EmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand n = GetIntOrZR(op, op.Rn); + + if (Optimizations.UseSse2) + { + switch (op.Size) + { + case 0: n = ZeroExtend8 (context, n); n = context.Multiply(n, Const(0x01010101)); break; + case 1: n = ZeroExtend16(context, n); n = context.Multiply(n, Const(0x00010001)); break; + case 2: n = ZeroExtend32(context, n); break; + } + + Operand vector = context.VectorInsert(context.VectorZero(), n, 0); + + if (op.Size < 3) + { + vector = context.AddIntrinsic(Instruction.X86Shufps, vector, vector, Const(0)); + } + else + { + vector = context.AddIntrinsic(Instruction.X86Movlhps, vector, vector); + } + } + else + { + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, n, index, op.Size); + } + } + } + + public static void Dup_S(EmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size)); + } + + public static void Dup_V(EmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + if 
(Optimizations.UseSse2)
+            {
+                // DUP (element, vector): replicate element DstIndex of Rn into every element of Rd.
+                Operand res = GetVec(op.Rn);
+
+                if (op.Size == 0)
+                {
+                    // Move the byte to position 0, then double it twice (byte -> word -> dword).
+                    // Fix: the second unpack was missing, so the dword that gets broadcast
+                    // below contained two copies of the byte plus two copies of its neighbour.
+                    res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex));
+                    res = context.AddIntrinsic(Instruction.X86Punpcklbw, res, res);
+                    res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res);
+                }
+                else if (op.Size == 1)
+                {
+                    res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex * 2));
+                    res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res);
+                }
+
+                if (op.Size < 2)
+                {
+                    // The wanted value now fills dword 0; broadcast it.
+                    res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(0));
+                }
+                else if (op.Size == 2)
+                {
+                    // Fix: 32-bit elements were always taken from index 0. Each 2-bit selector
+                    // of the SHUFPS immediate must name the requested source dword.
+                    res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(op.DstIndex * 0x55));
+                }
+                else if (op.DstIndex == 0)
+                {
+                    res = context.AddIntrinsic(Instruction.X86Movlhps, res, res);
+                }
+                else
+                {
+                    res = context.AddIntrinsic(Instruction.X86Movhlps, res, res);
+                }
+
+                // A 64-bit destination must not keep the broadcast in its upper half
+                // (the fallback path only fills GetBytesCount() bytes).
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+                Operand res = context.VectorZero();
+
+                int elems = op.GetBytesCount() >> op.Size;
+
+                for (int index = 0; index < elems; index++)
+                {
+                    res = EmitVectorInsert(context, res, ne, index, op.Size);
+                }
+
+                // Fix: the result was computed but never written to the destination register.
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // EXT: extracts a vector from the byte-wise concatenation Rm:Rn, starting at byte Imm4 of Rn.
+        public static void Ext_V(EmitterContext context)
+        {
+            OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+
+            if (Optimizations.UseSse2)
+            {
+                Operand nShifted = GetVec(op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    // Drop the upper half first so the right shift pulls in zeros.
+                    nShifted = context.AddIntrinsic(Instruction.X86Movlhps, nShifted, context.VectorZero());
+                }
+
+                nShifted = context.AddIntrinsic(Instruction.X86Psrldq, nShifted, Const(op.Imm4));
+
+                Operand mShifted = GetVec(op.Rm);
+
+                mShifted = context.AddIntrinsic(Instruction.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    mShifted = context.AddIntrinsic(Instruction.X86Movlhps, mShifted, context.VectorZero());
+                }
+
+                Operand res = context.AddIntrinsic(Instruction.X86Por, nShifted, mShifted);
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                Operand res = context.VectorZero();
+
+                int bytes = op.GetBytesCount();
+
+                int position = op.Imm4 & (bytes - 1);
+
+                for
(int index = 0; index < bytes; index++)
+                {
+                    // Bytes come from Rn until the end of the register is reached, then from Rm.
+                    int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
+
+                    Operand e = EmitVectorExtractZx(context, reg, position, 0);
+
+                    position = (position + 1) & (bytes - 1);
+
+                    res = EmitVectorInsert(context, res, e, index, 0);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // FCSEL: Rd = condition ? Rn : Rm (scalar, upper elements zeroed).
+        public static void Fcsel_S(EmitterContext context)
+        {
+            OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+            Operand lblTrue = Label();
+            Operand lblEnd = Label();
+
+            Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
+
+            context.BranchIfTrue(isTrue, lblTrue);
+
+            // Fix: the selection was inverted. Size == 0 is single precision, as in
+            // Fmov_Si and Fmov_V, which use a 32-bit immediate when Size == 0.
+            OperandType type = op.Size == 0 ? OperandType.FP32
+                                            : OperandType.FP64;
+
+            Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), 0);
+
+            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
+
+            context.Branch(lblEnd);
+
+            context.MarkLabel(lblTrue);
+
+            Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0);
+
+            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+
+            context.MarkLabel(lblEnd);
+        }
+
+        // FMOV (vector element 0 -> general register).
+        public static void Fmov_Ftoi(EmitterContext context)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 3);
+
+            SetIntOrZR(context, op.Rd, EmitIntZeroUpperIfNeeded(context, ne));
+        }
+
+        // FMOV (upper 64-bit element -> general register).
+        public static void Fmov_Ftoi1(EmitterContext context)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
+
+            SetIntOrZR(context, op.Rd, EmitIntZeroUpperIfNeeded(context, ne));
+        }
+
+        // FMOV (general register -> vector element 0, rest zeroed).
+        public static void Fmov_Itof(EmitterContext context)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand n = EmitIntZeroUpperIfNeeded(context, GetIntOrZR(op, op.Rn));
+
+            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, 3));
+        }
+
+        public static void Fmov_Itof1(EmitterContext context)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand n =
EmitIntZeroUpperIfNeeded(context, GetIntOrZR(op, op.Rn));
+
+            // Fix: FMOV Vd.D[1], Xn only writes the upper 64 bits and leaves the low half
+            // of Vd untouched, so insert into the current destination instead of a zero vector.
+            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), n, 1, 3));
+        }
+
+        // FMOV (register): scalar copy of element 0, upper elements zeroed.
+        public static void Fmov_S(EmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            // Fix: the selection was inverted. Size == 0 is single precision, as in
+            // Fmov_Si below, which uses a 32-bit immediate when Size == 0.
+            OperandType type = op.Size == 0 ? OperandType.FP32
+                                            : OperandType.FP64;
+
+            Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0);
+
+            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+        }
+
         public static void Fmov_Si(EmitterContext context)
         {
             OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
 
-            Operand imm;
-
-            if (op.Size != 0)
+            if (op.Size == 0)
             {
-                imm = Const(op.Immediate);
+                context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
             }
             else
             {
-                imm = Const((int)op.Immediate);
+                context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+            }
+        }
+
+        public static void Fmov_V(EmitterContext context)
+        {
+            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+            Operand e;
+
+            if (op.Size == 0)
+            {
+                e = Const((int)op.Immediate);
+            }
+            else
+            {
+                e = Const(op.Immediate);
             }
 
-            context.Copy(GetVec(op.Rd), imm);
+            Operand res = context.VectorZero();
+
+            int elems = op.RegisterSize == RegisterSize.Simd128 ?
4 : 2;
+
+            for (int index = 0; index < (elems >> op.Size); index++)
+            {
+                res = EmitVectorInsert(context, res, e, index, op.Size + 2);
+            }
+
+            // Fix: the scalar constant "e" was copied to the destination instead of the
+            // vector that was just built from it.
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // INS (general): writes Rn into one element of Rd, leaving all other elements unchanged.
+        public static void Ins_Gp(EmitterContext context)
+        {
+            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+            Operand n = GetIntOrZR(op, op.Rn);
+
+            // Fix: inserting into a zero vector wiped the other elements of Rd.
+            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), n, op.DstIndex, op.Size));
+        }
+
+        // INS (element): copies one element of Rn into one element of Rd, leaving the others unchanged.
+        public static void Ins_V(EmitterContext context)
+        {
+            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+            Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
+
+            // Fix: inserting into a zero vector wiped the other elements of Rd.
+            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), ne, op.DstIndex, op.Size));
+        }
+
+        public static void Movi_V(EmitterContext context)
+        {
+            if (Optimizations.UseSse2)
+            {
+                EmitMoviMvni(context, not: false);
+            }
+            else
+            {
+                EmitVectorImmUnaryOp(context, (op1) => op1);
+            }
+        }
+
+        public static void Mvni_V(EmitterContext context)
+        {
+            if (Optimizations.UseSse2)
+            {
+                EmitMoviMvni(context, not: true);
+            }
+            else
+            {
+                EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
+            }
+        }
+
+        // SMOV: sign-extending move of a vector element to a general register.
+        public static void Smov_S(EmitterContext context)
+        {
+            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+            Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
+
+            SetIntOrZR(context, op.Rd, EmitIntZeroUpperIfNeeded(context, ne));
+        }
+
+        public static void Tbl_V(EmitterContext context)
+        {
+            OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                Operand n = GetVec(op.Rn);
+                Operand m = GetVec(op.Rm);
+
+                Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+
+                Operand mMask = context.AddIntrinsic(Instruction.X86Pcmpgtb, m, mask);
+
+                mMask = context.AddIntrinsic(Instruction.X86Por, mMask, m);
+
+                Operand res = context.AddIntrinsic(Instruction.X86Pshufb, n, mMask);
+
+                for (int index = 1; index < op.Size; index++)
+                {
+                    Operand ni = GetVec((op.Rn + index) & 0x1f);
+
+
Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
+                    // Rebase the indices so that table "index" covers 0..15. "mask" must stay
+                    // the 0x0F range limit: the original "indexMask = mask = ..." overwrote it,
+                    // so out-of-range indices were no longer forced to select zero.
+                    Operand mMinusMask = context.AddIntrinsic(Instruction.X86Psubb, m, indexMask);
+
+                    Operand mMask2 = context.AddIntrinsic(Instruction.X86Pcmpgtb, mMinusMask, mask);
+
+                    // Bytes above 15 become 0xFF; PSHUFB writes zero for selectors with the top bit set.
+                    mMask2 = context.AddIntrinsic(Instruction.X86Por, mMask2, mMinusMask);
+
+                    Operand res2 = context.AddIntrinsic(Instruction.X86Pshufb, ni, mMask2);
+
+                    res = context.AddIntrinsic(Instruction.X86Por, res, res2);
+                }
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                // Software fallback: args are the index vector followed by the op.Size table registers.
+                Operand[] args = new Operand[1 + op.Size];
+
+                args[0] = GetVec(op.Rm);
+
+                for (int index = 0; index < op.Size; index++)
+                {
+                    args[1 + index] = GetVec((op.Rn + index) & 0x1f);
+                }
+
+                string name = null;
+
+                switch (op.Size)
+                {
+                    case 1: name = op.RegisterSize == RegisterSize.Simd64
+                        ? nameof(SoftFallback.Tbl1_V64)
+                        : nameof(SoftFallback.Tbl1_V128); break;
+
+                    case 2: name = op.RegisterSize == RegisterSize.Simd64
+                        ? nameof(SoftFallback.Tbl2_V64)
+                        : nameof(SoftFallback.Tbl2_V128); break;
+
+                    case 3: name = op.RegisterSize == RegisterSize.Simd64
+                        ? nameof(SoftFallback.Tbl3_V64)
+                        : nameof(SoftFallback.Tbl3_V128); break;
+
+                    case 4: name = op.RegisterSize == RegisterSize.Simd64
+                        ?
nameof(SoftFallback.Tbl4_V64)
+                        : nameof(SoftFallback.Tbl4_V128); break;
+                }
+
+                context.Copy(GetVec(op.Rd), context.Call(typeof(SoftFallback).GetMethod(name), args)); // Looks up the fallback by name and calls it with the index vector plus tables.
+            }
+        }
+
+        public static void Trn1_V(EmitterContext context) // Transpose using the even-numbered elements (part 0).
+        {
+            EmitVectorTranspose(context, part: 0);
+        }
+
+        public static void Trn2_V(EmitterContext context) // Transpose using the odd-numbered elements (part 1).
+        {
+            EmitVectorTranspose(context, part: 1);
+        }
+
+        public static void Umov_S(EmitterContext context) // Zero-extending move of a vector element to a general register.
+        {
+            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+            Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+            SetIntOrZR(context, op.Rd, ne);
+        }
+
+        public static void Uzp1_V(EmitterContext context) // Unzip keeping the even-numbered elements (part 0).
+        {
+            EmitVectorUnzip(context, part: 0);
+        }
+
+        public static void Uzp2_V(EmitterContext context) // Unzip keeping the odd-numbered elements (part 1).
+        {
+            EmitVectorUnzip(context, part: 1);
+        }
+
+        public static void Xtn_V(EmitterContext context) // Narrows each element of Rn to half width; Simd128 writes the upper half of Rd and keeps the lower.
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                Operand d = GetVec(op.Rd);
+
+                Operand res = context.AddIntrinsic(Instruction.X86Movlhps, d, context.VectorZero()); // Low half of Rd kept, high half zeroed.
+
+                Operand n = GetVec(op.Rn);
+
+                Operand mask = X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]); // Same selector in both qwords: the low halves of every source element.
+
+                Operand res2 = context.AddIntrinsic(Instruction.X86Pshufb, n, mask);
+
+                Instruction movInst = op.RegisterSize == RegisterSize.Simd128
+                    ? Instruction.X86Movlhps
+                    : Instruction.X86Movhlps;
+
+                res = context.AddIntrinsic(movInst, res, res2);
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                int elems = 8 >> op.Size;
+
+                int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; // First destination element index: upper half for Simd128.
+
+                Operand res = part == 0 ?
context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+                    res = EmitVectorInsert(context, res, ne, part + index, op.Size);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        public static void Zip1_V(EmitterContext context)
+        {
+            EmitVectorZip(context, part: 0);
+        }
+
+        public static void Zip2_V(EmitterContext context)
+        {
+            EmitVectorZip(context, part: 1);
+        }
+
+        // Zero-extends "value" from 32 bits when the current op works on a 32-bit integer
+        // register or a 64-bit vector; otherwise returns it unchanged.
+        private static Operand EmitIntZeroUpperIfNeeded(EmitterContext context, Operand value)
+        {
+            if (context.CurrOp.RegisterSize == RegisterSize.Int32 ||
+                context.CurrOp.RegisterSize == RegisterSize.Simd64)
+            {
+                return ZeroExtend32(context, value);
+            }
+
+            return value;
+        }
+
+        // Shared SSE2 implementation of MOVI / MVNI: broadcasts the (optionally inverted)
+        // immediate to every element of Rd.
+        private static void EmitMoviMvni(EmitterContext context, bool not)
+        {
+            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+            long imm = op.Immediate;
+
+            // Replicate narrow immediates up to 32 bits first...
+            switch (op.Size)
+            {
+                case 0: imm *= 0x01010101; break;
+                case 1: imm *= 0x00010001; break;
+            }
+
+            // ...and only then invert. Fix: inverting before the multiply set every upper
+            // bit of the 64-bit value, and the multiplication then produced a wrong pattern
+            // (e.g. ~0x0012 * 0x00010001 truncates to 0xFFECFFED instead of 0xFFEDFFED).
+            if (not)
+            {
+                imm = ~imm;
+            }
+
+            Operand mask;
+
+            if (op.Size < 3)
+            {
+                mask = X86GetAllElements(context, (int)imm);
+            }
+            else
+            {
+                mask = X86GetAllElements(context, imm);
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                mask = context.VectorZeroUpper64(mask);
+            }
+
+            context.Copy(GetVec(op.Rd), mask);
+        }
+
+        // TRN1 / TRN2: interleaves the even (part 0) or odd (part 1) elements of Rn and Rm.
+        private static void EmitVectorTranspose(EmitterContext context, int part)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                Operand mask = null;
+
+                if (op.Size < 3)
+                {
+                    // Fix: the mask tables only have entries for sizes 0..2, so they must not
+                    // be indexed when op.Size == 3 (the lookup used to sit outside this check).
+                    long maskE0 = _masksE0_TrnUzpXtn[op.Size];
+                    long maskE1 = _masksE1_TrnUzp [op.Size];
+
+                    // Even elements go to the low qword, odd elements to the high qword.
+                    mask = X86GetScalar(context, maskE0);
+
+                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+                }
+
+                Operand n = GetVec(op.Rn);
+
+                if (op.Size < 3)
+                {
+                    n = context.AddIntrinsic(Instruction.X86Pshufb, n, mask);
+                }
+
+                Operand m = GetVec(op.Rm);
+
+                if (op.Size < 3)
+                {
+                    m = context.AddIntrinsic(Instruction.X86Pshufb, m,
mask);
+                }
+
+                // Low qwords hold the even elements, high qwords the odd ones, so an
+                // element-sized unpack of the matching half yields the transposed result.
+                Instruction punpckInst = part == 0
+                    ? X86PunpcklInstruction[op.Size]
+                    : X86PunpckhInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+                // Fix: for 64-bit vectors the upper half of the unpack result is built from
+                // the upper halves of the sources and must be cleared, as the fallback path
+                // below does by only filling GetPairsCount() pairs of a zero vector.
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                Operand res = context.VectorZero();
+
+                int pairs = op.GetPairsCount() >> op.Size;
+
+                for (int index = 0; index < pairs; index++)
+                {
+                    int pairIndex = index << 1;
+
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
+                    Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);
+
+                    res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+                    res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // UZP1 / UZP2: concatenates the even (part 0) or odd (part 1) elements of Rn and Rm.
+        private static void EmitVectorUnzip(EmitterContext context, int part)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    Operand mask = null;
+
+                    if (op.Size < 3)
+                    {
+                        long maskE0 = _masksE0_TrnUzpXtn[op.Size];
+                        long maskE1 = _masksE1_TrnUzp [op.Size];
+
+                        mask = X86GetScalar(context, maskE0);
+
+                        mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+                    }
+
+                    Operand n = GetVec(op.Rn);
+
+                    if (op.Size < 3)
+                    {
+                        n = context.AddIntrinsic(Instruction.X86Pshufb, n, mask);
+                    }
+
+                    Operand m = GetVec(op.Rm);
+
+                    if (op.Size < 3)
+                    {
+                        m = context.AddIntrinsic(Instruction.X86Pshufb, m, mask);
+                    }
+
+                    Instruction punpckInst = part == 0
+                        ?
Instruction.X86Punpcklqdq
+                        : Instruction.X86Punpckhqdq;
+
+                    Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+                else
+                {
+                    Operand n = GetVec(op.Rn);
+                    Operand m = GetVec(op.Rm);
+
+                    // 64-bit vectors: interleave the two low halves, then regroup so the even
+                    // positions land in the low qword and the odd positions in the high qword.
+                    Instruction punpcklInst = X86PunpcklInstruction[op.Size];
+
+                    Operand res = context.AddIntrinsic(punpcklInst, n, m);
+
+                    if (op.Size < 2)
+                    {
+                        long maskE0 = _masksE0_Uzp[op.Size];
+                        long maskE1 = _masksE1_Uzp[op.Size];
+
+                        Operand mask = X86GetScalar(context, maskE0);
+
+                        mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+                        res = context.AddIntrinsic(Instruction.X86Pshufb, res, mask);
+                    }
+
+                    // Keep the requested qword in the low half and zero the upper half.
+                    Instruction punpckInst = part == 0
+                        ? Instruction.X86Punpcklqdq
+                        : Instruction.X86Punpckhqdq;
+
+                    res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+            }
+            else
+            {
+                Operand res = context.VectorZero();
+
+                int pairs = op.GetPairsCount() >> op.Size;
+
+                for (int index = 0; index < pairs; index++)
+                {
+                    int idx = index << 1;
+
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
+                    Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
+
+                    res = EmitVectorInsert(context, res, ne, index, op.Size);
+                    res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // ZIP1 / ZIP2: interleaves the lower (part 0) or upper (part 1) halves of Rn and Rm.
+        private static void EmitVectorZip(EmitterContext context, int part)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (Optimizations.UseSse2)
+            {
+                Operand n = GetVec(op.Rn);
+                Operand m = GetVec(op.Rm);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    // Fix: "part" was ignored here, so ZIP2 produced the ZIP1 result. The
+                    // upper halves must be interleaved with the high-unpack instruction.
+                    Instruction punpckZipInst = part == 0
+                        ? X86PunpcklInstruction[op.Size]
+                        : X86PunpckhInstruction[op.Size];
+
+                    Operand res = context.AddIntrinsic(punpckZipInst, n, m);
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+                else
+                {
+                    // 64-bit vectors: interleave both low halves, then pick the low or high
+                    // qword of the 16-byte result depending on "part".
+                    Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);
+
+                    Instruction punpckInst = part == 0
+                        ?
Instruction.X86Punpcklqdq
+                        : Instruction.X86Punpckhqdq;
+
+                    res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); // Selected qword ends up in the low half, upper half zero.
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+            }
+            else
+            {
+                Operand res = context.VectorZero();
+
+                int pairs = op.GetPairsCount() >> op.Size;
+
+                int baseIndex = part != 0 ? pairs : 0; // Part 1 starts reading at the upper half of the sources.
+
+                for (int index = 0; index < pairs; index++)
+                {
+                    int pairIndex = index << 1;
+
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
+                    Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);
+
+                    res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); // Rn element goes to the even slot...
+                    res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); // ...and the Rm element to the odd slot.
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 6e8f29a194..4bf819f2ec 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -290,6 +290,7 @@ namespace ARMeilleure.Instructions
         Sadalp_V,
         Saddl_V,
         Saddlp_V,
+        Saddlv_V,
         Saddw_V,
         Scvtf_Gp,
         Scvtf_Gp_Fixed,
diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs
index 4581b92c7e..1d62985ed6 100644
--- a/ARMeilleure/Instructions/SoftFallback.cs
+++ b/ARMeilleure/Instructions/SoftFallback.cs
@@ -411,6 +411,73 @@ namespace ARMeilleure.Instructions
         }
 #endregion
 
+#region "Table"
+        public static V128 Tbl1_V64(V128 vector, V128 tb0) // TblN_V64 / TblN_V128: "vector" holds the indices, tb0.. the N table registers; 8 or 16 result bytes.
+        {
+            return Tbl(vector, 8, tb0);
+        }
+
+        public static V128 Tbl1_V128(V128 vector, V128 tb0)
+        {
+            return Tbl(vector, 16, tb0);
+        }
+
+        public static V128 Tbl2_V64(V128 vector, V128 tb0, V128 tb1)
+        {
+            return Tbl(vector, 8, tb0, tb1);
+        }
+
+        public static V128 Tbl2_V128(V128 vector, V128 tb0, V128 tb1)
+        {
+            return Tbl(vector, 16, tb0, tb1);
+        }
+
+        public static V128 Tbl3_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2)
+        {
+            return Tbl(vector, 8, tb0, tb1, tb2);
+        }
+
+        public static V128 Tbl3_V128(V128 vector,
V128 tb0, V128 tb1, V128 tb2)
+        {
+            return Tbl(vector, 16, tb0, tb1, tb2);
+        }
+
+        public static V128 Tbl4_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+        {
+            return Tbl(vector, 8, tb0, tb1, tb2, tb3);
+        }
+
+        public static V128 Tbl4_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+        {
+            return Tbl(vector, 16, tb0, tb1, tb2, tb3);
+        }
+
+        private static V128 Tbl(V128 vector, int bytes, params V128[] tb) // Byte table lookup: the first "bytes" bytes of "vector" index into the concatenated tables.
+        {
+            byte[] res = new byte[16]; // Starts zeroed, so unwritten bytes stay 0.
+            byte[] table = new byte[tb.Length * 16];
+
+            for (byte index = 0; index < tb.Length; index++)
+            {
+                Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16); // Lay the table registers out back to back.
+            }
+
+            byte[] v = vector.ToArray();
+
+            for (byte index = 0; index < bytes; index++)
+            {
+                byte tblIdx = v[index];
+
+                if (tblIdx < table.Length) // Out-of-range indices leave the result byte at 0.
+                {
+                    res[index] = table[tblIdx];
+                }
+            }
+
+            return new V128(res);
+        }
+#endregion
+
 #region "Crc32"
         private const uint Crc32RevPoly = 0xedb88320;
         private const uint Crc32cRevPoly = 0x82f63b78;
diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs
index 9cde061ab3..52b8015f38 100644
--- a/ARMeilleure/IntermediateRepresentation/Instruction.cs
+++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs
@@ -131,6 +131,8 @@ namespace ARMeilleure.IntermediateRepresentation
         X86Pmullw,
         X86Popcnt,
         X86Por,
+        X86Pshufb,
+        X86Pslldq,
         X86Psllw,
         X86Psrad,
         X86Psraw,
@@ -142,6 +144,14 @@ namespace ARMeilleure.IntermediateRepresentation
         X86Psubd,
         X86Psubq,
         X86Psubw,
+        X86Punpckhbw,
+        X86Punpckhdq,
+        X86Punpckhqdq,
+        X86Punpckhwd,
+        X86Punpcklbw,
+        X86Punpckldq,
+        X86Punpcklqdq,
+        X86Punpcklwd,
         X86Pxor,
         X86Rcpps,
         X86Rcpss,