Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

This commit is contained in:
gdkchan 2019-06-12 19:33:49 -03:00
parent e8a65ad4a7
commit 9d9aca1c11
11 changed files with 1247 additions and 268 deletions

View file

@ -61,154 +61,164 @@ namespace ARMeilleure.CodeGen.X86
{
_instTable = new InstInfo[(int)X86Instruction.Count];
// Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
Add(X86Instruction.Add, new InstInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstFlags.None));
Add(X86Instruction.Addpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Addps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex));
Add(X86Instruction.Addsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Addss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.And, new InstInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstFlags.None));
Add(X86Instruction.Andnpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Andnps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex));
Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstFlags.None));
Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc8, InstFlags.RegOnly));
Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstFlags.None));
Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None));
Add(X86Instruction.Div, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstFlags.None));
Add(X86Instruction.Divpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Divps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex));
Add(X86Instruction.Divsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Divss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Haddpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Haddps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstFlags.None));
Add(X86Instruction.Imul, new InstInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstFlags.None));
Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstFlags.None));
Add(X86Instruction.Insertps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Maxpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Maxps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex));
Add(X86Instruction.Maxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Maxss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Minpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Minps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex));
Add(X86Instruction.Minsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Minss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Mov, new InstInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstFlags.None));
Add(X86Instruction.Mov16, new InstInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstFlags.Prefix66));
Add(X86Instruction.Mov8, new InstInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstFlags.None));
Add(X86Instruction.Movd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6e, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Movdqu, new InstInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Movhlps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstFlags.Vex));
Add(X86Instruction.Movlhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstFlags.Vex));
Add(X86Instruction.Movq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Movsd, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Movss, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Movsx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstFlags.None));
Add(X86Instruction.Movsx32, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstFlags.None));
Add(X86Instruction.Movsx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstFlags.None));
Add(X86Instruction.Movzx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstFlags.None));
Add(X86Instruction.Movzx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstFlags.None));
Add(X86Instruction.Mul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstFlags.None));
Add(X86Instruction.Mulpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Mulps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex));
Add(X86Instruction.Mulsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Mulss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Neg, new InstInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Not, new InstInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Or, new InstInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstFlags.None));
Add(X86Instruction.Paddb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Paddd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Paddq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Paddw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pand, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pandn, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pavgb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pavgw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pblendvb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pextrb, new InstInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pextrd, new InstInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pextrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pinsrb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pinsrd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pinsrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovsxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovsxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovsxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovzxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovzxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovzxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmulld, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmullw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pop, new InstInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Popcnt, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstFlags.PrefixF3));
Add(X86Instruction.Por, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pshufd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psllw, new InstInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstFlags.Vex | InstFlags.Prefix66));
// PSRAD xmm, imm8 is encoded as 66 0F 72 /4 ib. The RM/I8 value needs the /4 opcode extension in its
// top byte, the same way Psraw (0x04000f71) and Psrld (0x02000f72) carry theirs; 0x00000f72 selected /0.
Add(X86Instruction.Psrad, new InstInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psraw, new InstInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrld, new InstInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrlq, new InstInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrldq, new InstInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrlw, new InstInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Push, new InstInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstFlags.None));
Add(X86Instruction.Pxor, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Rcpps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex));
Add(X86Instruction.Rcpss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Ror, new InstInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstFlags.None));
// The SSE4.1 rounding instructions are three-byte opcodes: ROUNDPS/ROUNDPD/ROUNDSS/ROUNDSD are
// 66 0F 3A 08/09/0A/0B. All four take the 66 prefix and differ only in the last opcode byte;
// 0F 3A on its own is just the escape sequence, not a complete opcode.
Add(X86Instruction.Roundpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Roundps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Roundsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Roundss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Rsqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex));
Add(X86Instruction.Rsqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Sar, new InstInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Setcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstFlags.Reg8));
Add(X86Instruction.Shl, new InstInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Shr, new InstInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstFlags.None));
// SHUFPD is 66 0F C6 (same opcode byte as Shufps, plus the 66 prefix). 0F 5D is the Minpd/Minps opcode.
Add(X86Instruction.Shufpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Shufps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex));
Add(X86Instruction.Sqrtpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Sqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex));
Add(X86Instruction.Sqrtsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Sqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Sub, new InstInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstFlags.None));
Add(X86Instruction.Subpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Subps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex));
Add(X86Instruction.Subsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Subss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Test, new InstInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Unpckhpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Unpckhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex));
Add(X86Instruction.Unpcklpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Unpcklps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex));
Add(X86Instruction.Xor, new InstInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstFlags.None));
Add(X86Instruction.Xorpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Xorps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex));
// Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
Add(X86Instruction.Add, new InstInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstFlags.None));
Add(X86Instruction.Addpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Addps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex));
Add(X86Instruction.Addsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Addss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.And, new InstInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstFlags.None));
Add(X86Instruction.Andnpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Andnps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstFlags.Vex));
Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstFlags.None));
Add(X86Instruction.Bswap, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc8, InstFlags.RegOnly));
Add(X86Instruction.Call, new InstInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Cmovcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstFlags.None));
Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None));
Add(X86Instruction.Div, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstFlags.None));
Add(X86Instruction.Divpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Divps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex));
Add(X86Instruction.Divsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Divss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Haddpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Haddps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstFlags.None));
Add(X86Instruction.Imul, new InstInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstFlags.None));
Add(X86Instruction.Imul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstFlags.None));
Add(X86Instruction.Insertps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Maxpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Maxps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex));
Add(X86Instruction.Maxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Maxss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Minpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Minps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex));
Add(X86Instruction.Minsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Minss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Mov, new InstInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstFlags.None));
Add(X86Instruction.Mov16, new InstInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstFlags.Prefix66));
Add(X86Instruction.Mov8, new InstInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstFlags.None));
Add(X86Instruction.Movd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6e, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Movdqu, new InstInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Movhlps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstFlags.Vex));
Add(X86Instruction.Movlhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstFlags.Vex));
Add(X86Instruction.Movq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Movsd, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Movss, new InstInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Movsx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstFlags.None));
Add(X86Instruction.Movsx32, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstFlags.None));
Add(X86Instruction.Movsx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstFlags.None));
Add(X86Instruction.Movzx16, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstFlags.None));
Add(X86Instruction.Movzx8, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstFlags.None));
Add(X86Instruction.Mul128, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstFlags.None));
Add(X86Instruction.Mulpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Mulps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex));
Add(X86Instruction.Mulsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Mulss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Neg, new InstInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Not, new InstInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Or, new InstInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstFlags.None));
Add(X86Instruction.Paddb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Paddd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Paddq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Paddw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pand, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pandn, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pavgb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pavgw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pblendvb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpeqw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pcmpgtw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pextrb, new InstInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pextrd, new InstInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pextrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pinsrb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pinsrd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pinsrw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmaxuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminsb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminsw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminub, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminud, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pminuw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovsxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovsxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovsxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovzxbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovzxdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmovzxwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmulld, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pmullw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pop, new InstInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Popcnt, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstFlags.PrefixF3));
Add(X86Instruction.Por, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pshufb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pshufd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Pslldq, new InstInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psllw, new InstInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstFlags.Vex | InstFlags.Prefix66));
// PSRAD xmm, imm8 is encoded as 66 0F 72 /4 ib. The RM/I8 value needs the /4 opcode extension in its
// top byte, the same way Psraw (0x04000f71) and Psrld (0x02000f72) carry theirs; 0x00000f72 selected /0.
Add(X86Instruction.Psrad, new InstInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psraw, new InstInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrld, new InstInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrlq, new InstInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrldq, new InstInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psrlw, new InstInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubb, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Psubw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpckhbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpckhdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpckhqdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpckhwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpcklbw, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpckldq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpcklqdq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Punpcklwd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Push, new InstInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstFlags.None));
Add(X86Instruction.Pxor, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Rcpps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex));
Add(X86Instruction.Rcpss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Ror, new InstInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstFlags.None));
// The SSE4.1 rounding instructions are three-byte opcodes: ROUNDPS/ROUNDPD/ROUNDSS/ROUNDSD are
// 66 0F 3A 08/09/0A/0B. All four take the 66 prefix and differ only in the last opcode byte;
// 0F 3A on its own is just the escape sequence, not a complete opcode.
Add(X86Instruction.Roundpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Roundps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Roundsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Roundss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Rsqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex));
Add(X86Instruction.Rsqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Sar, new InstInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Setcc, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstFlags.Reg8));
Add(X86Instruction.Shl, new InstInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Shr, new InstInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstFlags.None));
// SHUFPD is 66 0F C6 (same opcode byte as Shufps, plus the 66 prefix). 0F 5D is the Minpd/Minps opcode.
Add(X86Instruction.Shufpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Shufps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstFlags.Vex));
Add(X86Instruction.Sqrtpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Sqrtps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex));
Add(X86Instruction.Sqrtsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Sqrtss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Sub, new InstInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstFlags.None));
Add(X86Instruction.Subpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Subps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex));
Add(X86Instruction.Subsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF2));
Add(X86Instruction.Subss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstFlags.Vex | InstFlags.PrefixF3));
Add(X86Instruction.Test, new InstInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstFlags.None));
Add(X86Instruction.Unpckhpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Unpckhps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstFlags.Vex));
Add(X86Instruction.Unpcklpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Unpcklps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstFlags.Vex));
Add(X86Instruction.Xor, new InstInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstFlags.None));
Add(X86Instruction.Xorpd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex | InstFlags.Prefix66));
Add(X86Instruction.Xorps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstFlags.Vex));
}
private static void Add(X86Instruction inst, InstInfo info)
@ -836,6 +846,11 @@ namespace ARMeilleure.CodeGen.X86
WriteInstruction(dest, source, X86Instruction.Por, source1);
}
// Emits PSHUFB by forwarding the three operands, unchanged, to WriteInstruction.
public void Pshufb(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Pshufb, source1);
public void Pshufd(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, source, X86Instruction.Pshufd);
@ -843,6 +858,11 @@ namespace ARMeilleure.CodeGen.X86
WriteByte(imm);
}
// Emits PSLLDQ. Note the operand order: source1 is passed first and dest last,
// the same ordering the Psllw wrapper uses (unlike the non-shift wrappers).
public void Pslldq(Operand dest, Operand source, Operand source1)
    => WriteInstruction(source1, source, X86Instruction.Pslldq, dest);
public void Psllw(Operand dest, Operand source, Operand source1)
{
WriteInstruction(source1, source, X86Instruction.Psllw, dest);
@ -898,6 +918,46 @@ namespace ARMeilleure.CodeGen.X86
WriteInstruction(dest, source, X86Instruction.Psubw, source1);
}
// Emits PUNPCKHBW by forwarding the operands to WriteInstruction.
public void Punpckhbw(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpckhbw, source1);
// Emits PUNPCKHDQ by forwarding the operands to WriteInstruction.
public void Punpckhdq(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpckhdq, source1);
// Emits PUNPCKHQDQ by forwarding the operands to WriteInstruction.
public void Punpckhqdq(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpckhqdq, source1);
// Emits PUNPCKHWD by forwarding the operands to WriteInstruction.
public void Punpckhwd(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpckhwd, source1);
// Emits PUNPCKLBW by forwarding the operands to WriteInstruction.
public void Punpcklbw(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpcklbw, source1);
// Emits PUNPCKLDQ by forwarding the operands to WriteInstruction.
public void Punpckldq(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpckldq, source1);
// Emits PUNPCKLQDQ by forwarding the operands to WriteInstruction.
public void Punpcklqdq(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpcklqdq, source1);
// Emits PUNPCKLWD by forwarding the operands to WriteInstruction.
public void Punpcklwd(Operand dest, Operand source, Operand source1)
    => WriteInstruction(dest, source, X86Instruction.Punpcklwd, source1);
public void Push(Operand source)
{
if (source.Kind == OperandKind.Register)
@ -1325,6 +1385,8 @@ namespace ARMeilleure.CodeGen.X86
modRM |= 0xc0;
}
Debug.Assert(opCode != BadOp, "Invalid opcode value.");
if ((flags & InstFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
{
int vexByte2 = (int)(flags & InstFlags.PrefixMask) >> (int)InstFlags.PrefixBit;
@ -1388,8 +1450,6 @@ namespace ARMeilleure.CodeGen.X86
}
}
Debug.Assert(opCode != BadOp, "Invalid opcode value.");
if ((opCode & 0xff0000) != 0)
{
WriteByte((byte)(opCode >> 16));

View file

@ -144,6 +144,8 @@ namespace ARMeilleure.CodeGen.X86
Add(Instruction.X86Pmullw, GenerateX86Pmullw);
Add(Instruction.X86Popcnt, GenerateX86Popcnt);
Add(Instruction.X86Por, GenerateX86Por);
Add(Instruction.X86Pshufb, GenerateX86Pshufb);
Add(Instruction.X86Pslldq, GenerateX86Pslldq);
Add(Instruction.X86Psllw, GenerateX86Psllw);
Add(Instruction.X86Psrad, GenerateX86Psrad);
Add(Instruction.X86Psraw, GenerateX86Psraw);
@ -155,6 +157,14 @@ namespace ARMeilleure.CodeGen.X86
Add(Instruction.X86Psubd, GenerateX86Psubd);
Add(Instruction.X86Psubq, GenerateX86Psubq);
Add(Instruction.X86Psubw, GenerateX86Psubw);
Add(Instruction.X86Punpckhbw, GenerateX86Punpckhbw);
Add(Instruction.X86Punpckhdq, GenerateX86Punpckhdq);
Add(Instruction.X86Punpckhqdq, GenerateX86Punpckhqdq);
Add(Instruction.X86Punpckhwd, GenerateX86Punpckhwd);
Add(Instruction.X86Punpcklbw, GenerateX86Punpcklbw);
Add(Instruction.X86Punpckldq, GenerateX86Punpckldq);
Add(Instruction.X86Punpcklqdq, GenerateX86Punpcklqdq);
Add(Instruction.X86Punpcklwd, GenerateX86Punpcklwd);
Add(Instruction.X86Pxor, GenerateX86Pxor);
Add(Instruction.X86Rcpps, GenerateX86Rcpps);
Add(Instruction.X86Rcpss, GenerateX86Rcpss);
@ -1249,6 +1259,16 @@ namespace ARMeilleure.CodeGen.X86
context.Assembler.Por(operation.Dest, operation.GetSource(1), operation.GetSource(0));
}
// Lowers an X86Pshufb IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Pshufb(CodeGenContext context, Operation operation)
    => context.Assembler.Pshufb(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Pslldq IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Pslldq(CodeGenContext context, Operation operation)
    => context.Assembler.Pslldq(operation.Dest, operation.GetSource(1), operation.GetSource(0));
private static void GenerateX86Psllw(CodeGenContext context, Operation operation)
{
context.Assembler.Psllw(operation.Dest, operation.GetSource(1), operation.GetSource(0));
@ -1304,6 +1324,46 @@ namespace ARMeilleure.CodeGen.X86
context.Assembler.Psubw(operation.Dest, operation.GetSource(1), operation.GetSource(0));
}
// Lowers an X86Punpckhbw IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpckhbw(CodeGenContext context, Operation operation)
    => context.Assembler.Punpckhbw(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpckhdq IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpckhdq(CodeGenContext context, Operation operation)
    => context.Assembler.Punpckhdq(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpckhqdq IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpckhqdq(CodeGenContext context, Operation operation)
    => context.Assembler.Punpckhqdq(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpckhwd IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpckhwd(CodeGenContext context, Operation operation)
    => context.Assembler.Punpckhwd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpcklbw IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpcklbw(CodeGenContext context, Operation operation)
    => context.Assembler.Punpcklbw(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpckldq IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpckldq(CodeGenContext context, Operation operation)
    => context.Assembler.Punpckldq(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpcklqdq IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpcklqdq(CodeGenContext context, Operation operation)
    => context.Assembler.Punpcklqdq(operation.Dest, operation.GetSource(1), operation.GetSource(0));
// Lowers an X86Punpcklwd IR operation: the assembler receives Dest, then source 1, then source 0.
private static void GenerateX86Punpcklwd(CodeGenContext context, Operation operation)
    => context.Assembler.Punpcklwd(operation.Dest, operation.GetSource(1), operation.GetSource(0));
private static void GenerateX86Pxor(CodeGenContext context, Operation operation)
{
context.Assembler.Pxor(operation.Dest, operation.GetSource(1), operation.GetSource(0));

View file

@ -117,7 +117,7 @@ namespace ARMeilleure.CodeGen.X86
private static void AddConstantCopy(LinkedListNode<Node> node, Operation operation)
{
if (operation.SourcesCount == 0 || HasFixedConst(operation.Inst))
if (operation.SourcesCount == 0 || IsIntrinsic(operation.Inst))
{
return;
}
@ -128,20 +128,31 @@ namespace ARMeilleure.CodeGen.X86
Operand src1 = operation.GetSource(0);
Operand src2;
if (src1.Type.IsInteger())
if (src1.Kind == OperandKind.Constant)
{
//Handle integer types.
//Most ALU instructions accepts a 32-bits immediate on the second operand.
//We need to ensure the following:
//- If the constant is on operand 1, we need to move it.
//-- But first, we try to swap operand 1 and 2 if the instruction is commutative.
//-- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
//- If the constant is on operand 2, we check if the instruction supports it,
//if not, we also add a copy. 64-bits constants are usually not supported.
bool isVecCopy = inst == Instruction.Copy && !dest.Type.IsInteger();
if (src1.Kind == OperandKind.Constant && (!HasConstSrc1(inst) || isVecCopy))
if (!src1.Type.IsInteger())
{
//Handle non-integer types (FP32, FP64 and V128).
//For instructions without an immediate operand, we do the following:
//- Insert a copy with the constant value (as integer) to a GPR.
//- Insert a copy from the GPR to a XMM register.
//- Replace the constant use with the XMM register.
src1 = AddXmmCopy(node, src1);
operation.SetSource(0, src1);
}
else if (!HasConstSrc1(inst) || isVecCopy)
{
//Handle integer types.
//Most ALU instructions accepts a 32-bits immediate on the second operand.
//We need to ensure the following:
//- If the constant is on operand 1, we need to move it.
//-- But first, we try to swap operand 1 and 2 if the instruction is commutative.
//-- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
//- If the constant is on operand 2, we check if the instruction supports it,
//if not, we also add a copy. 64-bits constants are usually not supported.
if (IsCommutative(inst))
{
src2 = operation.GetSource(1);
@ -162,45 +173,26 @@ namespace ARMeilleure.CodeGen.X86
operation.SetSource(0, src1);
}
}
}
if (operation.SourcesCount < 2)
if (operation.SourcesCount < 2)
{
return;
}
src2 = operation.GetSource(1);
if (src2.Kind == OperandKind.Constant)
{
if (!src2.Type.IsInteger())
{
return;
}
src2 = operation.GetSource(1);
if (src2.Kind == OperandKind.Constant && (!HasConstSrc2(inst) || IsLongConst(src2)))
{
src2 = AddCopy(node, src2);
src2 = AddXmmCopy(node, src2);
operation.SetSource(1, src2);
}
}
else
{
//Handle non-integer types (FP32, FP64 and V128).
//For instructions without an immediate operand, we do the following:
//- Insert a copy with the constant value (as integer) to a GPR.
//- Insert a copy from the GPR to a XMM register.
//- Replace the constant use with the XMM register.
if (src1.Kind == OperandKind.Constant && src1.Type.IsInteger())
else if (!HasConstSrc2(inst) || IsLongConst(src2))
{
src1 = AddXmmCopy(node, src1);
operation.SetSource(0, src1);
}
if (operation.SourcesCount < 2)
{
return;
}
src2 = operation.GetSource(1);
if (src2.Kind == OperandKind.Constant && src2.Type.IsInteger())
{
src2 = AddXmmCopy(node, src2);
src2 = AddCopy(node, src2);
operation.SetSource(1, src2);
}
@ -600,24 +592,6 @@ namespace ARMeilleure.CodeGen.X86
return false;
}
// Returns true for the context load/store and vector extract/insert instructions
// listed below, and for anything IsIntrinsic accepts.
private static bool HasFixedConst(Instruction inst)
{
    bool isListed =
        inst == Instruction.LoadFromContext ||
        inst == Instruction.StoreToContext  ||
        inst == Instruction.VectorExtract   ||
        inst == Instruction.VectorExtract16 ||
        inst == Instruction.VectorExtract8  ||
        inst == Instruction.VectorInsert    ||
        inst == Instruction.VectorInsert16  ||
        inst == Instruction.VectorInsert8;

    return isListed || IsIntrinsic(inst);
}
private static bool IsIntrinsic(Instruction inst)
{
return inst > Instruction.X86Intrinsic_Start &&

View file

@ -103,7 +103,9 @@ namespace ARMeilleure.CodeGen.X86
Pop,
Popcnt,
Por,
Pshufb,
Pshufd,
Pslldq,
Psllw,
Psrad,
Psraw,
@ -115,6 +117,14 @@ namespace ARMeilleure.CodeGen.X86
Psubd,
Psubq,
Psubw,
Punpckhbw,
Punpckhdq,
Punpckhqdq,
Punpckhwd,
Punpcklbw,
Punpckldq,
Punpcklqdq,
Punpcklwd,
Push,
Pxor,
Rcpps,

View file

@ -234,11 +234,11 @@ namespace ARMeilleure.Decoders
SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, null, typeof(OpCodeSimdReg));
SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, null, typeof(OpCodeSimdReg));
SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, typeof(OpCodeSimd));
SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, null, typeof(OpCodeSimdIns));
SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, null, typeof(OpCodeSimdIns));
SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, null, typeof(OpCodeSimdIns));
SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, typeof(OpCodeSimdIns));
SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, typeof(OpCodeSimdIns));
SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, typeof(OpCodeSimdIns));
SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, null, typeof(OpCodeSimdReg));
SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstName.Ext_V, null, typeof(OpCodeSimdExt));
SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstName.Ext_V, InstEmit.Ext_V, typeof(OpCodeSimdExt));
SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstName.Fabd_S, InstEmit.Fabd_S, typeof(OpCodeSimdReg));
SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg));
SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd));
@ -267,7 +267,7 @@ namespace ARMeilleure.Decoders
SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, null, typeof(OpCodeSimd));
SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, null, typeof(OpCodeSimdReg));
SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, null, typeof(OpCodeSimdReg));
SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, null, typeof(OpCodeSimdFcond));
SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, typeof(OpCodeSimdFcond));
SetA64("000111100x10001xx10000xxxxxxxxxx", InstName.Fcvt_S, null, typeof(OpCodeSimd));
SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, null, typeof(OpCodeSimdCvt));
SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, null, typeof(OpCodeSimdCvt));
@ -312,13 +312,13 @@ namespace ARMeilleure.Decoders
SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Se, InstEmit.Fmls_Se, typeof(OpCodeSimdRegElemF));
SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, typeof(OpCodeSimdReg));
SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Ve, InstEmit.Fmls_Ve, typeof(OpCodeSimdRegElemF));
SetA64("000111100x100000010000xxxxxxxxxx", InstName.Fmov_S, null, typeof(OpCodeSimd));
SetA64("000111100x100000010000xxxxxxxxxx", InstName.Fmov_S, InstEmit.Fmov_S, typeof(OpCodeSimd));
SetA64("00011110xx1xxxxxxxx100xxxxxxxxxx", InstName.Fmov_Si, InstEmit.Fmov_Si, typeof(OpCodeSimdFmov));
SetA64("0xx0111100000xxx111101xxxxxxxxxx", InstName.Fmov_V, null, typeof(OpCodeSimdImm));
SetA64("x00111100x100110000000xxxxxxxxxx", InstName.Fmov_Ftoi, null, typeof(OpCodeSimdCvt));
SetA64("x00111100x100111000000xxxxxxxxxx", InstName.Fmov_Itof, null, typeof(OpCodeSimdCvt));
SetA64("1001111010101110000000xxxxxxxxxx", InstName.Fmov_Ftoi1, null, typeof(OpCodeSimdCvt));
SetA64("1001111010101111000000xxxxxxxxxx", InstName.Fmov_Itof1, null, typeof(OpCodeSimdCvt));
SetA64("0xx0111100000xxx111101xxxxxxxxxx", InstName.Fmov_V, InstEmit.Fmov_V, typeof(OpCodeSimdImm));
SetA64("x00111100x100110000000xxxxxxxxxx", InstName.Fmov_Ftoi, InstEmit.Fmov_Ftoi, typeof(OpCodeSimdCvt));
SetA64("x00111100x100111000000xxxxxxxxxx", InstName.Fmov_Itof, InstEmit.Fmov_Itof, typeof(OpCodeSimdCvt));
SetA64("1001111010101110000000xxxxxxxxxx", InstName.Fmov_Ftoi1, InstEmit.Fmov_Ftoi1, typeof(OpCodeSimdCvt));
SetA64("1001111010101111000000xxxxxxxxxx", InstName.Fmov_Itof1, InstEmit.Fmov_Itof1, typeof(OpCodeSimdCvt));
SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", InstName.Fmsub_S, InstEmit.Fmsub_S, typeof(OpCodeSimdReg));
SetA64("000111100x1xxxxx000010xxxxxxxxxx", InstName.Fmul_S, InstEmit.Fmul_S, typeof(OpCodeSimdReg));
SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Se, InstEmit.Fmul_Se, typeof(OpCodeSimdRegElemF));
@ -360,8 +360,8 @@ namespace ARMeilleure.Decoders
SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, typeof(OpCodeSimd));
SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, typeof(OpCodeSimdReg));
SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, typeof(OpCodeSimdReg));
SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, null, typeof(OpCodeSimdIns));
SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, null, typeof(OpCodeSimdIns));
SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, typeof(OpCodeSimdIns));
SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, typeof(OpCodeSimdIns));
SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, null, typeof(OpCodeSimdMemMs));
SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, null, typeof(OpCodeSimdMemMs));
SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, null, typeof(OpCodeSimdMemSs));
@ -377,15 +377,15 @@ namespace ARMeilleure.Decoders
SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, typeof(OpCodeSimdRegElem));
SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, typeof(OpCodeSimdReg));
SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, typeof(OpCodeSimdRegElem));
SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm));
SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm));
SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm));
SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, null, typeof(OpCodeSimdImm));
SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, typeof(OpCodeSimdReg));
SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, typeof(OpCodeSimdRegElem));
SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm));
SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm));
SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, null, typeof(OpCodeSimdImm));
SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm));
SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm));
SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm));
SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, typeof(OpCodeSimd));
SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, typeof(OpCodeSimd));
SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, null, typeof(OpCodeSimd));
@ -406,6 +406,8 @@ namespace ARMeilleure.Decoders
SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, typeof(OpCodeSimd));
SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, typeof(OpCodeSimdReg));
SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, typeof(OpCodeSimd));
SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd));
SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd));
SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, typeof(OpCodeSimdReg));
SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, null, typeof(OpCodeSimdCvt));
SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, null, typeof(OpCodeSimdCvt));
@ -441,7 +443,7 @@ namespace ARMeilleure.Decoders
SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, typeof(OpCodeSimdRegElem));
SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, typeof(OpCodeSimdReg));
SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, typeof(OpCodeSimdRegElem));
SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, null, typeof(OpCodeSimdIns));
SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, typeof(OpCodeSimdIns));
SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, typeof(OpCodeSimdReg));
SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, typeof(OpCodeSimdRegElem));
SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, typeof(OpCodeSimd));
@ -507,9 +509,9 @@ namespace ARMeilleure.Decoders
SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, typeof(OpCodeSimdReg));
SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, typeof(OpCodeSimd));
SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, typeof(OpCodeSimd));
SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, null, typeof(OpCodeSimdTbl));
SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, null, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, null, typeof(OpCodeSimdReg));
SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, typeof(OpCodeSimdTbl));
SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, typeof(OpCodeSimdReg));
SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, typeof(OpCodeSimdReg));
SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, typeof(OpCodeSimdReg));
SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, typeof(OpCodeSimdReg));
@ -573,11 +575,11 @@ namespace ARMeilleure.Decoders
SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, null, typeof(OpCodeSimdShImm));
SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, typeof(OpCodeSimdReg));
SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, null, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, null, typeof(OpCodeSimdReg));
SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, null, typeof(OpCodeSimd));
SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, null, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, null, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, typeof(OpCodeSimdReg));
SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, typeof(OpCodeSimd));
SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, typeof(OpCodeSimdReg));
SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, typeof(OpCodeSimdReg));
#endregion
#region "OpCode Table (AArch32)"

View file

@ -45,7 +45,14 @@ namespace ARMeilleure.Instructions
Instruction addInst = X86PaddInstruction[op.Size];
context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
Operand res = context.AddIntrinsic(addInst, n, m);
if (op.RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
context.Copy(GetVec(op.Rd), res);
}
else
{
@ -1788,6 +1795,11 @@ namespace ARMeilleure.Instructions
EmitAddLongPairwise(context, signed: true, accumulate: false);
}
// SADDLV: reduces the source vector with a signed, widening add across its elements.
public static void Saddlv_V(EmitterContext context)
    => EmitVectorLongAcrossVectorOpSx(context, (lhs, rhs) => context.Add(lhs, rhs));
public static void Saddw_V(EmitterContext context)
{
if (Optimizations.UseSse41)
@ -2295,7 +2307,14 @@ namespace ARMeilleure.Instructions
Instruction subInst = X86PsubInstruction[op.Size];
context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
Operand res = context.AddIntrinsic(subInst, n, m);
if (op.RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
context.Copy(GetVec(op.Rd), res);
}
else
{
@ -2429,7 +2448,7 @@ namespace ARMeilleure.Instructions
public static void Uaddlv_V(EmitterContext context)
{
EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
}
public static void Uaddw_V(EmitterContext context)

View file

@ -17,6 +17,7 @@ namespace ARMeilleure.Instructions
static class InstEmitSimdHelper
{
#region "X86 SSE Instructions"
public static readonly Instruction[] X86PaddInstruction = new Instruction[]
{
Instruction.X86Paddb,
@ -91,6 +92,23 @@ namespace ARMeilleure.Instructions
Instruction.X86Psubq
};
// PUNPCKH* intrinsics ordered by increasing element width (byte, word, dword, qword).
// NOTE(review): presumably indexed by op.Size like X86PaddInstruction - confirm at call sites.
public static readonly Instruction[] X86PunpckhInstruction =
{
    Instruction.X86Punpckhbw,
    Instruction.X86Punpckhwd,
    Instruction.X86Punpckhdq,
    Instruction.X86Punpckhqdq
};
// PUNPCKL* intrinsics ordered by increasing element width (byte, word, dword, qword).
// NOTE(review): presumably indexed by op.Size like X86PaddInstruction - confirm at call sites.
public static readonly Instruction[] X86PunpcklInstruction =
{
    Instruction.X86Punpcklbw,
    Instruction.X86Punpcklwd,
    Instruction.X86Punpckldq,
    Instruction.X86Punpcklqdq
};
#endregion
public static int GetImmShl(OpCodeSimdShImm op)
{
return op.Imm - (8 << op.Size);
@ -103,16 +121,22 @@ namespace ARMeilleure.Instructions
public static Operand X86GetScalar(EmitterContext context, float value)
{
int imm = BitConverter.SingleToInt32Bits(value);
return context.Copy(Local(OperandType.V128), Const(imm));
return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
}
public static Operand X86GetScalar(EmitterContext context, double value)
{
long imm = BitConverter.DoubleToInt64Bits(value);
return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
}
return context.Copy(Local(OperandType.V128), Const(imm));
public static Operand X86GetScalar(EmitterContext context, int value)
{
return context.Copy(Local(OperandType.V128), Const(value));
}
public static Operand X86GetScalar(EmitterContext context, long value)
{
return context.Copy(Local(OperandType.V128), Const(value));
}
public static Operand X86GetAllElements(EmitterContext context, float value)
@ -570,7 +594,7 @@ namespace ARMeilleure.Instructions
{
Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
res = EmitVectorInsert(context, res, ne, index, op.Size);
res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
}
context.Copy(GetVec(op.Rd), res);
@ -627,7 +651,7 @@ namespace ARMeilleure.Instructions
{
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
res = EmitVectorInsert(context, res, ne, index, op.Size);
res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
}
context.Copy(GetVec(op.Rd), res);
@ -781,7 +805,7 @@ namespace ARMeilleure.Instructions
EmitVectorWidenRmBinaryOp(context, emit, signed: false);
}
public static void EmitVectorWidenRmBinaryOp(EmitterContext context, Func2I emit, bool signed)
private static void EmitVectorWidenRmBinaryOp(EmitterContext context, Func2I emit, bool signed)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -940,7 +964,7 @@ namespace ARMeilleure.Instructions
EmitVectorPairwiseOp(context, emit, signed: false);
}
public static void EmitVectorPairwiseOp(EmitterContext context, Func2I emit, bool signed)
private static void EmitVectorPairwiseOp(EmitterContext context, Func2I emit, bool signed)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -967,15 +991,29 @@ namespace ARMeilleure.Instructions
public static void EmitVectorAcrossVectorOpSx(EmitterContext context, Func2I emit)
{
EmitVectorAcrossVectorOp(context, emit, true);
EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
}
public static void EmitVectorAcrossVectorOpZx(EmitterContext context, Func2I emit)
{
EmitVectorAcrossVectorOp(context, emit, false);
EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
}
public static void EmitVectorAcrossVectorOp(EmitterContext context, Func2I emit, bool signed)
// Signed across-vector reduction whose result element is one size wider than the source elements.
public static void EmitVectorLongAcrossVectorOpSx(EmitterContext context, Func2I emit)
    => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
// Unsigned across-vector reduction whose result element is one size wider than the source elements.
public static void EmitVectorLongAcrossVectorOpZx(EmitterContext context, Func2I emit)
    => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
private static void EmitVectorAcrossVectorOp(
EmitterContext context,
Func2I emit,
bool signed,
bool isLong)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
@ -990,7 +1028,11 @@ namespace ARMeilleure.Instructions
res = emit(res, n);
}
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
int size = isLong ? op.Size + 1 : op.Size;
Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);
context.Copy(GetVec(op.Rd), d);
}
public static void EmitVectorPairwiseOpF(EmitterContext context, Func2I emit)
@ -1093,7 +1135,7 @@ namespace ARMeilleure.Instructions
EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
}
public static void EmitSaturatingUnaryOpSx(EmitterContext context, Func1I emit, SaturatingFlags flags)
private static void EmitSaturatingUnaryOpSx(EmitterContext context, Func1I emit, SaturatingFlags flags)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;

View file

@ -1,12 +1,8 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Reflection;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitMemoryHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@ -14,22 +10,760 @@ namespace ARMeilleure.Instructions
{
static partial class InstEmit
{
#region "Masks"
// Each entry below packs eight byte indices (values 0..15) into one 64-bit constant,
// one index per byte, with the index written at "<< 0" occupying the lowest byte.
// NOTE(review): judging by the names these are byte-shuffle control masks for the
// Trn/Uzp/Xtn emitters, one entry per element size - confirm against the users.

// Three entries; the first selects the even bytes 0,2,...,14.
private static readonly long[] _masksE0_TrnUzpXtn = new long[]
{
14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
};
// Three entries; the first selects the odd bytes 1,3,...,15.
private static readonly long[] _masksE1_TrnUzp = new long[]
{
15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0,
15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0,
15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0
};
// Two entries; the low four bytes pick indices with stride 4 (first entry) or pairs with stride 8 (second).
private static readonly long[] _masksE0_Uzp = new long[]
{
13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
};
// Two entries; same layout as _masksE0_Uzp with every index offset to the complementary elements.
private static readonly long[] _masksE1_Uzp = new long[]
{
15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
};
#endregion
// DUP (general): broadcasts the value of general-purpose register Rn into every
// element of vector register Rd.
//
// Fixes over the previous version:
// - both code paths built the result but never stored it, so Rd was left untouched;
// - the SSE2 path filled all 128 bits even for a 64-bit destination, whose upper
//   half must read as zero.
public static void Dup_Gp(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand n = GetIntOrZR(op, op.Rn);

    Operand res;

    if (Optimizations.UseSse2)
    {
        // Replicate 8/16-bit values inside a 32-bit lane with a multiply, so that a
        // single 32-bit broadcast below is enough for every element size under 64 bits.
        switch (op.Size)
        {
            case 0: n = ZeroExtend8 (context, n); n = context.Multiply(n, Const(0x01010101)); break;
            case 1: n = ZeroExtend16(context, n); n = context.Multiply(n, Const(0x00010001)); break;
            case 2: n = ZeroExtend32(context, n); break;
        }

        res = context.VectorInsert(context.VectorZero(), n, 0);

        if (op.Size < 3)
        {
            // Broadcast 32-bit lane 0 to all four lanes.
            res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(0));
        }
        else
        {
            // Copy the low 64 bits into the high 64 bits.
            res = context.AddIntrinsic(Instruction.X86Movlhps, res, res);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }
    }
    else
    {
        res = context.VectorZero();

        int elems = op.GetBytesCount() >> op.Size;

        for (int index = 0; index < elems; index++)
        {
            res = EmitVectorInsert(context, res, n, index, op.Size);
        }
    }

    context.Copy(GetVec(op.Rd), res);
}
// DUP (scalar): extracts element DstIndex of Rn (zero-extended) and writes it to
// element 0 of an otherwise zeroed Rd.
public static void Dup_S(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand element = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

    Operand result = EmitVectorInsert(context, context.VectorZero(), element, 0, op.Size);

    context.Copy(GetVec(op.Rd), result);
}
// DUP (element): broadcasts element DstIndex of vector Rn into every element of Rd.
//
// Fixes over the previous version:
// - the non-SSE fallback built the result but never stored it into Rd;
// - the byte case only interleaved once (PUNPCKLBW), leaving each 32-bit lane as
//   b0,b0,b1,b1; a second interleave (PUNPCKLWD) is needed before the broadcast;
// - the 32-bit case always broadcast lane 0, ignoring DstIndex;
// - the SSE2 path did not clear the upper 64 bits for a 64-bit destination.
public static void Dup_V(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand res;

    if (Optimizations.UseSse2)
    {
        res = GetVec(op.Rn);

        if (op.Size == 0)
        {
            // Move the selected byte to position 0, then widen it to fill 32-bit lane 0.
            res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex));
            res = context.AddIntrinsic(Instruction.X86Punpcklbw, res, res);
            res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res);
        }
        else if (op.Size == 1)
        {
            // Move the selected halfword to position 0, then duplicate it inside lane 0.
            res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex * 2));
            res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res);
        }

        if (op.Size < 2)
        {
            // Lane 0 now holds the replicated element; broadcast it.
            res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(0));
        }
        else if (op.Size == 2)
        {
            // Each 2-bit selector of the SHUFPS immediate picks lane DstIndex.
            res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(op.DstIndex * 0x55));
        }
        else if (op.DstIndex == 0)
        {
            res = context.AddIntrinsic(Instruction.X86Movlhps, res, res);
        }
        else
        {
            res = context.AddIntrinsic(Instruction.X86Movhlps, res, res);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }
    }
    else
    {
        Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

        res = context.VectorZero();

        int elems = op.GetBytesCount() >> op.Size;

        for (int index = 0; index < elems; index++)
        {
            res = EmitVectorInsert(context, res, ne, index, op.Size);
        }
    }

    context.Copy(GetVec(op.Rd), res);
}
/// <summary>
/// Emits IR that builds Rd from a window of bytes starting at byte <c>op.Imm4</c> of Rn
/// and continuing into the low bytes of Rm.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdExt</c>.</param>
public static void Ext_V(EmitterContext context)
{
    OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;

    if (!Optimizations.UseSse2)
    {
        // Software path: copy byte by byte, wrapping the source position around the
        // operation size and switching from Rn to Rm once the window leaves Rn.
        Operand acc = context.VectorZero();

        int byteCount = op.GetBytesCount();
        int wrapMask  = byteCount - 1;

        int srcByte = op.Imm4 & wrapMask;

        for (int dstByte = 0; dstByte < byteCount; dstByte++)
        {
            int srcReg = op.Imm4 + dstByte < byteCount ? op.Rn : op.Rm;

            Operand b = EmitVectorExtractZx(context, srcReg, srcByte, 0);

            srcByte = (srcByte + 1) & wrapMask;

            acc = EmitVectorInsert(context, acc, b, dstByte, 0);
        }

        context.Copy(GetVec(op.Rd), acc);

        return;
    }

    bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

    // Low part of the result: Rn shifted right by Imm4 bytes. For 64-bit operations the
    // upper half is cleared first so no stale bytes are shifted in.
    Operand lowPart = GetVec(op.Rn);

    if (is64Bit)
    {
        lowPart = context.AddIntrinsic(Instruction.X86Movlhps, lowPart, context.VectorZero());
    }

    lowPart = context.AddIntrinsic(Instruction.X86Psrldq, lowPart, Const(op.Imm4));

    // High part of the result: Rm shifted left so its first bytes follow Rn's tail.
    Operand highPart = GetVec(op.Rm);

    highPart = context.AddIntrinsic(Instruction.X86Pslldq, highPart, Const(op.GetBytesCount() - op.Imm4));

    if (is64Bit)
    {
        highPart = context.AddIntrinsic(Instruction.X86Movlhps, highPart, context.VectorZero());
    }

    Operand combined = context.AddIntrinsic(Instruction.X86Por, lowPart, highPart);

    context.Copy(GetVec(op.Rd), combined);
}
/// <summary>
/// Emits IR for a scalar floating-point conditional select: Rd receives element 0 of Rn
/// when the opcode's condition holds, otherwise element 0 of Rm. The rest of Rd is zeroed.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdFcond</c>.</param>
public static void Fcsel_S(EmitterContext context)
{
    OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;

    Operand lblTrue = Label();
    Operand lblEnd  = Label();

    Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);

    context.BranchIfTrue(isTrue, lblTrue);

    // Size 0 denotes single precision, matching how Fmov_V treats Size 0 as a
    // 32-bit element; any other value is double precision.
    OperandType type = op.Size == 0 ? OperandType.FP32
                                    : OperandType.FP64;

    // Condition false: select Rm.
    Operand me = context.VectorExtract(GetVec(op.Rm), Local(type), 0);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));

    context.Branch(lblEnd);

    context.MarkLabel(lblTrue);

    // Condition true: select Rn.
    Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));

    context.MarkLabel(lblEnd);
}
/// <summary>
/// Emits IR that copies the low 64-bit element of the vector register Rn into the
/// integer register Rd, clearing the upper 32 bits when the operation is 32-bit.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt</c>.</param>
public static void Fmov_Ftoi(EmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand lowQuad = EmitVectorExtractZx(context, op.Rn, 0, 3);

    Operand value = EmitIntZeroUpperIfNeeded(context, lowQuad);

    SetIntOrZR(context, op.Rd, value);
}
/// <summary>
/// Emits IR that copies the high 64-bit element (index 1) of the vector register Rn into
/// the integer register Rd, clearing the upper 32 bits when the operation is 32-bit.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt</c>.</param>
public static void Fmov_Ftoi1(EmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand highQuad = EmitVectorExtractZx(context, op.Rn, 1, 3);

    Operand value = EmitIntZeroUpperIfNeeded(context, highQuad);

    SetIntOrZR(context, op.Rd, value);
}
/// <summary>
/// Emits IR that moves the integer register Rn (or the zero register) into the low
/// 64-bit element of the vector register Rd; the remaining bits of Rd are zeroed.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt</c>.</param>
public static void Fmov_Itof(EmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand source = GetIntOrZR(op, op.Rn);

    // Drop the upper 32 bits of the source for 32-bit operations.
    Operand value = EmitIntZeroUpperIfNeeded(context, source);

    Operand d = GetVec(op.Rd);

    Operand vector = EmitVectorInsert(context, context.VectorZero(), value, 0, 3);

    context.Copy(d, vector);
}
/// <summary>
/// Emits IR that moves the integer register Rn (or the zero register) into the high
/// 64-bit element (index 1) of the vector register Rd, leaving the low element intact.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt</c>.</param>
public static void Fmov_Itof1(EmitterContext context)
{
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand n = EmitIntZeroUpperIfNeeded(context, GetIntOrZR(op, op.Rn));

    // This form only replaces the top half of the destination, so the insert must
    // start from the current contents of Rd rather than from a zero vector
    // (which would wipe the low 64 bits).
    Operand d = context.Copy(GetVec(op.Rd));

    context.Copy(GetVec(op.Rd), EmitVectorInsert(context, d, n, 1, 3));
}
/// <summary>
/// Emits IR for a scalar floating-point register move: element 0 of Rn is copied to
/// element 0 of Rd and the rest of Rd is zeroed.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimd</c>.</param>
public static void Fmov_S(EmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    // Size 0 denotes single precision, matching how Fmov_V treats Size 0 as a
    // 32-bit element; any other value is double precision.
    OperandType type = op.Size == 0 ? OperandType.FP32
                                    : OperandType.FP64;

    Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
}
/// <summary>
/// Emits IR that loads the opcode's floating-point immediate into Rd as a scalar
/// (built by <c>X86GetScalar</c>).
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdFmov</c>.</param>
public static void Fmov_Si(EmitterContext context)
{
    OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;

    // A single size test selects the scalar width. The previous stacked
    // "if (Size != 0) if (Size == 0)" made the Size == 0 branch unreachable and
    // emitted nothing at all when Size was 0.
    if (op.Size == 0)
    {
        // 32-bit immediate.
        context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
    }
    else
    {
        // 64-bit immediate.
        context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
    }
}
/// <summary>
/// Emits IR that fills every floating-point element of Rd with the opcode's immediate.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdImm</c>.</param>
public static void Fmov_V(EmitterContext context)
{
    OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

    Operand e;

    if (op.Size == 0)
    {
        // 32-bit elements take the immediate as a 32-bit constant.
        e = Const((int)op.Immediate);
    }
    else
    {
        e = Const(op.Immediate);
    }

    Operand res = context.VectorZero();

    // Number of 32-bit lanes in the operation; shifting by Size gives the element count.
    int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2;

    for (int index = 0; index < (elems >> op.Size); index++)
    {
        // Size 0 -> 32-bit element (log2 size 2), Size 1 -> 64-bit element (log2 size 3).
        res = EmitVectorInsert(context, res, e, index, op.Size + 2);
    }

    // Store the assembled vector, not the scalar constant it was built from.
    context.Copy(GetVec(op.Rd), res);
}
/// <summary>
/// Emits IR that writes the integer register Rn (or the zero register) into element
/// <c>op.DstIndex</c> of the vector register Rd, leaving the other elements unchanged.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns</c>.</param>
public static void Ins_Gp(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand n = GetIntOrZR(op, op.Rn);

    // An insert only replaces one element, so start from the current contents of Rd;
    // inserting into a zero vector would erase every other element.
    Operand d = context.Copy(GetVec(op.Rd));

    context.Copy(GetVec(op.Rd), EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
}
/// <summary>
/// Emits IR that copies element <c>op.SrcIndex</c> of Rn into element <c>op.DstIndex</c>
/// of Rd, leaving the other elements of Rd unchanged.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns</c>.</param>
public static void Ins_V(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);

    // An insert only replaces one element, so start from the current contents of Rd;
    // inserting into a zero vector would erase every other element.
    Operand d = context.Copy(GetVec(op.Rd));

    context.Copy(GetVec(op.Rd), EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
}
/// <summary>
/// Emits IR that fills Rd with the opcode's immediate, using the SSE2 helper when
/// available and the generic immediate unary-op emitter otherwise.
/// </summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Movi_V(EmitterContext context)
{
    if (!Optimizations.UseSse2)
    {
        // Identity: the immediate is stored unchanged.
        EmitVectorImmUnaryOp(context, imm => imm);

        return;
    }

    EmitMoviMvni(context, not: false);
}
/// <summary>
/// Emits IR that fills Rd with the bitwise complement of the opcode's immediate, using
/// the SSE2 helper when available and the generic immediate unary-op emitter otherwise.
/// </summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Mvni_V(EmitterContext context)
{
    if (!Optimizations.UseSse2)
    {
        EmitVectorImmUnaryOp(context, imm => context.BitwiseNot(imm));

        return;
    }

    EmitMoviMvni(context, not: true);
}
/// <summary>
/// Emits IR that sign-extends element <c>op.DstIndex</c> of the vector register Rn and
/// stores it in the integer register Rd, clearing the upper 32 bits when
/// <c>EmitIntZeroUpperIfNeeded</c> deems the operation 32-bit.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns</c>.</param>
public static void Smov_S(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand signExtended = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);

    Operand value = EmitIntZeroUpperIfNeeded(context, signExtended);

    SetIntOrZR(context, op.Rd, value);
}
/// <summary>
/// Emits IR for a vector table lookup: each byte of Rm indexes into a table made of
/// <c>op.Size</c> consecutive vector registers starting at Rn; indices outside the
/// table produce zero.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdTbl</c>.</param>
public static void Tbl_V(EmitterContext context)
{
    OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;

    if (Optimizations.UseSsse3)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        // Highest index that still addresses a single 16-byte table register.
        Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);

        // Bytes greater than 15 compare to 0xFF; OR-ing that into the index sets bit 7,
        // which makes PSHUFB write zero for that byte.
        Operand mMask = context.AddIntrinsic(Instruction.X86Pcmpgtb, m, mask);

        mMask = context.AddIntrinsic(Instruction.X86Por, mMask, m);

        Operand res = context.AddIntrinsic(Instruction.X86Pshufb, n, mMask);

        for (int index = 1; index < op.Size; index++)
        {
            Operand ni = GetVec((op.Rn + index) & 0x1f);

            // Rebase the indices so that this table register covers 0..15.
            // The 0x0F range mask must stay intact for the comparison below; it must
            // not be overwritten with the per-register offset.
            Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index);

            Operand mMinusMask = context.AddIntrinsic(Instruction.X86Psubb, m, indexMask);

            Operand mMask2 = context.AddIntrinsic(Instruction.X86Pcmpgtb, mMinusMask, mask);

            mMask2 = context.AddIntrinsic(Instruction.X86Por, mMask2, mMinusMask);

            Operand res2 = context.AddIntrinsic(Instruction.X86Pshufb, ni, mMask2);

            // Each register contributes zero for indices outside its own range, so the
            // partial results can simply be OR-ed together.
            res = context.AddIntrinsic(Instruction.X86Por, res, res2);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Software path: call the SoftFallback helper matching the table length and
        // operation width, passing the index vector first and then the table registers.
        Operand[] args = new Operand[1 + op.Size];

        args[0] = GetVec(op.Rm);

        for (int index = 0; index < op.Size; index++)
        {
            args[1 + index] = GetVec((op.Rn + index) & 0x1f);
        }

        string name = null;

        switch (op.Size)
        {
            case 1: name = op.RegisterSize == RegisterSize.Simd64
                ? nameof(SoftFallback.Tbl1_V64)
                : nameof(SoftFallback.Tbl1_V128); break;

            case 2: name = op.RegisterSize == RegisterSize.Simd64
                ? nameof(SoftFallback.Tbl2_V64)
                : nameof(SoftFallback.Tbl2_V128); break;

            case 3: name = op.RegisterSize == RegisterSize.Simd64
                ? nameof(SoftFallback.Tbl3_V64)
                : nameof(SoftFallback.Tbl3_V128); break;

            case 4: name = op.RegisterSize == RegisterSize.Simd64
                ? nameof(SoftFallback.Tbl4_V64)
                : nameof(SoftFallback.Tbl4_V128); break;
        }

        context.Copy(GetVec(op.Rd), context.Call(typeof(SoftFallback).GetMethod(name), args));
    }
}
/// <summary>Emits the vector transpose using part 0 (the even-indexed element of each pair).</summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Trn1_V(EmitterContext context) => EmitVectorTranspose(context, 0);
/// <summary>Emits the vector transpose using part 1 (the odd-indexed element of each pair).</summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Trn2_V(EmitterContext context) => EmitVectorTranspose(context, 1);
/// <summary>
/// Emits IR that zero-extends element <c>op.DstIndex</c> of the vector register Rn and
/// stores it in the integer register Rd.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns</c>.</param>
public static void Umov_S(EmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand element = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

    SetIntOrZR(context, op.Rd, element);
}
/// <summary>Emits the vector unzip using part 0 (the even-indexed elements).</summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Uzp1_V(EmitterContext context) => EmitVectorUnzip(context, 0);
/// <summary>Emits the vector unzip using part 1 (the odd-indexed elements).</summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Uzp2_V(EmitterContext context) => EmitVectorUnzip(context, 1);
/// <summary>
/// Emits IR that narrows each element of Rn to half its width and stores the results in
/// Rd: in the low half (upper half zeroed) for 64-bit operations, or in the upper half
/// (low half of Rd preserved) for 128-bit operations.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimd</c>.</param>
public static void Xtn_V(EmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    bool writesUpperHalf = op.RegisterSize == RegisterSize.Simd128;

    if (!Optimizations.UseSsse3)
    {
        // Software path: copy the low part of each wide source element into the
        // destination, starting at element 0 or at the first element of the upper half.
        int count = 8 >> op.Size;

        int firstDst = writesUpperHalf ? count : 0;

        Operand acc = firstDst == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

        for (int i = 0; i < count; i++)
        {
            Operand wide = EmitVectorExtractZx(context, op.Rn, i, op.Size + 1);

            acc = EmitVectorInsert(context, acc, wide, firstDst + i, op.Size);
        }

        context.Copy(GetVec(op.Rd), acc);

        return;
    }

    // Keep the low half of Rd and clear its upper half.
    Operand dst = GetVec(op.Rd);

    Operand kept = context.AddIntrinsic(Instruction.X86Movlhps, dst, context.VectorZero());

    // Shuffle the source with the mask table entry for this element size.
    // NOTE(review): the mask presumably gathers the low part of every element - confirm
    // against the _masksE0_TrnUzpXtn definition.
    Operand src = GetVec(op.Rn);

    Operand shuffleMask = X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]);

    Operand narrowed = context.AddIntrinsic(Instruction.X86Pshufb, src, shuffleMask);

    Instruction mergeInst;

    if (writesUpperHalf)
    {
        mergeInst = Instruction.X86Movlhps;
    }
    else
    {
        mergeInst = Instruction.X86Movhlps;
    }

    Operand merged = context.AddIntrinsic(mergeInst, kept, narrowed);

    context.Copy(GetVec(op.Rd), merged);
}
/// <summary>Emits the vector zip using part 0 (the lower halves of the sources).</summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Zip1_V(EmitterContext context) => EmitVectorZip(context, 0);
/// <summary>Emits the vector zip using part 1 (the upper halves of the sources).</summary>
/// <param name="context">Emitter context for the current opcode.</param>
public static void Zip2_V(EmitterContext context) => EmitVectorZip(context, 1);
/// <summary>
/// Returns <paramref name="value"/> zero-extended from 32 bits when the current opcode's
/// register size is <c>Int32</c> or <c>Simd64</c>; otherwise returns it unchanged.
/// </summary>
/// <param name="context">Emitter context supplying the current opcode.</param>
/// <param name="value">Integer operand to conditionally truncate.</param>
/// <returns>The possibly zero-extended operand.</returns>
private static Operand EmitIntZeroUpperIfNeeded(EmitterContext context, Operand value)
{
    RegisterSize size = context.CurrOp.RegisterSize;

    bool needsZeroExtend = size == RegisterSize.Int32 || size == RegisterSize.Simd64;

    return needsZeroExtend ? ZeroExtend32(context, value) : value;
}
/// <summary>
/// SSE2 implementation shared by <c>Movi_V</c> and <c>Mvni_V</c>: fills Rd with the
/// opcode's immediate replicated across all elements, optionally complemented.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdImm</c>.</param>
/// <param name="not">When true, the replicated pattern is bitwise inverted.</param>
private static void EmitMoviMvni(EmitterContext context, bool not)
{
    OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

    long imm = op.Immediate;

    // Replicate 8 and 16-bit immediates across a 32-bit pattern first.
    switch (op.Size)
    {
        case 0: imm *= 0x01010101; break;
        case 1: imm *= 0x00010001; break;
    }

    // Invert only after replication: multiplying an already inverted (negative)
    // value would not yield the complement of the replicated pattern.
    if (not)
    {
        imm = ~imm;
    }

    Operand mask;

    if (op.Size < 3)
    {
        // Elements of up to 32 bits are broadcast as a 32-bit pattern.
        mask = X86GetAllElements(context, (int)imm);
    }
    else
    {
        mask = X86GetAllElements(context, imm);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        mask = context.VectorZeroUpper64(mask);
    }

    context.Copy(GetVec(op.Rd), mask);
}
/// <summary>
/// Emits IR for the vector transpose used by <c>Trn1_V</c> and <c>Trn2_V</c>: for each
/// pair of elements, element <c>pair * 2 + part</c> of Rn and of Rm are written to the
/// even and odd slots of that pair in Rd.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdReg</c>.</param>
/// <param name="part">0 to take the even element of each pair, 1 to take the odd one.</param>
private static void EmitVectorTranspose(EmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSsse3)
    {
        Operand mask = null;

        if (op.Size < 3)
        {
            // Only look the masks up when they are used, as EmitVectorUnzip does.
            // NOTE(review): the two masks presumably gather even elements into the low
            // quadword and odd elements into the high quadword - confirm against the
            // table definitions.
            long maskE0 = _masksE0_TrnUzpXtn[op.Size];
            long maskE1 = _masksE1_TrnUzp   [op.Size];

            mask = X86GetScalar(context, maskE0);

            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
        }

        Operand n = GetVec(op.Rn);

        if (op.Size < 3)
        {
            n = context.AddIntrinsic(Instruction.X86Pshufb, n, mask);
        }

        Operand m = GetVec(op.Rm);

        if (op.Size < 3)
        {
            m = context.AddIntrinsic(Instruction.X86Pshufb, m, mask);
        }

        // Interleave the selected half of the two shuffled sources.
        Instruction punpckInst = part == 0
            ? X86PunpcklInstruction[op.Size]
            : X86PunpckhInstruction[op.Size];

        Operand res = context.AddIntrinsic(punpckInst, n, m);

        // The software path below leaves the upper half zero for 64-bit operations
        // (it starts from a zero vector and only writes the low pairs); the unpack
        // fills all 128 bits, so clear the upper half here to match.
        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        Operand res = context.VectorZero();

        int pairs = op.GetPairsCount() >> op.Size;

        for (int index = 0; index < pairs; index++)
        {
            int pairIndex = index << 1;

            Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
            Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);

            res = EmitVectorInsert(context, res, ne, pairIndex,     op.Size);
            res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
/// <summary>
/// Emits IR for the vector unzip used by <c>Uzp1_V</c> and <c>Uzp2_V</c>: the elements at
/// index <c>2 * i + part</c> of Rn fill the lower half of the result and those of Rm fill
/// the upper half.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdReg</c>.</param>
/// <param name="part">0 to keep even-indexed elements, 1 to keep odd-indexed elements.</param>
private static void EmitVectorUnzip(EmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (!Optimizations.UseSsse3)
    {
        // Software path: element by element.
        Operand acc = context.VectorZero();

        int pairCount = op.GetPairsCount() >> op.Size;

        for (int i = 0; i < pairCount; i++)
        {
            int srcIndex = (i << 1) + part;

            Operand fromN = EmitVectorExtractZx(context, op.Rn, srcIndex, op.Size);
            Operand fromM = EmitVectorExtractZx(context, op.Rm, srcIndex, op.Size);

            acc = EmitVectorInsert(context, acc, fromN, i,             op.Size);
            acc = EmitVectorInsert(context, acc, fromM, pairCount + i, op.Size);
        }

        context.Copy(GetVec(op.Rd), acc);

        return;
    }

    // Both SSSE3 variants finish by picking the low (part 0) or high (part 1) quadwords.
    Instruction selectHalf = part == 0
        ? Instruction.X86Punpcklqdq
        : Instruction.X86Punpckhqdq;

    if (op.RegisterSize == RegisterSize.Simd128)
    {
        bool needsShuffle = op.Size < 3;

        Operand shuffle = null;

        if (needsShuffle)
        {
            // NOTE(review): the masks presumably gather even elements into the low
            // quadword and odd elements into the high quadword - confirm against the
            // table definitions.
            long lowMask  = _masksE0_TrnUzpXtn[op.Size];
            long highMask = _masksE1_TrnUzp   [op.Size];

            shuffle = X86GetScalar(context, lowMask);

            shuffle = EmitVectorInsert(context, shuffle, Const(highMask), 1, 3);
        }

        Operand first = GetVec(op.Rn);

        if (needsShuffle)
        {
            first = context.AddIntrinsic(Instruction.X86Pshufb, first, shuffle);
        }

        Operand second = GetVec(op.Rm);

        if (needsShuffle)
        {
            second = context.AddIntrinsic(Instruction.X86Pshufb, second, shuffle);
        }

        Operand result = context.AddIntrinsic(selectHalf, first, second);

        context.Copy(GetVec(op.Rd), result);
    }
    else
    {
        Operand first  = GetVec(op.Rn);
        Operand second = GetVec(op.Rm);

        // Interleave the low halves of both sources into one 128-bit value.
        Instruction interleave = X86PunpcklInstruction[op.Size];

        Operand result = context.AddIntrinsic(interleave, first, second);

        if (op.Size < 2)
        {
            long lowMask  = _masksE0_Uzp[op.Size];
            long highMask = _masksE1_Uzp[op.Size];

            Operand shuffle = X86GetScalar(context, lowMask);

            shuffle = EmitVectorInsert(context, shuffle, Const(highMask), 1, 3);

            result = context.AddIntrinsic(Instruction.X86Pshufb, result, shuffle);
        }

        // Pairing with a zero vector leaves the upper 64 bits of the result clear.
        result = context.AddIntrinsic(selectHalf, result, context.VectorZero());

        context.Copy(GetVec(op.Rd), result);
    }
}
/// <summary>
/// Emits IR for the vector zip used by <c>Zip1_V</c> and <c>Zip2_V</c>: interleaves the
/// elements of the lower (part 0) or upper (part 1) halves of Rn and Rm into Rd.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdReg</c>.</param>
/// <param name="part">0 to interleave the lower halves, 1 to interleave the upper halves.</param>
private static void EmitVectorZip(EmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSse2)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // The low unpack interleaves the lower halves and the high unpack the upper
            // halves, so the instruction must follow "part"; always using the low
            // unpack would make part 1 return the same result as part 0.
            Instruction punpckInst = part == 0
                ? X86PunpcklInstruction[op.Size]
                : X86PunpckhInstruction[op.Size];

            Operand res = context.AddIntrinsic(punpckInst, n, m);

            context.Copy(GetVec(op.Rd), res);
        }
        else
        {
            // 64-bit operation: interleave the whole low 64 bits of both sources, then
            // keep the low or high quadword of that result and zero the upper half.
            Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);

            Instruction punpckInst = part == 0
                ? Instruction.X86Punpcklqdq
                : Instruction.X86Punpckhqdq;

            res = context.AddIntrinsic(punpckInst, res, context.VectorZero());

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else
    {
        Operand res = context.VectorZero();

        int pairs = op.GetPairsCount() >> op.Size;

        // Part 1 starts reading at the first element of the upper half.
        int baseIndex = part != 0 ? pairs : 0;

        for (int index = 0; index < pairs; index++)
        {
            int pairIndex = index << 1;

            Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
            Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);

            res = EmitVectorInsert(context, res, ne, pairIndex,     op.Size);
            res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
}
}

View file

@ -290,6 +290,7 @@ namespace ARMeilleure.Instructions
Sadalp_V,
Saddl_V,
Saddlp_V,
Saddlv_V,
Saddw_V,
Scvtf_Gp,
Scvtf_Gp_Fixed,

View file

@ -411,6 +411,73 @@ namespace ARMeilleure.Instructions
}
#endregion
#region "Table"
/// <summary>Table lookup over one table register, producing 8 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <returns>The looked-up bytes; bytes past the first 8 are zero.</returns>
public static V128 Tbl1_V64(V128 vector, V128 tb0) => Tbl(vector, 8, tb0);
/// <summary>Table lookup over one table register, producing 16 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <returns>The looked-up bytes.</returns>
public static V128 Tbl1_V128(V128 vector, V128 tb0) => Tbl(vector, 16, tb0);
/// <summary>Table lookup over two table registers, producing 8 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <param name="tb1">Table register 1.</param>
/// <returns>The looked-up bytes; bytes past the first 8 are zero.</returns>
public static V128 Tbl2_V64(V128 vector, V128 tb0, V128 tb1) => Tbl(vector, 8, tb0, tb1);
/// <summary>Table lookup over two table registers, producing 16 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <param name="tb1">Table register 1.</param>
/// <returns>The looked-up bytes.</returns>
public static V128 Tbl2_V128(V128 vector, V128 tb0, V128 tb1) => Tbl(vector, 16, tb0, tb1);
/// <summary>Table lookup over three table registers, producing 8 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <param name="tb1">Table register 1.</param>
/// <param name="tb2">Table register 2.</param>
/// <returns>The looked-up bytes; bytes past the first 8 are zero.</returns>
public static V128 Tbl3_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2) => Tbl(vector, 8, tb0, tb1, tb2);
/// <summary>Table lookup over three table registers, producing 16 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <param name="tb1">Table register 1.</param>
/// <param name="tb2">Table register 2.</param>
/// <returns>The looked-up bytes.</returns>
public static V128 Tbl3_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2) => Tbl(vector, 16, tb0, tb1, tb2);
/// <summary>Table lookup over four table registers, producing 8 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <param name="tb1">Table register 1.</param>
/// <param name="tb2">Table register 2.</param>
/// <param name="tb3">Table register 3.</param>
/// <returns>The looked-up bytes; bytes past the first 8 are zero.</returns>
public static V128 Tbl4_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) => Tbl(vector, 8, tb0, tb1, tb2, tb3);
/// <summary>Table lookup over four table registers, producing 16 result bytes.</summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="tb0">Table register 0.</param>
/// <param name="tb1">Table register 1.</param>
/// <param name="tb2">Table register 2.</param>
/// <param name="tb3">Table register 3.</param>
/// <returns>The looked-up bytes.</returns>
public static V128 Tbl4_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) => Tbl(vector, 16, tb0, tb1, tb2, tb3);
/// <summary>
/// Shared table-lookup implementation: concatenates the table registers into one byte
/// table and, for each of the first <paramref name="bytes"/> index bytes, fetches the
/// table entry at that index. Out-of-range indices and unprocessed bytes yield zero.
/// </summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="bytes">Number of result bytes to produce (8 or 16 in the callers here).</param>
/// <param name="tb">Table registers, in order; each contributes 16 bytes.</param>
/// <returns>A vector holding the looked-up bytes.</returns>
private static V128 Tbl(V128 vector, int bytes, params V128[] tb)
{
    const int RegisterBytes = 16;

    byte[] result = new byte[RegisterBytes];

    // Flatten the table registers into a single contiguous lookup table.
    byte[] lookup = new byte[tb.Length * RegisterBytes];

    for (int reg = 0; reg < tb.Length; reg++)
    {
        Array.Copy(tb[reg].ToArray(), 0, lookup, reg * RegisterBytes, RegisterBytes);
    }

    byte[] indices = vector.ToArray();

    for (int i = 0; i < bytes; i++)
    {
        int entry = indices[i];

        // Indices beyond the table leave the (already zero) result byte untouched.
        if (entry >= lookup.Length)
        {
            continue;
        }

        result[i] = lookup[entry];
    }

    return new V128(result);
}
#endregion
#region "Crc32"
private const uint Crc32RevPoly = 0xedb88320;
private const uint Crc32cRevPoly = 0x82f63b78;

View file

@ -131,6 +131,8 @@ namespace ARMeilleure.IntermediateRepresentation
X86Pmullw,
X86Popcnt,
X86Por,
X86Pshufb,
X86Pslldq,
X86Psllw,
X86Psrad,
X86Psraw,
@ -142,6 +144,14 @@ namespace ARMeilleure.IntermediateRepresentation
X86Psubd,
X86Psubq,
X86Psubw,
X86Punpckhbw,
X86Punpckhdq,
X86Punpckhqdq,
X86Punpckhwd,
X86Punpcklbw,
X86Punpckldq,
X86Punpcklqdq,
X86Punpcklwd,
X86Pxor,
X86Rcpps,
X86Rcpss,