From 87826b4ac9c73c072bb730fb1567ff02ff6a8126 Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Wed, 29 Jan 2020 13:57:55 +0000
Subject: [PATCH] Add VMOVN, VSHR (imm), VSHRN (imm) and related tests

---
 ...Code32SimdImm6.cs => OpCode32SimdShImm.cs} |   4 +-
 ARMeilleure/Decoders/OpCodeTable.cs           |   5 +-
 .../Instructions/InstEmitSimdArithmetic32.cs  |   5 +
 .../Instructions/InstEmitSimdHelper32.cs      |  21 ++++
 .../Instructions/InstEmitSimdShift32.cs       |  29 ++++-
 ARMeilleure/Instructions/InstName.cs          |   3 +
 Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs         |  24 ++++
 Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs         |   2 +-
 Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs       | 116 ++++++++++++++++++
 9 files changed, 203 insertions(+), 6 deletions(-)
 rename ARMeilleure/Decoders/{OpCode32SimdImm6.cs => OpCode32SimdShImm.cs} (91%)
 create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs

diff --git a/ARMeilleure/Decoders/OpCode32SimdImm6.cs b/ARMeilleure/Decoders/OpCode32SimdShImm.cs
similarity index 91%
rename from ARMeilleure/Decoders/OpCode32SimdImm6.cs
rename to ARMeilleure/Decoders/OpCode32SimdShImm.cs
index 7ae4800d96..b19a601fb6 100644
--- a/ARMeilleure/Decoders/OpCode32SimdImm6.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdShImm.cs
@@ -1,11 +1,11 @@
 namespace ARMeilleure.Decoders
 {
-    class OpCode32SimdShift : OpCode32Simd
+    class OpCode32SimdShImm : OpCode32Simd
     {
         public int Immediate { get; private set; }
         public int Shift { get; private set; }
 
-        public OpCode32SimdShift(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
         {
             Immediate = (opCode >> 16) & 0x3f;
             var limm = ((opCode >> 1) & 0x40) | Immediate;
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 00e8401d92..9505179f8b 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -844,6 +844,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, typeof(OpCode32SimdMovGp)); // To/from gen purpose and single precision.
             SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // To gen purpose.
             SetA32("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and single precision x2.
+            SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ));
             SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
             SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial));
@@ -876,7 +877,9 @@
             SetA32("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, typeof(OpCode32SimdSel));
             SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg));
-            SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShift));
+            SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm));
+            SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm));
+            SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImm));
             SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle));
             SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1.
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 7077323e25..73202a6ac9 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -160,6 +160,11 @@ namespace ARMeilleure.Instructions
             EmitScalarUnaryOpF32(context, (op1) => op1);
         }
 
+        public static void Vmovn(ArmEmitterContext context)
+        {
+            EmitVectorUnaryNarrowOp32(context, (op1) => op1);
+        }
+
         public static void Vneg_S(ArmEmitterContext context)
         {
             EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 14ab0dc606..67edb64209 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -445,6 +445,27 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVecA32(op.Qd), res);
         }
 
+        // Narrow
+
+        public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int elems = 8 >> op.Size; // Size contains the target element size. (for when it becomes a doubleword)
+
+            Operand res = GetVecA32(op.Qd);
+            int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base.
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false);
+
+                res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
+        }
+
         // Generic Functions
 
         public static Operand EmitSoftFloatCallDefaultFpscr(
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
index e312ddf3bc..544bd94545 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -7,6 +7,7 @@ using static ARMeilleure.Instructions.InstEmitSimdHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper32;
 using static ARMeilleure.IntermediateRepresentation.OperandHelper;
 using System.Diagnostics;
+using System;
 
 namespace ARMeilleure.Instructions
 {
@@ -14,9 +15,9 @@ namespace ARMeilleure.Instructions
     {
         public static void Vshl(ArmEmitterContext context)
         {
-            OpCode32SimdShift op = (OpCode32SimdShift)context.CurrOp;
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
 
-            EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op1.Type, op.Shift)));
+            EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift)));
         }
 
         public static void Vshl_I(ArmEmitterContext context)
@@ -33,6 +34,30 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vshr(ArmEmitterContext context)
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+            int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
+            int maxShift = (8 << op.Size) - 1;
+
+            if (op.U)
+            {
+                EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift)));
+            }
+            else
+            {
+                EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift))));
+            }
+        }
+
+        public static void Vshrn(ArmEmitterContext context)
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+            int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
+
+            EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
+        }
+
         private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
         {
             if (shiftLsB.Type == OperandType.I64) shiftLsB = context.ConvertI64ToI32(shiftLsB);
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index fd667fedcd..adf8bb9fc7 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -549,6 +549,7 @@ namespace ARMeilleure.Instructions
         Vmls,
         VMMmn,
         Vmov,
+        Vmovn,
         Vmrs,
         Vmsr,
         Vmul,
@@ -563,6 +564,8 @@ namespace ARMeilleure.Instructions
         Vrint,
         Vsel,
         Vshl,
+        Vshr,
+        Vshrn,
         Vst1,
         Vst2,
         Vst3,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
index e35d8fbef8..888f283bb5 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
@@ -206,6 +206,30 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Combinatorial, Description("VMOVN.<size> <Dd>, <Qm>")]
+        public void Movn_V([Range(0u, 1u, 2u)] uint size,
+                           [Values(0u, 1u, 2u, 3u)] uint vd,
+                           [Values(0u, 2u, 4u, 8u)] uint vm)
+        {
+            uint opcode = 0xf3b20200u; // VMOVN.I16 D0, Q0
+
+            opcode |= (size & 0x3) << 18;
+            opcode |= ((vm & 0x10) << 1);
+            opcode |= ((vm & 0xf) << 0);
+
+            opcode |= ((vd & 0x10) << 18);
+            opcode |= ((vd & 0xf) << 12);
+
+            V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Combinatorial, Description("VTRN.<size> <Vd>, <Vm>")]
         public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
                          [Values(0u, 1u, 2u, 3u)] uint vd,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
index 80cdf11a84..bf912b37b8 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
@@ -323,7 +323,7 @@ namespace Ryujinx.Tests.Cpu
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
-            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsS);
+            CompareAgainstUnicorn();
         }
 
         [Test, Combinatorial, Description("VPADD.f32 V0, V0, V0")]
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
new file mode 100644
index 0000000000..0584bb7d98
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
@@ -0,0 +1,116 @@
+#define SimdShImm32
+
+using ARMeilleure.State;
+using NUnit.Framework;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("SimdShImm32")]
+    public sealed class CpuTestSimdShImm32 : CpuTest32
+    {
+#if SimdShImm32
+        private const int RndCnt = 5;
+
+        [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshl_Imm([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0u, 1u, 2u, 3u)] uint size,
+                             [Random(RndCnt), Values(0u)] uint shiftImm,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b,
+                             [Values] bool q)
+        {
+            uint opcode = 0xf2800510u; // VORR.I32 D0, #0 (immediate value changes it into SHL)
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rd <<= 1;
+            }
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VSHR.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshr_Imm([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0u, 1u, 2u, 3u)] uint size,
+                             [Random(RndCnt), Values(0u)] uint shiftImm,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b,
+                             [Values] bool u,
+                             [Values] bool q)
+        {
+            uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR)
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rd <<= 1;
+            }
+
+            if (u)
+            {
+                opcode |= 1 << 24;
+            }
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VSHRN.<size> {<Vd>}, <Qm>, #<imm>")]
+        public void Vshrn_Imm([Values(0u, 1u)] uint rd,
+                              [Values(2u, 0u)] uint rm,
+                              [Values(0u, 1u, 2u)] uint size,
+                              [Random(RndCnt), Values(0u)] uint shiftImm,
+                              [Random(RndCnt)] ulong z,
+                              [Random(RndCnt)] ulong a,
+                              [Random(RndCnt)] ulong b)
+        {
+            uint opcode = 0xf2800810u; // VMOV.I16 D0, #0 (immediate value changes it into SHRN)
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}