From 87826b4ac9c73c072bb730fb1567ff02ff6a8126 Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Wed, 29 Jan 2020 13:57:55 +0000
Subject: [PATCH] Add VMOVN, VSHR (imm), VSHRN (imm) and related tests

---
 ...Code32SimdImm6.cs => OpCode32SimdShImm.cs} |   4 +-
 ARMeilleure/Decoders/OpCodeTable.cs           |   5 +-
 .../Instructions/InstEmitSimdArithmetic32.cs  |   5 +
 .../Instructions/InstEmitSimdHelper32.cs      |  21 ++++
 .../Instructions/InstEmitSimdShift32.cs       |  29 ++++-
 ARMeilleure/Instructions/InstName.cs          |   3 +
 Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs         |  24 ++++
 Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs         |   2 +-
 Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs       | 116 ++++++++++++++++++
 9 files changed, 203 insertions(+), 6 deletions(-)
 rename ARMeilleure/Decoders/{OpCode32SimdImm6.cs => OpCode32SimdShImm.cs} (91%)
 create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs

diff --git a/ARMeilleure/Decoders/OpCode32SimdImm6.cs b/ARMeilleure/Decoders/OpCode32SimdShImm.cs
similarity index 91%
rename from ARMeilleure/Decoders/OpCode32SimdImm6.cs
rename to ARMeilleure/Decoders/OpCode32SimdShImm.cs
index 7ae4800d96..b19a601fb6 100644
--- a/ARMeilleure/Decoders/OpCode32SimdImm6.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdShImm.cs
@@ -1,11 +1,11 @@
 namespace ARMeilleure.Decoders
 {
-    class OpCode32SimdShift : OpCode32Simd
+    class OpCode32SimdShImm : OpCode32Simd
     {
         public int Immediate { get; private set; }
         public int Shift { get; private set; }
 
-        public OpCode32SimdShift(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
         {
             Immediate = (opCode >> 16) & 0x3f;
             var limm = ((opCode >> 1) & 0x40) | Immediate;
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 00e8401d92..9505179f8b 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -844,6 +844,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, typeof(OpCode32SimdMovGp)); // To/from gen purpose and single precision.
             SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // To gen purpose.
             SetA32("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and single precision x2.
+            SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ));
             SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
             SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial));
@@ -876,7 +877,9 @@
             SetA32("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, typeof(OpCode32SimdSel));
             SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg));
-            SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShift));
+            SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm));
+            SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm));
+            SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImm));
             SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle));
             SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1.
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 7077323e25..73202a6ac9 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -160,6 +160,11 @@ namespace ARMeilleure.Instructions
             EmitScalarUnaryOpF32(context, (op1) => op1);
         }
 
+        public static void Vmovn(ArmEmitterContext context)
+        {
+            EmitVectorUnaryNarrowOp32(context, (op1) => op1);
+        }
+
         public static void Vneg_S(ArmEmitterContext context)
         {
             EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 14ab0dc606..67edb64209 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -445,6 +445,27 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVecA32(op.Qd), res);
         }
 
+        // Narrow
+
+        public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int elems = 8 >> op.Size; // Size contains the target element size. (for when it becomes a doubleword)
+
+            Operand res = GetVecA32(op.Qd);
+            int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base.
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false);
+
+                res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
+        }
+
         // Generic Functions
 
         public static Operand EmitSoftFloatCallDefaultFpscr(
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
index e312ddf3bc..544bd94545 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -7,6 +7,7 @@ using static ARMeilleure.Instructions.InstEmitSimdHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper32;
 using static ARMeilleure.IntermediateRepresentation.OperandHelper;
 using System.Diagnostics;
+using System;
 
 namespace ARMeilleure.Instructions
 {
@@ -14,9 +15,9 @@ namespace ARMeilleure.Instructions
     {
         public static void Vshl(ArmEmitterContext context)
         {
-            OpCode32SimdShift op = (OpCode32SimdShift)context.CurrOp;
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
 
-            EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op1.Type, op.Shift)));
+            EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift)));
         }
 
         public static void Vshl_I(ArmEmitterContext context)
@@ -33,6 +34,30 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vshr(ArmEmitterContext context)
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+            int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
+            int maxShift = (8 << op.Size) - 1;
+
+            if (op.U)
+            {
+                EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift)));
+            }
+            else
+            {
+                EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift))));
+            }
+        }
+
+        public static void Vshrn(ArmEmitterContext context)
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+            int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
+
+            EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
+        }
+
         private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
         {
             if (shiftLsB.Type == OperandType.I64) shiftLsB = context.ConvertI64ToI32(shiftLsB);
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index fd667fedcd..adf8bb9fc7 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -549,6 +549,7 @@ namespace ARMeilleure.Instructions
         Vmls,
         VMMmn,
         Vmov,
+        Vmovn,
         Vmrs,
         Vmsr,
         Vmul,
@@ -563,6 +564,8 @@ namespace ARMeilleure.Instructions
         Vrint,
         Vsel,
         Vshl,
+        Vshr,
+        Vshrn,
         Vst1,
         Vst2,
         Vst3,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
index e35d8fbef8..888f283bb5 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
@@ -206,6 +206,30 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Combinatorial, Description("VMOVN.<size> <Dd>, <Qm>")]
+        public void Movn_V([Range(0u, 1u, 2u)] uint size,
+                           [Values(0u, 1u, 2u, 3u)] uint vd,
+                           [Values(0u, 2u, 4u, 8u)] uint vm)
+        {
+            uint opcode = 0xf3b20200u; // VMOVN.I16 D0, Q0
+
+            opcode |= (size & 0x3) << 18;
+            opcode |= ((vm & 0x10) << 1);
+            opcode |= ((vm & 0xf) << 0);
+
+            opcode |= ((vd & 0x10) << 18);
+            opcode |= ((vd & 0xf) << 12);
+
+            V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Combinatorial, Description("VTRN.<size> <Vd>, <Vm>")]
         public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
                          [Values(0u, 1u, 2u, 3u)] uint vd,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
index 80cdf11a84..bf912b37b8 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
@@ -323,7 +323,7 @@ namespace Ryujinx.Tests.Cpu
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
-            CompareAgainstUnicorn(fpTolerances: FpTolerances.UpToOneUlpsS);
+            CompareAgainstUnicorn();
         }
 
         [Test, Combinatorial, Description("VPADD.f32 V0, V0, V0")]
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
new file mode 100644
index 0000000000..0584bb7d98
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
@@ -0,0 +1,116 @@
+#define SimdShImm32
+
+using ARMeilleure.State;
+using NUnit.Framework;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("SimdShImm32")]
+    public sealed class CpuTestSimdShImm32 : CpuTest32
+    {
+#if SimdShImm32
+        private const int RndCnt = 5;
+
+        [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshl_Imm([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0u, 1u, 2u, 3u)] uint size,
+                             [Random(RndCnt), Values(0u)] uint shiftImm,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b,
+                             [Values] bool q)
+        {
+            uint opcode = 0xf2800510u; // VORR.I32 D0, #0 (immediate value changes it into SHL)
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rd <<= 1;
+            }
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VSHR.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshr_Imm([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0u, 1u, 2u, 3u)] uint size,
+                             [Random(RndCnt), Values(0u)] uint shiftImm,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b,
+                             [Values] bool u,
+                             [Values] bool q)
+        {
+            uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR)
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rd <<= 1;
+            }
+
+            if (u)
+            {
+                opcode |= 1 << 24;
+            }
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VSHRN.<size> {<Vd>}, <Qm>, #<imm>")]
+        public void Vshrn_Imm([Values(0u, 1u)] uint rd,
+                              [Values(2u, 0u)] uint rm,
+                              [Values(0u, 1u, 2u)] uint size,
+                              [Random(RndCnt), Values(0u)] uint shiftImm,
+                              [Random(RndCnt)] ulong z,
+                              [Random(RndCnt)] ulong a,
+                              [Random(RndCnt)] ulong b)
+        {
+            uint opcode = 0xf2800810u; // VMOV.I16 D0, #0 (immediate value changes it into SHRN)
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}