From 012fcc6c0655a9e976cb2e8b824f2b9a3a605a5b Mon Sep 17 00:00:00 2001 From: riperiperi Date: Tue, 14 Jan 2020 01:38:28 +0000 Subject: [PATCH] Add a few more instructions, fix Vmul_1 encoding. --- ARMeilleure/Decoders/OpCode32Simd.cs | 2 +- ARMeilleure/Decoders/OpCode32SimdReg.cs | 1 - ARMeilleure/Decoders/OpCode32SimdRegElem.cs | 2 + ARMeilleure/Decoders/OpCode32SimdTbl.cs | 19 ++++ ARMeilleure/Decoders/OpCodeTable.cs | 5 +- ARMeilleure/Instructions/InstEmitAlu32.cs | 2 - .../Instructions/InstEmitSimdArithmetic32.cs | 16 ++++ .../Instructions/InstEmitSimdMove32.cs | 86 +++++++++++++++++++ ARMeilleure/Instructions/InstName.cs | 3 + ARMeilleure/Translation/Translator.cs | 1 + Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs | 46 ++++++++++ 11 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 ARMeilleure/Decoders/OpCode32SimdTbl.cs diff --git a/ARMeilleure/Decoders/OpCode32Simd.cs b/ARMeilleure/Decoders/OpCode32Simd.cs index 59e9c6d114..a9027a19e3 100644 --- a/ARMeilleure/Decoders/OpCode32Simd.cs +++ b/ARMeilleure/Decoders/OpCode32Simd.cs @@ -8,7 +8,7 @@ namespace ARMeilleure.Decoders { public int Vd { get; private set; } public int Vm { get; protected set; } - public int Opc { get; private set; } + public int Opc { get; protected set; } public int Size { get; protected set; } public bool Q { get; protected set; } public bool F { get; protected set; } diff --git a/ARMeilleure/Decoders/OpCode32SimdReg.cs b/ARMeilleure/Decoders/OpCode32SimdReg.cs index b17b8ccef4..e24f259dcd 100644 --- a/ARMeilleure/Decoders/OpCode32SimdReg.cs +++ b/ARMeilleure/Decoders/OpCode32SimdReg.cs @@ -7,7 +7,6 @@ namespace ARMeilleure.Decoders class OpCode32SimdReg : OpCode32Simd { public int Vn { get; private set; } - public int Index { get; private set; } public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { diff --git a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs index 4b1cf58e10..e4599def9b 
100644 --- a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs +++ b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs @@ -12,6 +12,8 @@ namespace ARMeilleure.Decoders F = ((opCode >> 8) & 0x1) != 0; Size = ((opCode >> 20) & 0x3); + RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; + if (Size == 0b11) throw new Exception("Unknown Encoding!"); Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); diff --git a/ARMeilleure/Decoders/OpCode32SimdTbl.cs b/ARMeilleure/Decoders/OpCode32SimdTbl.cs new file mode 100644 index 0000000000..ef18ce76bb --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdTbl.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdTbl : OpCode32SimdReg + { + public int Length { get; private set; } + public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Length = (opCode >> 8) & 3; + Size = 0; + Opc = Q ? 1 : 0; + Q = false; + RegisterSize = RegisterSize.Simd64; + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 32ae89d536..0c9485c54a 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -866,7 +866,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<11100x10xxxxxxxx10xxx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS)); SetA32("111100110x0xxxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg)); SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg)); - SetA32("1111001x1xxxxxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem)); + SetA32("1111001x1x<> (31 - msb)) << op.Lsb; Operand n = GetIntOrZR(context, op.Rn); Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); @@ -650,7 +649,6 @@ namespace ARMeilleure.Instructions OpCode32AluBf op = 
(OpCode32AluBf)context.CurrOp;
 
             var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
-            var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
 
             Operand n = GetIntOrZR(context, op.Rn);
             Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 7ba526eb71..0927edca04 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -558,6 +558,22 @@ namespace ARMeilleure.Instructions
             EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
         }
 
+        public static void Vrecpe(ArmEmitterContext context)
+        {
+            EmitVectorUnaryOpF32(context, (op1) =>
+            {
+                return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1);
+            });
+        }
+
+        public static void Vrecps(ArmEmitterContext context)
+        {
+            EmitVectorBinaryOpF32(context, (op1, op2) =>
+            {
+                return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2);
+            });
+        }
+
         public static void Vrsqrte(ArmEmitterContext context)
         {
             EmitVectorUnaryOpF32(context, (op1) =>
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index eed4e97e12..e85d9658a8 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -105,6 +105,90 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // Byte-wise table lookup. The table is the op.Length + 1 consecutive 64-bit registers starting at Vn and
+        // every byte of Vm is an index into it. An out of range index yields zero, or leaves the destination
+        // byte untouched when op.Opc == 1 (the "extension" form).
+        public static void Vtbl(ArmEmitterContext context)
+        {
+            OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
+
+            bool extension = op.Opc == 1;
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
+            (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+            int length = op.Length + 1;
+
+            // location of every table register, as returned by GetQuadwordAndSubindex
+            (int Vec, int Elem)[] tableLocations = new (int Vec, int Elem)[length];
+            for (int i = 0; i < length; i++)
+            {
+                (int vn, int en) = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
+                tableLocations[i] = (vn, en);
+            }
+
+            int byteLength = length * 8;
+
+            Operand res = GetVecA32(vd);
+            Operand m = GetVecA32(vm);
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems));
+
+                Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
+                Operand elemRes = null; // note: this is I64 for ease of calculation
+
+                // branching over the lookup when the index is out of range ruined everything for some reason,
+                // so the byte is always extracted and a conditional select picks the result (granted that is slower)
+
+                // get indexed byte
+                // to simplify (ha) the il, we get bytes from every vector and use a nested conditional select to choose the right result
+                // does have to extract `length` times for every element but certainly not as bad as it could be
+
+                // which vector number is the index on
+                Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
+                // what should we shift by to extract it
+                Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));
+
+                for (int i = 0; i < length; i++)
+                {
+                    (int vec, int elem) = tableLocations[i];
+                    // get the whole vector, we'll get a byte out of it
+                    Operand lookupResult;
+                    if (vec == vd)
+                    {
+                        // result contains the current state of the vector
+                        lookupResult = context.VectorExtract(OperandType.I64, res, elem);
+                    }
+                    else
+                    {
+                        lookupResult = EmitVectorExtract32(context, vec, elem, 3, false); //I64
+                    }
+
+                    lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // get the relevant byte from this vector
+
+                    if (i == 0)
+                    {
+                        elemRes = lookupResult; //first result is always default
+                    }
+                    else
+                    {
+                        Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
+                        elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
+                    }
+                }
+
+                Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false)) : Const(0L);
+
+                res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0);
+            }
+
+            context.Copy(GetVecA32(vd), res);
+        }
+
         public static void Vtrn(ArmEmitterContext context)
         {
             OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 7f5e5d7084..2d165118ed 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -567,9 +567,12 @@ namespace ARMeilleure.Instructions
         Vstm,
         Vstr,
         Vsqrt,
+        Vrecpe,
+        Vrecps,
         Vrsqrte,
         Vrsqrts,
         Vsub,
+        Vtbl,
         Vtrn,
         Vuzp,
         Vzip,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
index dd946facc0..c82561f3ca 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
@@ -193,6 +193,54 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Combinatorial, Description("VTBL.8 <Dd>, {list}, <Dm>")]
+        public void Vtbl([Range(0u, 6u)] uint vm, //indices, include potentially invalid
+                         [Range(4u, 12u)] uint vn, //selection
+                         [Values(0u, 1u)] uint vd, //destinations
+                         [Range(0u, 3u)] uint length,
+                         [Values] bool x)
+        {
+            uint opcode = 0xf3b00800;
+            if (vn + length > 31) return; //undefined
+
+            if (x)
+            {
+                opcode |= 1 << 6;
+            }
+            opcode |= (vm & 0x10) << 1;
+            opcode |= (vm & 0xf);
+            opcode |= (vd & 0x10) << 18;
+            opcode |= (vd & 0xf) << 12;
+
+            opcode |= (vn & 0x10) << 3;
+            opcode |= (vn & 0xf) << 16;
+            opcode |= (length & 0x3) << 8;
+
+            var rnd = TestContext.CurrentContext.Random;
+            V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
+            V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());
+            V128 v4 = new V128(rnd.NextULong(), rnd.NextULong());
+            V128 v5 = new V128(rnd.NextULong(), rnd.NextULong());
+
+            // `length` is the encoded field (table registers minus one), so the table holds (length + 1) * 8 bytes.
+            // The limit goes one register past the end so that out of range indices are generated as well.
+            byte indexLimit = (byte)((length + 2) * 8);
+            byte[] b0 = new byte[16];
+            byte[] b1 = new byte[16];
+            for (int i = 0; i < 16; i++)
+            {
+                b0[i] = rnd.NextByte(indexLimit);
+                b1[i] = rnd.NextByte(indexLimit);
+            }
+
+            V128 v0 = new V128(b0);
+            V128 v1 = new V128(b1);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5);
+
+            CompareAgainstUnicorn();
+        }
 #endif
     }
 }