Add a few more instructions, fix Vmul_1 encoding.

This commit is contained in:
riperiperi 2020-01-14 01:38:28 +00:00
parent 301ced766c
commit 012fcc6c06
11 changed files with 178 additions and 5 deletions

View file

@ -8,7 +8,7 @@ namespace ARMeilleure.Decoders
{
public int Vd { get; private set; }
public int Vm { get; protected set; }
public int Opc { get; private set; }
public int Opc { get; protected set; }
public int Size { get; protected set; }
public bool Q { get; protected set; }
public bool F { get; protected set; }

View file

@ -7,7 +7,6 @@ namespace ARMeilleure.Decoders
class OpCode32SimdReg : OpCode32Simd
{
public int Vn { get; private set; }
public int Index { get; private set; }
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{

View file

@ -12,6 +12,8 @@ namespace ARMeilleure.Decoders
F = ((opCode >> 8) & 0x1) != 0;
Size = ((opCode >> 20) & 0x3);
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
if (Size == 0b11) throw new Exception("Unknown Encoding!");
Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);

View file

@ -0,0 +1,19 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// Decoded opcode for the A32 SIMD table lookup instruction (registered for <c>InstName.Vtbl</c>).
    /// Builds on <see cref="OpCode32SimdReg"/> and then overrides several of the fields the base
    /// decoders filled in, so that the operation is always treated as a 64-bit, byte-sized one.
    /// </summary>
    class OpCode32SimdTbl : OpCode32SimdReg
    {
        /// <summary>
        /// Raw value of opcode bits 9:8. The emitter adds one to this to get the number of table registers.
        /// </summary>
        public int Length { get; private set; }

        /// <summary>
        /// Decodes the table-lookup specific fields on top of the base register decoding.
        /// </summary>
        /// <param name="inst">Instruction descriptor forwarded to the base decoder.</param>
        /// <param name="address">Address of the instruction, forwarded to the base decoder.</param>
        /// <param name="opCode">Raw 32-bit instruction encoding.</param>
        public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
        {
            // The flag the base decoder stored in Q is re-purposed as Opc for this instruction,
            // so capture it before Q is cleared below.
            int opcFromQ = Q ? 1 : 0;

            Length = (opCode >> 8) & 0b11;

            // Force byte elements on a 64-bit register, regardless of what the base decoder derived.
            Size = 0;
            Opc = opcFromQ;
            Q = false;
            RegisterSize = RegisterSize.Simd64;
        }
    }
}

View file

@ -866,7 +866,7 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11100x10xxxxxxxx10xxx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS));
SetA32("111100110x0xxxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg));
SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
SetA32("1111001x1xxxxxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd));
SetA32("<<<<11101x110001xxxx10xx01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS));
@ -922,6 +922,8 @@ namespace ARMeilleure.Decoders
SetA32("<<<<1101xx00xxxxxxxx10xxxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm));
SetA32("<<<<11101x110001xxxx10xx11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS));
SetA32("111100111x11xx11xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
SetA32("111100100x0xxxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg));
SetA32("111100111x11xx11xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte));
SetA32("111100100x1xxxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg));
@ -929,6 +931,7 @@ namespace ARMeilleure.Decoders
SetA32("111100100x1xxxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg));
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg));
SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl));
SetA32("111100111x11xx10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ));
SetA32("111100111x11xx10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ));
SetA32("111100111x11xx10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ));

View file

@ -637,7 +637,6 @@ namespace ARMeilleure.Instructions
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
Operand n = GetIntOrZR(context, op.Rn);
Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
@ -650,7 +649,6 @@ namespace ARMeilleure.Instructions
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
Operand n = GetIntOrZR(context, op.Rn);
Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));

View file

@ -558,6 +558,22 @@ namespace ARMeilleure.Instructions
EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
}
/// <summary>
/// Emits a vector unary F32 operation whose per-element work is a soft-float call to
/// <c>FPRecipEstimate</c> (the 32-bit and 64-bit implementations are both handed to the call helper).
/// </summary>
/// <param name="context">Emitter context for the instruction being translated.</param>
public static void Vrecpe(ArmEmitterContext context)
{
    EmitVectorUnaryOpF32(context, operand =>
        EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, operand));
}
/// <summary>
/// Emits a vector binary F32 operation whose per-element work is a soft-float call to
/// <c>FPRecipStepFused</c> (the 32-bit and 64-bit implementations are both handed to the call helper).
/// </summary>
/// <param name="context">Emitter context for the instruction being translated.</param>
public static void Vrecps(ArmEmitterContext context)
{
    // NOTE(review): this uses the fused step helper; confirm that is the intended
    // rounding behaviour for the A32 form of this instruction.
    EmitVectorBinaryOpF32(context, (lhs, rhs) =>
        EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
}
public static void Vrsqrte(ArmEmitterContext context)
{
EmitVectorUnaryOpF32(context, (op1) =>

View file

@ -105,6 +105,92 @@ namespace ARMeilleure.Instructions
}
}
/// <summary>
/// Emits the IR for the A32 byte table lookup. For every byte of the index register (Vm) the
/// byte is used as an index into a table made of <c>op.Length + 1</c> consecutive registers
/// starting at Vn; the selected table byte is written to the matching byte of Vd. Indices that
/// are not below <c>length * 8</c> produce either zero or, in the "extension" form (Opc == 1,
/// presumably VTBX), the byte already held in the destination.
/// </summary>
/// <param name="context">Emitter context; <c>context.CurrOp</c> must be an <c>OpCode32SimdTbl</c>.</param>
public static void Vtbl(ArmEmitterContext context)
{
OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
// Extension form: out-of-range indices keep the destination byte instead of producing zero.
bool extension = op.Opc == 1;
// Number of elements to process; the decoder forces Size to 0, so the byte count is not scaled down.
int elems = op.GetBytesCount() >> op.Size;
// Split the register numbers into (vector, sub-index) pairs for the index and destination registers.
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
// The encoded field is the register count minus one.
int length = op.Length + 1;
// Pre-compute the (vector, sub-index) location of every table register: Vn, Vn + 1, ...
Tuple<int, int>[] tableTuples = new Tuple<int, int>[length];
for (int i=0; i< length; i++)
{
(int vn, int en) = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
tableTuples[i] = new Tuple<int, int>(vn, en);
}
// Total table size in bytes (8 bytes per table register); indices at or above this are out of range.
int byteLength = length * 8;
// res accumulates the new destination value; m is the vector holding the indices.
Operand res = GetVecA32(vd);
Operand m = GetVecA32(vm);
for (int index = 0; index < elems; index++)
{
// Zero-extended index byte for this element.
Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems));
// NOTE(review): with the branch below commented out, nothing jumps to this label any more.
Operand end = Label();
Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
Operand elemRes = null; // Note: this is I64 for ease of calculation.
// For some reason this branch ruins everything, so we do an extract + conditional select instead.
// Granted, that is slower.
// --- context.BranchIfFalse(end, inRange); ---
// Get the indexed byte.
// To simplify (ha) the IL, we get bytes from every vector and use a nested conditional select to choose the right result.
// This has to extract `length` times for every element, but it is certainly not as bad as it could be.
// Which table register the index falls in (index / 8).
Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
// How many bits to shift that register right by to bring the wanted byte to the bottom ((index % 8) * 8).
Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));
for (int i=0; i < length; i++)
{
Tuple<int, int> vectorLocation = tableTuples[i];
// Get the whole 64-bit table register; a byte is picked out of it below.
Operand lookupResult;
if (vectorLocation.Item1 == vd)
{
// The table register lives in the same vector as the destination, so read it from res,
// which contains the current state of that vector.
// NOTE(review): res already includes bytes inserted by earlier iterations; if the table
// register is the destination register itself, later lookups would see those new bytes.
// Confirm this matches the intended semantics.
lookupResult = context.VectorExtract(OperandType.I64, res, vectorLocation.Item2);
}
else
{
lookupResult = EmitVectorExtract32(context, vectorLocation.Item1, vectorLocation.Item2, 3, false); // I64
}
lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector.
if (i == 0)
{
elemRes = lookupResult; // First result is always the default.
}
else
{
// Replace the running result when the index actually points into table register i.
Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
}
}
// NOTE(review): this label and the one marked below are never branch targets (see above);
// they look like leftovers from the abandoned branching approach. Confirm before removing.
if (!extension) context.MarkLabel(end);
// Value used when the index is out of range: the existing destination byte (extension form) or zero.
Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false)) : Const(0L);
// Insert the selected byte (or the fallback) into the destination element.
res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0);
if (extension) context.MarkLabel(end);
}
// Write the fully assembled vector back to the destination register.
context.Copy(GetVecA32(vd), res);
}
public static void Vtrn(ArmEmitterContext context)
{
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

View file

@ -567,9 +567,12 @@ namespace ARMeilleure.Instructions
Vstm,
Vstr,
Vsqrt,
Vrecpe,
Vrecps,
Vrsqrte,
Vrsqrts,
Vsub,
Vtbl,
Vtrn,
Vuzp,
Vzip,

View file

@ -87,6 +87,7 @@ namespace ARMeilleure.Translation
public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
{
if (address == 0xa28b75) { }
TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
Statistics.StartTimer();

View file

@ -193,6 +193,52 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
[Test, Combinatorial, Description("VTBL.8 <Dd>, {list}, <Dm>")]
public void Vtbl([Range(0u, 6u)] uint vm, // Index register; may end up holding out-of-range indices.
                 [Range(4u, 12u)] uint vn, // First register of the table.
                 [Values(0u, 1u)] uint vd, // Destination register.
                 [Range(0u, 3u)] uint length,
                 [Values] bool x)
{
    // Register lists running past register 31 are undefined, so there is nothing to compare.
    if (vn + length > 31)
    {
        return;
    }

    // Base encoding; the register, length and bit 6 fields are OR-ed in below.
    uint opcode = 0xf3b00800;

    if (x)
    {
        opcode |= 1 << 6;
    }

    // Each register number is split into its low four bits and a separate high bit.
    opcode |= ((vd & 0x10) << 18) | ((vd & 0xf) << 12);
    opcode |= ((vn & 0x10) << 3) | ((vn & 0xf) << 16);
    opcode |= ((vm & 0x10) << 1) | (vm & 0xf);
    opcode |= (length & 0x3) << 8;

    var random = TestContext.CurrentContext.Random;

    // Fully random contents for the vectors passed as v2..v5.
    V128 random2 = new V128(random.NextULong(), random.NextULong());
    V128 random3 = new V128(random.NextULong(), random.NextULong());
    V128 random4 = new V128(random.NextULong(), random.NextULong());
    V128 random5 = new V128(random.NextULong(), random.NextULong());

    // Bound handed to NextByte for the bytes of v0/v1.
    // NOTE(review): the emitter treats the table as (length + 1) registers, while this uses
    // length * 8 - 1; for length == 0 the subtraction wraps (255 after the cast, assuming
    // default unchecked arithmetic). Confirm this bound is what was intended.
    byte maxIndex = (byte)(length * 8 - 1);

    byte[] bytes0 = new byte[16];
    byte[] bytes1 = new byte[16];

    for (int i = 0; i < 16; i++)
    {
        bytes0[i] = random.NextByte(maxIndex);
        bytes1[i] = random.NextByte(maxIndex);
    }

    V128 bounded0 = new V128(bytes0);
    V128 bounded1 = new V128(bytes1);

    SingleOpcode(opcode, v0: bounded0, v1: bounded1, v2: random2, v3: random3, v4: random4, v5: random5);

    CompareAgainstUnicorn();
}
#endif
}
}