Add a few more instructions, fix Vmul_1 encoding.
This commit is contained in:
parent
301ced766c
commit
012fcc6c06
11 changed files with 178 additions and 5 deletions
|
@ -8,7 +8,7 @@ namespace ARMeilleure.Decoders
|
|||
{
|
||||
public int Vd { get; private set; }
|
||||
public int Vm { get; protected set; }
|
||||
public int Opc { get; private set; }
|
||||
public int Opc { get; protected set; }
|
||||
public int Size { get; protected set; }
|
||||
public bool Q { get; protected set; }
|
||||
public bool F { get; protected set; }
|
||||
|
|
|
@ -7,7 +7,6 @@ namespace ARMeilleure.Decoders
|
|||
class OpCode32SimdReg : OpCode32Simd
|
||||
{
|
||||
public int Vn { get; private set; }
|
||||
public int Index { get; private set; }
|
||||
|
||||
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
||||
{
|
||||
|
|
|
@ -12,6 +12,8 @@ namespace ARMeilleure.Decoders
|
|||
F = ((opCode >> 8) & 0x1) != 0;
|
||||
Size = ((opCode >> 20) & 0x3);
|
||||
|
||||
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
|
||||
|
||||
if (Size == 0b11) throw new Exception("Unknown Encoding!");
|
||||
|
||||
Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
|
||||
|
|
19
ARMeilleure/Decoders/OpCode32SimdTbl.cs
Normal file
19
ARMeilleure/Decoders/OpCode32SimdTbl.cs
Normal file
|
@ -0,0 +1,19 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// Decoded operands for the A32 SIMD table lookup opcode (the pattern registered for <c>Vtbl</c>).
    /// Builds on the three-register decoding done by <see cref="OpCode32SimdReg"/> and then
    /// overrides the fields that have a different meaning for this encoding.
    /// </summary>
    class OpCode32SimdTbl : OpCode32SimdReg
    {
        /// <summary>
        /// Raw two-bit field taken from opcode bits [9:8]. The Vtbl emitter uses
        /// <c>Length + 1</c> as the number of consecutive table registers.
        /// </summary>
        public int Length { get; private set; }

        /// <summary>
        /// Decodes the table-lookup specific fields on top of the base register decoding.
        /// </summary>
        /// <param name="inst">Instruction descriptor forwarded to the base decoder.</param>
        /// <param name="address">Address of the instruction, forwarded to the base decoder.</param>
        /// <param name="opCode">Raw 32-bit instruction word.</param>
        public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
        {
            // The base decoder has already populated Q. For this encoding that flag is
            // re-purposed as the Opc selector, so capture it before Q is cleared below.
            int selector = 0;

            if (Q)
            {
                selector = 1;
            }

            Length = (opCode >> 8) & 0x3;

            // The operation is always treated as byte-sized elements on a 64-bit register.
            Size         = 0;
            Opc          = selector;
            Q            = false;
            RegisterSize = RegisterSize.Simd64;
        }
    }
}
|
|
@ -866,7 +866,7 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("<<<<11100x10xxxxxxxx10xxx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS));
|
||||
SetA32("111100110x0xxxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg));
|
||||
SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
|
||||
SetA32("1111001x1xxxxxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
|
||||
SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
|
||||
|
||||
SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd));
|
||||
SetA32("<<<<11101x110001xxxx10xx01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS));
|
||||
|
@ -922,6 +922,8 @@ namespace ARMeilleure.Decoders
|
|||
|
||||
SetA32("<<<<1101xx00xxxxxxxx10xxxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm));
|
||||
SetA32("<<<<11101x110001xxxx10xx11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS));
|
||||
SetA32("111100111x11xx11xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
|
||||
SetA32("111100100x0xxxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg));
|
||||
SetA32("111100111x11xx11xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte));
|
||||
SetA32("111100100x1xxxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg));
|
||||
|
||||
|
@ -929,6 +931,7 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("111100100x1xxxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg));
|
||||
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg));
|
||||
|
||||
SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl));
|
||||
SetA32("111100111x11xx10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ));
|
||||
SetA32("111100111x11xx10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ));
|
||||
SetA32("111100111x11xx10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ));
|
||||
|
|
|
@ -637,7 +637,6 @@ namespace ARMeilleure.Instructions
|
|||
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
|
||||
|
||||
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
|
||||
var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
|
||||
|
||||
Operand n = GetIntOrZR(context, op.Rn);
|
||||
Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
|
||||
|
@ -650,7 +649,6 @@ namespace ARMeilleure.Instructions
|
|||
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
|
||||
|
||||
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
|
||||
var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
|
||||
|
||||
Operand n = GetIntOrZR(context, op.Rn);
|
||||
Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
|
||||
|
|
|
@ -558,6 +558,22 @@ namespace ARMeilleure.Instructions
|
|||
EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits a vector unary floating-point operation in which every element is passed to the
/// soft-float <c>FPRecipEstimate</c> routine.
/// </summary>
/// <param name="context">Emitter context for the instruction being translated.</param>
public static void Vrecpe(ArmEmitterContext context)
{
    EmitVectorUnaryOpF32(
        context,
        element => EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, element));
}
|
||||
|
||||
/// <summary>
/// Emits a vector binary floating-point operation in which every pair of elements is passed
/// to the soft-float <c>FPRecipStepFused</c> routine.
/// </summary>
/// <param name="context">Emitter context for the instruction being translated.</param>
public static void Vrecps(ArmEmitterContext context)
{
    // NOTE(review): this uses the fused step helper; confirm against the architecture manual
    // whether the A32 form of this instruction is specified with a fused or an unfused step.
    EmitVectorBinaryOpF32(
        context,
        (lhs, rhs) => EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
}
|
||||
|
||||
public static void Vrsqrte(ArmEmitterContext context)
|
||||
{
|
||||
EmitVectorUnaryOpF32(context, (op1) =>
|
||||
|
|
|
@ -105,6 +105,92 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits IR for the byte table lookup decoded as <see cref="OpCode32SimdTbl"/>.
/// Each byte of the index register (Vm) selects a byte from a table made of
/// <c>op.Length + 1</c> consecutive registers starting at Vn, and the selected byte is
/// written to the matching byte of Vd. For an index that is outside the table the
/// destination byte is zeroed, or left unchanged when <c>op.Opc == 1</c>.
/// </summary>
/// <param name="context">Emitter context; <c>CurrOp</c> must be an <see cref="OpCode32SimdTbl"/>.</param>
public static void Vtbl(ArmEmitterContext context)
{
    OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;

    // Opc == 1 selects the "extension" form: out of range indices keep the old destination byte.
    bool extension = op.Opc == 1;

    int elems = op.GetBytesCount() >> op.Size;

    (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
    (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    int length = op.Length + 1;

    // Resolve the location pair of every table register once, outside the per-element loop.
    (int Vec, int Sub)[] tableLocations = new (int Vec, int Sub)[length];

    for (int i = 0; i < length; i++)
    {
        (int vn, int en) = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);

        tableLocations[i] = (vn, en);
    }

    // Each table register contributes 8 bytes; indices at or above this value are out of range.
    int byteLength = length * 8;

    Operand res = GetVecA32(vd);
    Operand m   = GetVecA32(vm);

    for (int index = 0; index < elems; index++)
    {
        Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems));

        Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
        Operand elemRes = null; // Note: this is I64 for ease of calculation.

        // A branch that skipped the lookup for out of range indices was tried and did not work,
        // so the byte is always extracted and a conditional select picks the final value.
        // That is slower, but needs no control flow (and therefore no labels).

        // To keep the IR simple, the candidate byte is extracted from every table register and a
        // chain of conditional selects chooses the right one. This costs `length` extractions
        // per element.

        // Which table register the index falls in.
        Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
        // Bit offset of the selected byte inside that 64-bit register.
        Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));

        for (int i = 0; i < length; i++)
        {
            (int tableVec, int tableSub) = tableLocations[i];

            // Fetch the whole 64-bit table register; the wanted byte is shifted down afterwards.
            Operand lookupResult;

            if (tableVec == vd)
            {
                // The table register lives in the destination vector: read it from the running result.
                // NOTE(review): `res` already holds the bytes written by earlier iterations; confirm this
                // is the intended behaviour when a table register overlaps the destination register.
                lookupResult = context.VectorExtract(OperandType.I64, res, tableSub);
            }
            else
            {
                lookupResult = EmitVectorExtract32(context, tableVec, tableSub, 3, false); // I64
            }

            // Move the selected byte of this register down to bits [7:0].
            lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift);

            if (i == 0)
            {
                // The first register is the default candidate.
                elemRes = lookupResult;
            }
            else
            {
                Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));

                elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
            }
        }

        // Value used when the index is out of range: the old destination byte for the
        // extension form, zero otherwise.
        Operand fallback = extension
            ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false))
            : Const(0L);

        res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0);
    }

    context.Copy(GetVecA32(vd), res);
}
|
||||
|
||||
public static void Vtrn(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
|
||||
|
|
|
@ -567,9 +567,12 @@ namespace ARMeilleure.Instructions
|
|||
Vstm,
|
||||
Vstr,
|
||||
Vsqrt,
|
||||
Vrecpe,
|
||||
Vrecps,
|
||||
Vrsqrte,
|
||||
Vrsqrts,
|
||||
Vsub,
|
||||
Vtbl,
|
||||
Vtrn,
|
||||
Vuzp,
|
||||
Vzip,
|
||||
|
|
|
@ -87,6 +87,7 @@ namespace ARMeilleure.Translation
|
|||
|
||||
public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
|
||||
{
|
||||
if (address == 0xa28b75) { }
|
||||
TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
|
||||
|
||||
Statistics.StartTimer();
|
||||
|
|
|
@ -193,6 +193,52 @@ namespace Ryujinx.Tests.Cpu
|
|||
|
||||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
[Test, Combinatorial, Description("VTBL.8 <Dd>, {list}, <Dm>")]
public void Vtbl([Range(0u, 6u)] uint vm,     // Register holding the lookup indices.
                 [Range(4u, 12u)] uint vn,    // First register of the table list.
                 [Values(0u, 1u)] uint vd,    // Destination register.
                 [Range(0u, 3u)] uint length, // Raw length field of the opcode.
                 [Values] bool x)             // Sets opcode bit 6 when true.
{
    if (vn + length > 31)
    {
        return; // Undefined.
    }

    uint opcode = 0xf3b00800;

    if (x)
    {
        opcode |= 1u << 6;
    }

    // Each register number is split into its low four bits and a separate high bit.
    opcode |= ((vm & 0x10) << 1)  | (vm & 0xf);
    opcode |= ((vd & 0x10) << 18) | ((vd & 0xf) << 12);
    opcode |= ((vn & 0x10) << 3)  | ((vn & 0xf) << 16);

    opcode |= (length & 0x3) << 8;

    var rnd = TestContext.CurrentContext.Random;

    // Random contents for the registers passed as v2..v5 (drawn in this order).
    V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
    V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());
    V128 v4 = new V128(rnd.NextULong(), rnd.NextULong());
    V128 v5 = new V128(rnd.NextULong(), rnd.NextULong());

    // Bound handed to NextByte for every index byte. For length == 0 the unsigned
    // subtraction wraps, so the cast yields 255 (in an unchecked context).
    byte maxIndex = (byte)(length * 8 - 1);

    byte[] indices0 = new byte[16];
    byte[] indices1 = new byte[16];

    for (int pos = 0; pos < indices0.Length; pos++)
    {
        indices0[pos] = rnd.NextByte(maxIndex);
        indices1[pos] = rnd.NextByte(maxIndex);
    }

    V128 v0 = new V128(indices0);
    V128 v1 = new V128(indices1);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5);

    CompareAgainstUnicorn();
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue