Add a few more instructions, fix Vmul_1 encoding.
This commit is contained in:
parent
301ced766c
commit
012fcc6c06
11 changed files with 178 additions and 5 deletions
|
@ -8,7 +8,7 @@ namespace ARMeilleure.Decoders
|
||||||
{
|
{
|
||||||
public int Vd { get; private set; }
|
public int Vd { get; private set; }
|
||||||
public int Vm { get; protected set; }
|
public int Vm { get; protected set; }
|
||||||
public int Opc { get; private set; }
|
public int Opc { get; protected set; }
|
||||||
public int Size { get; protected set; }
|
public int Size { get; protected set; }
|
||||||
public bool Q { get; protected set; }
|
public bool Q { get; protected set; }
|
||||||
public bool F { get; protected set; }
|
public bool F { get; protected set; }
|
||||||
|
|
|
@ -7,7 +7,6 @@ namespace ARMeilleure.Decoders
|
||||||
class OpCode32SimdReg : OpCode32Simd
|
class OpCode32SimdReg : OpCode32Simd
|
||||||
{
|
{
|
||||||
public int Vn { get; private set; }
|
public int Vn { get; private set; }
|
||||||
public int Index { get; private set; }
|
|
||||||
|
|
||||||
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
||||||
{
|
{
|
||||||
|
|
|
@ -12,6 +12,8 @@ namespace ARMeilleure.Decoders
|
||||||
F = ((opCode >> 8) & 0x1) != 0;
|
F = ((opCode >> 8) & 0x1) != 0;
|
||||||
Size = ((opCode >> 20) & 0x3);
|
Size = ((opCode >> 20) & 0x3);
|
||||||
|
|
||||||
|
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
|
||||||
|
|
||||||
if (Size == 0b11) throw new Exception("Unknown Encoding!");
|
if (Size == 0b11) throw new Exception("Unknown Encoding!");
|
||||||
|
|
||||||
Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
|
Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
|
||||||
|
|
19
ARMeilleure/Decoders/OpCode32SimdTbl.cs
Normal file
19
ARMeilleure/Decoders/OpCode32SimdTbl.cs
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace ARMeilleure.Decoders
|
||||||
|
{
|
||||||
|
// Decoded form of the A32 table-lookup instruction that the opcode table maps to InstEmit32.Vtbl.
// On top of the fields decoded by OpCode32SimdReg it exposes the register-list length field and
// overrides the element size / vector width to byte elements in a 64-bit register.
class OpCode32SimdTbl : OpCode32SimdReg
{
    // Raw two-bit field taken from opcode bits 9:8. The emitter adds one to it to obtain the
    // number of table registers.
    public int Length { get; private set; }

    public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
        // Whatever the base constructors left in Q is re-published through Opc (the emitter
        // tests Opc == 1 for the extension form). It must be captured before Q is cleared.
        bool opBit = Q;

        Opc = opBit ? 1 : 0;
        Q = false;

        // Byte elements, 64-bit operation.
        Size = 0;
        RegisterSize = RegisterSize.Simd64;

        Length = (opCode >> 8) & 0x3;
    }
}
|
||||||
|
}
|
|
@ -866,7 +866,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("<<<<11100x10xxxxxxxx10xxx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS));
|
SetA32("<<<<11100x10xxxxxxxx10xxx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS));
|
||||||
SetA32("111100110x0xxxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg));
|
SetA32("111100110x0xxxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg));
|
||||||
SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
|
SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
|
||||||
SetA32("1111001x1xxxxxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
|
SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
|
||||||
|
|
||||||
SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd));
|
SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd));
|
||||||
SetA32("<<<<11101x110001xxxx10xx01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS));
|
SetA32("<<<<11101x110001xxxx10xx01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS));
|
||||||
|
@ -922,6 +922,8 @@ namespace ARMeilleure.Decoders
|
||||||
|
|
||||||
SetA32("<<<<1101xx00xxxxxxxx10xxxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm));
|
SetA32("<<<<1101xx00xxxxxxxx10xxxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm));
|
||||||
SetA32("<<<<11101x110001xxxx10xx11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS));
|
SetA32("<<<<11101x110001xxxx10xx11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS));
|
||||||
|
SetA32("111100111x11xx11xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
|
||||||
|
SetA32("111100100x0xxxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg));
|
||||||
SetA32("111100111x11xx11xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte));
|
SetA32("111100111x11xx11xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte));
|
||||||
SetA32("111100100x1xxxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg));
|
SetA32("111100100x1xxxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg));
|
||||||
|
|
||||||
|
@ -929,6 +931,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("111100100x1xxxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg));
|
SetA32("111100100x1xxxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg));
|
||||||
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg));
|
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg));
|
||||||
|
|
||||||
|
SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl));
|
||||||
SetA32("111100111x11xx10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ));
|
SetA32("111100111x11xx10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ));
|
||||||
SetA32("111100111x11xx10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ));
|
SetA32("111100111x11xx10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ));
|
||||||
SetA32("111100111x11xx10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ));
|
SetA32("111100111x11xx10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ));
|
||||||
|
|
|
@ -637,7 +637,6 @@ namespace ARMeilleure.Instructions
|
||||||
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
|
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
|
||||||
|
|
||||||
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
|
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
|
||||||
var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
|
|
||||||
|
|
||||||
Operand n = GetIntOrZR(context, op.Rn);
|
Operand n = GetIntOrZR(context, op.Rn);
|
||||||
Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
|
Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
|
||||||
|
@ -650,7 +649,6 @@ namespace ARMeilleure.Instructions
|
||||||
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
|
OpCode32AluBf op = (OpCode32AluBf)context.CurrOp;
|
||||||
|
|
||||||
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
|
var msb = op.Lsb + op.Msb; //for this instruction, the msb is actually a width
|
||||||
var mask = (int)(0xFFFFFFFF >> (31 - msb)) << op.Lsb;
|
|
||||||
|
|
||||||
Operand n = GetIntOrZR(context, op.Rn);
|
Operand n = GetIntOrZR(context, op.Rn);
|
||||||
Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
|
Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
|
||||||
|
|
|
@ -558,6 +558,22 @@ namespace ARMeilleure.Instructions
|
||||||
EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
|
EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VRECPE: every operand handed over by EmitVectorUnaryOpF32 is routed through the soft-float
// reciprocal-estimate helper (32-bit or 64-bit variant, chosen by EmitSoftFloatCall).
public static void Vrecpe(ArmEmitterContext context)
{
    EmitVectorUnaryOpF32(
        context,
        (value) => EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, value));
}
|
||||||
|
|
||||||
|
// VRECPS: every operand pair handed over by EmitVectorBinaryOpF32 is routed through the
// soft-float reciprocal-step helper.
// NOTE(review): the AArch32 VRECPS pseudocode is written in terms of the non-fused FPRecipStep,
// while the fused helper is used here - confirm the rounding difference is acceptable.
public static void Vrecps(ArmEmitterContext context)
{
    EmitVectorBinaryOpF32(
        context,
        (lhs, rhs) => EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
}
|
||||||
|
|
||||||
public static void Vrsqrte(ArmEmitterContext context)
|
public static void Vrsqrte(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
EmitVectorUnaryOpF32(context, (op1) =>
|
EmitVectorUnaryOpF32(context, (op1) =>
|
||||||
|
|
|
@ -105,6 +105,92 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the A32 byte table lookup (VTBL) and its extension form (VTBX).
//
// For every byte of the index register Dm, the byte at that index is fetched from a table made of
// (Length + 1) consecutive 64-bit registers starting at Dn. An index that is past the end of the
// table yields zero (plain form) or leaves the destination byte as it was (extension form).
//
// The generated code is branch-free: every table register is probed for every element and the
// right byte is picked with conditional selects. The original author noted that a branch-based
// variant did not work, so no labels or branches are emitted here.
public static void Vtbl(ArmEmitterContext context)
{
    OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;

    // Opc == 1 selects the extension form (out-of-range indices keep the old destination byte).
    bool extension = op.Opc == 1;

    // Number of elements to produce; the decoder forces Size = 0 (bytes) and a 64-bit register.
    int elems = op.GetBytesCount() >> op.Size;

    (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
    (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    int length = op.Length + 1; // number of table registers
    int byteLength = length * 8; // number of valid table indices

    // Read every table register once, up front, as a 64-bit value. Doing this before any result
    // byte is produced guarantees that the lookup sees the table as it was when the instruction
    // started, even when the destination register is also part of the table. It also avoids
    // re-extracting the same register for every element.
    Operand[] table = new Operand[length];
    for (int i = 0; i < length; i++)
    {
        (int vn, int en) = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
        table[i] = EmitVectorExtract32(context, vn, en, 3, false); // I64
    }

    Operand res = GetVecA32(vd);
    Operand m = GetVecA32(vm);

    for (int index = 0; index < elems; index++)
    {
        Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems));

        Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));

        // Which table register the index falls in.
        Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
        // How far that register has to be shifted right to bring the wanted byte to bit 0.
        Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));

        // Note: kept as I64 for ease of calculation; only the low byte is inserted below.
        Operand elemRes = null;

        for (int i = 0; i < length; i++)
        {
            // Candidate byte from table register i.
            Operand lookupResult = context.ShiftRightUI(table[i], subVecIndexShift);

            if (i == 0)
            {
                // The first register is the default; later registers override it on a match.
                elemRes = lookupResult;
            }
            else
            {
                Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
                elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
            }
        }

        // Value used when the index is out of range: the current destination byte for the
        // extension form, zero otherwise.
        Operand fallback = extension
            ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false))
            : Const(0L);

        res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0);
    }

    // The destination register is only written here, after every element has been computed.
    context.Copy(GetVecA32(vd), res);
}
|
||||||
|
|
||||||
public static void Vtrn(ArmEmitterContext context)
|
public static void Vtrn(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
|
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
|
||||||
|
|
|
@ -567,9 +567,12 @@ namespace ARMeilleure.Instructions
|
||||||
Vstm,
|
Vstm,
|
||||||
Vstr,
|
Vstr,
|
||||||
Vsqrt,
|
Vsqrt,
|
||||||
|
Vrecpe,
|
||||||
|
Vrecps,
|
||||||
Vrsqrte,
|
Vrsqrte,
|
||||||
Vrsqrts,
|
Vrsqrts,
|
||||||
Vsub,
|
Vsub,
|
||||||
|
Vtbl,
|
||||||
Vtrn,
|
Vtrn,
|
||||||
Vuzp,
|
Vuzp,
|
||||||
Vzip,
|
Vzip,
|
||||||
|
|
|
@ -87,6 +87,7 @@ namespace ARMeilleure.Translation
|
||||||
|
|
||||||
public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
|
public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
|
||||||
{
|
{
|
||||||
|
if (address == 0xa28b75) { }
|
||||||
TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
|
TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
|
||||||
|
|
||||||
Statistics.StartTimer();
|
Statistics.StartTimer();
|
||||||
|
|
|
@ -193,6 +193,52 @@ namespace Ryujinx.Tests.Cpu
|
||||||
|
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs a single table-lookup opcode and compares the resulting state against Unicorn.
//   vm     - index register; 0..3 hold generated in-range indices, higher numbers hold random data
//            and therefore also exercise out-of-range indices
//   vn     - first table register
//   vd     - destination register
//   length - encoded list-length field (number of table registers minus one)
//   x      - when set, bit 6 of the encoding is set (table-extension variant)
[Test, Combinatorial, Description("VTBL.8 <Dd>, {list}, <Dm>")]
public void Vtbl([Range(0u, 6u)] uint vm, //indices, include potentially invalid
                 [Range(4u, 12u)] uint vn, //selection
                 [Values(0u, 1u)] uint vd, //destinations
                 [Range(0u, 3u)] uint length,
                 [Values] bool x)
{
    uint opcode = 0xf3b00800;

    if (vn + length > 31)
    {
        return; //undefined
    }

    if (x)
    {
        opcode |= 1u << 6;
    }

    opcode |= (vm & 0x10) << 1;
    opcode |= (vm & 0xf);
    opcode |= (vd & 0x10) << 18;
    opcode |= (vd & 0xf) << 12;

    opcode |= (vn & 0x10) << 3;
    opcode |= (vn & 0xf) << 16;
    opcode |= (length & 0x3) << 8;

    var rnd = TestContext.CurrentContext.Random;

    // Random table contents.
    V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
    V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());
    V128 v4 = new V128(rnd.NextULong(), rnd.NextULong());
    V128 v5 = new V128(rnd.NextULong(), rnd.NextULong());

    // The table spans (length + 1) registers of 8 bytes each. NextByte's argument is an
    // exclusive upper bound, so this draws every valid index and nothing else. The previous
    // bound of length * 8 - 1 wrapped around to 255 for length == 0 and otherwise never
    // reached the last table register.
    byte indexLimit = (byte)((length + 1) * 8);

    byte[] b0 = new byte[16];
    byte[] b1 = new byte[16];
    for (int i = 0; i < 16; i++)
    {
        b0[i] = rnd.NextByte(indexLimit);
        b1[i] = rnd.NextByte(indexLimit);
    }

    V128 v0 = new V128(b0);
    V128 v1 = new V128(b1);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5);

    CompareAgainstUnicorn();
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue