Add a few more instructions, a quick hack to fix svcs for now.

This commit is contained in:
riperiperi 2020-01-13 02:00:34 +00:00
parent 957f00c059
commit c39f840b3d
22 changed files with 957 additions and 38 deletions

View file

@ -24,8 +24,8 @@ namespace ARMeilleure.Decoders
Rd = (opCode >> 16) & 0xf;
R = (opCode & (1 << 5)) != 0;
NHigh = ((opCode >> 5) * 0x1) == 1;
MHigh = ((opCode >> 6) * 0x1) == 1;
NHigh = ((opCode >> 5) & 0x1) == 1;
MHigh = ((opCode >> 6) & 0x1) == 1;
SetFlags = ((opCode >> 20) & 1) != 0;
}
}

View file

@ -7,20 +7,21 @@ namespace ARMeilleure.Decoders
class OpCode32Simd : OpCode32, IOpCode32Simd
{
public int Vd { get; private set; }
public int Vm { get; private set; }
public int Vm { get; protected set; }
public int Opc { get; private set; }
public int Size { get; protected set; }
public bool Q { get; private set; }
public bool Q { get; protected set; }
public bool F { get; protected set; }
public bool U { get; private set; }
public int Elems => GetBytesCount() >> ((Size == 1) ? 1 : 2);
public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
Size = (opCode >> 20) & 0x1; //fvector size: 1 for 16 bit
Size = (opCode >> 20) & 0x3; //fvector size: 1 for 16 bit
Q = ((opCode >> 6) & 0x1) != 0;
F = ((opCode >> 10) & 0x1) != 0;
U = ((opCode >> 24) & 0x1) != 0;
Opc = ((opCode >> 7) & 0x3);
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;

View file

@ -8,7 +8,7 @@ namespace ARMeilleure.Decoders
{
public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
Size = (opCode >> 18) & 0x1; //fvector size: 1 for 16 bit
Size = (opCode >> 18) & 0x3; //fvector size: 1 for 16 bit
}
}
}

View file

@ -0,0 +1,48 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
/// <summary>
/// Decoder for the A32 Advanced SIMD "VDUP (scalar)" encoding, which replicates a
/// single element of the source register across the destination vector.
/// </summary>
class OpCode32SimdDupElem : OpCode32, IOpCode32Simd
{
    /// <summary>Log2 of the element width in bytes (0 = 8-bit, 1 = 16-bit, 2 = 32-bit).</summary>
    public int Size { get; private set; }

    public int Elems => 1;

    /// <summary>Destination register number (D bit in bit 4, Vd field in bits 3:0).</summary>
    public int Vd { get; private set; }

    /// <summary>Source register number (M bit in bit 4, Vm field in bits 3:0).</summary>
    public int Vm { get; private set; }

    /// <summary>True when bit 6 of the opcode is set.</summary>
    public bool Q { get; private set; }

    /// <summary>Index of the source element, in units of <see cref="Size"/>-sized elements.</summary>
    public int Index { get; private set; }

    /// <summary>
    /// Decodes the element size and index from imm4 (opcode bits 19:16):
    /// xxx1 selects 8-bit, xx10 selects 16-bit, x100 selects 32-bit; the bits above
    /// the size marker form the element index. x000 is undefined.
    /// </summary>
    /// <exception cref="Exception">Thrown when imm4 is x000.</exception>
    public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
        int imm4 = (opCode >> 16) & 0xf;

        if ((imm4 & 0b1) == 0b1)
        {
            Size = 0;
            Index = (imm4 >> 1) & 0x7;
        }
        else if ((imm4 & 0b11) == 0b10)
        {
            // Previously tested "!= 0b10", which rejected every 16-bit encoding and
            // swallowed the 32-bit ones, leaving the branch below unreachable.
            Size = 1;
            Index = (imm4 >> 2) & 0x3;
        }
        else if ((imm4 & 0b111) == 0b100)
        {
            Size = 2;
            Index = (imm4 >> 3) & 0x1;
        }
        else
        {
            throw new Exception("Undefined");
        }

        Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
        Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
        Q = (opCode & (1 << 6)) != 0;
    }
}
}

View file

@ -0,0 +1,42 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
/// <summary>
/// Decoder for A32 Advanced SIMD shift-by-immediate encodings. The element size and
/// the shift amount are both derived from the L bit (bit 7) and imm6 (bits 21:16).
/// </summary>
class OpCode32SimdShift : OpCode32Simd
{
    /// <summary>Raw imm6 field (opcode bits 21:16).</summary>
    public int Immediate { get; private set; }

    /// <summary>Shift amount with the size-marker bit of imm6 removed.</summary>
    public int Shift { get; private set; }

    /// <exception cref="Exception">Thrown when L is clear and imm6 is below 8.</exception>
    public OpCode32SimdShift(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
        int imm6 = (opCode >> 16) & 0x3f;
        bool longBit = ((opCode >> 7) & 0x1) != 0;

        Immediate = imm6;

        // The highest set bit among L:imm6 picks the element size; what remains below
        // that marker bit is the shift count.
        if (longBit)
        {
            Size = 3;
            Shift = imm6;
            return;
        }

        if ((imm6 & 0x20) != 0)
        {
            Size = 2;
            Shift = imm6 - 32;
            return;
        }

        if ((imm6 & 0x10) != 0)
        {
            Size = 1;
            Shift = imm6 - 16;
            return;
        }

        if ((imm6 & 0x08) != 0)
        {
            Size = 0;
            Shift = imm6 - 8;
            return;
        }

        throw new Exception("Unknown Encoding");
    }
}
}

View file

@ -10,21 +10,34 @@ namespace ARMeilleure.Decoders
public int Rn { get; private set; }
public int Rm { get; private set; }
public int IndexAlign { get; private set; }
public int Index => IndexAlign >> (1 + Size);
public int Index { get; private set; }
public bool WBack { get; private set; }
public bool RegisterIndex { get; private set; }
public int Size { get; private set; }
public bool Replicate { get; private set; }
public int Elems => GetBytesCount() >> Size;
public int Increment => (((IndexAlign >> Size) & 1) == 0) ? 1 : 2;
public int Increment { get; private set; }
public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
Vd = (opCode >> 12) & 0xf;
Vd |= (opCode >> 18) & 0x10;
Size = (opCode >> 10) & 0x3;
IndexAlign = (opCode >> 4) & 0xf;
Size = (opCode >> 10) & 0x3;
Replicate = Size == 3;
if (Replicate)
{
Size = (opCode >> 6) & 0x3;
Increment = ((opCode >> 5) & 1) + 1;
Index = 0;
}
else
{
Increment = (((IndexAlign >> Size) & 1) == 0) ? 1 : 2;
Index = IndexAlign >> (1 + Size);
}
Rm = (opCode >> 0) & 0xf;
Rn = (opCode >> 16) & 0xf;

View file

@ -0,0 +1,51 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
/// <summary>
/// Decoder for A32 VMOV between a general-purpose register and a single vector element.
/// </summary>
class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd
{
    /// <summary>Log2 of the element width in bytes (0, 1 or 2).</summary>
    public int Size { get; private set; }

    public int Elems => 1;

    /// <summary>Vector register number (bit 7 of the opcode in bit 4, bits 19:16 in bits 3:0).</summary>
    public int Vd { get; private set; }

    /// <summary>General-purpose register number (opcode bits 15:12).</summary>
    public int Rt { get; private set; }

    /// <summary>Opcode bit 20.</summary>
    public int Op { get; private set; }

    /// <summary>Opcode bit 23.</summary>
    public bool U { get; private set; }

    /// <summary>Element index, in units of <see cref="Size"/>-sized elements.</summary>
    public int Index { get; private set; }

    /// <exception cref="Exception">Thrown for the opc1:opc2 pattern 0x10.</exception>
    public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
        Op = (opCode >> 20) & 0x1;
        U = ((opCode >> 23) & 1) != 0;
        Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
        Rt = (opCode >> 12) & 0xf;

        // Selector is opc1 (bits 22:21) concatenated with opc2 (bits 6:5).
        int opc1 = (opCode >> 21) & 0x3;
        int opc2 = (opCode >> 5) & 0x3;
        int selector = (opc1 << 2) | opc2;

        bool isByte = (selector & 0x8) != 0;
        bool isHalf = (selector & 0x1) != 0;
        bool isWord = (selector & 0x2) == 0;

        if (isByte)
        {
            Size = 0;
            Index = selector & 0x7;
        }
        else if (isHalf)
        {
            Size = 1;
            Index = (selector >> 1) & 0x3;
        }
        else if (isWord)
        {
            Size = 2;
            Index = (selector >> 2) & 0x1;
        }
        else
        {
            throw new Exception("Undefined");
        }
    }
}
}

View file

@ -7,6 +7,7 @@ namespace ARMeilleure.Decoders
class OpCode32SimdReg : OpCode32Simd
{
public int Vn { get; private set; }
public int Index { get; private set; }
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{

View file

@ -0,0 +1,20 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
/// <summary>
/// Decoder for A32 Advanced SIMD "by scalar" encodings (used by the Vmul_1, Vmla_1 and
/// Vmls_1 table entries). It overrides the Q, F, Size and Vm fields decoded by the base
/// classes because this encoding places them at different bit positions.
/// </summary>
class OpCode32SimdRegElem : OpCode32SimdReg
{
    /// <exception cref="Exception">Thrown when the size field (bits 21:20) is 0b11.</exception>
    public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
        Q = ((opCode >> 24) & 0x1) != 0;
        F = ((opCode >> 8) & 0x1) != 0;
        Size = ((opCode >> 20) & 0x3);

        // The base constructor derived RegisterSize from bit 6, which is fixed at 1 in the
        // by-scalar patterns. Recompute it from the real Q bit so that 64-bit (D register)
        // forms are not processed as 128-bit vectors.
        RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;

        if (Size == 0b11)
        {
            throw new Exception("Unknown Encoding!");
        }

        // Bits 3:0 of the opcode go to bits 4:1 and bit 5 (M) to bit 0.
        Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
    }
}
}

View file

@ -0,0 +1,16 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
/// <summary>
/// Decoder for A32 VEXT. The element size is forced to bytes and the extraction
/// offset is taken from opcode bits 11:8.
/// </summary>
class OpCode32SimdVext : OpCode32SimdReg
{
    /// <summary>Byte offset encoded in opcode bits 11:8.</summary>
    public int Immediate { get; private set; }

    public OpCode32SimdVext(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
        // VEXT always works on byte-sized elements, whatever the base decoder read.
        Size = 0;

        int imm4 = (opCode >> 8) & 0xf;
        Immediate = imm4;
    }
}
}

View file

@ -692,6 +692,8 @@ namespace ARMeilleure.Decoders
SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlalh,InstEmit32.Smlalh,typeof(OpCode32AluUmull));
SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, typeof(OpCode32AluMla));
SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, typeof(OpCode32AluMla));
SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smulh, InstEmit32.Smulh, typeof(OpCode32AluMla));
SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, typeof(OpCode32AluUmull));
SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluImm));
SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsImm));
SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsReg));
@ -732,6 +734,7 @@ namespace ARMeilleure.Decoders
SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsReg));
SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, typeof(OpCode32AluBf));
SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, typeof(OpCode32AluMla));
SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, typeof(OpCode32AluUmull));
SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, typeof(OpCode32AluUmull));
SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, typeof(OpCode32AluUx));
SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16,InstEmit32.Uxtb16,typeof(OpCode32AluUx));
@ -739,6 +742,9 @@ namespace ARMeilleure.Decoders
// FP & SIMD (AArch32)
SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS));
SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg));
SetA32("<<<<11100x11xxxxxxxx10xxx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS));
SetA32("111100100x0xxxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg));
SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg));
@ -771,17 +777,23 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11101x11010xxxxx10xx01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS));
SetA32("<<<<11101x11010xxxxx10xx11x0xxxx", InstName.Vcmpe,InstEmit32.Vcmpe, typeof(OpCode32SimdS));
SetA32("111100111x11xx11xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ));
SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS));
SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI));
SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI));
SetA32("111111101x1111xxxxxx10<<x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI));
SetA32("<<<<11101x00xxxxxxxx10xxx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS));
SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdVdupGP));
SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem));
SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdVext));
// VLD# missing single to all lanes
SetA32("111101001x10xxxxxxxx0000xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx0100xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1000xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1100xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle)); //all lanes
SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); //regs = 1
SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); //regs = 2
SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); //regs = 3
@ -790,17 +802,20 @@ namespace ARMeilleure.Decoders
SetA32("111101001x10xxxxxxxx0001xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx0101xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1001xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1101xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle)); //all lanes
SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); //regs = 1, inc = 1/2 (itype)
SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); //regs = 2, inc = 2
SetA32("111101001x10xxxxxxxx0010xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx0110xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1010xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1110xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle)); //all lanes
SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemPair)); //inc = 1/2 (itype)
SetA32("111101001x10xxxxxxxx0011xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx0111xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1011xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle));
SetA32("111101001x10xxxxxxxx1111xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle)); //all lanes
SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemPair)); //inc = 1/2 (itype)
SetA32("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult));
@ -825,10 +840,12 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11100x00xxxxxxxx10xxx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, typeof(OpCode32SimdRegS));
SetA32("111100100x0xxxxxxxxx1101xxx1xxxx", InstName.Vmla, InstEmit32.Vmla_V, typeof(OpCode32SimdReg));
SetA32("111100100xxxxxxxxxxx1001xxx0xxxx", InstName.Vmla, InstEmit32.Vmla_I, typeof(OpCode32SimdReg));
SetA32("1111001x1x<<xxxxxxxx000xx1x0xxxx", InstName.Vmla, InstEmit32.Vmla_1, typeof(OpCode32SimdRegElem)); //size != b11
SetA32("<<<<11100x00xxxxxxxx10xxx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, typeof(OpCode32SimdRegS));
SetA32("111100100x1xxxxxxxxx1101xxx1xxxx", InstName.Vmls, InstEmit32.Vmls_V, typeof(OpCode32SimdReg));
SetA32("111100110xxxxxxxxxxx1001xxx0xxxx", InstName.Vmls, InstEmit32.Vmls_I, typeof(OpCode32SimdReg));
SetA32("1111001x1x<<xxxxxxxx010xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_1, typeof(OpCode32SimdRegElem)); //size != b11
SetA32("1111001x1x000xxxxxxx0xx00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); //d/q vector i32
SetA32("<<<<11101x11xxxxxxxx10xx0000xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm44)); //scalar f16/32/64 based on size 01 10 11
@ -838,9 +855,9 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS));
SetA32("<<<<1100010xxxxxxxxx101100x1xxxx", InstName.Vmov, InstEmit32.Vmov_GD, typeof(OpCode32SimdMovGpDouble)); //to/from gen purpose x2 and double precision
//SetA32("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov, typeof(OpCode32SimdGenScal)); //from gen purpose
SetA32("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); //from gen purpose
SetA32("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, typeof(OpCode32SimdMovGp)); //to/from gen purpose and single precision
//SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov, typeof(OpCode32SimdGenScal)); //to gen purpose
SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); //to gen purpose
SetA32("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, typeof(OpCode32SimdMovGpDouble)); //to/from gen purpose x2 and single precision x2
SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
@ -849,6 +866,7 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11100x10xxxxxxxx10xxx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS));
SetA32("111100110x0xxxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg));
SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem)); //size != b11
SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd));
SetA32("<<<<11101x110001xxxx10xx01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS));
@ -862,8 +880,14 @@ namespace ARMeilleure.Decoders
SetA32("111100110x0xxxxxxxxx1101xxx0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg));
SetA32("1111001x0xxxxxxxxxxx1010x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg));
SetA32("111111101x1110xxxxxx10<<01x0xxxx", InstName.Vrint, InstEmit32.Vrint_R, typeof(OpCode32SimdCvtFI));
SetA32("<<<<11101x110110xxxx10xx11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdCvtFI));
SetA32("111111100xxxxxxxxxxx10xxx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, typeof(OpCode32SimdSel));
SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg));
SetA32("111100101xxxxxxxxxxx0101xxx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShift));
SetA32("111101001x00xxxxxxxx0000xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle));
SetA32("111101001x00xxxxxxxx0100xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle));
SetA32("111101001x00xxxxxxxx1000xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle));
@ -904,6 +928,10 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11100x11xxxxxxxx10xxx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, typeof(OpCode32SimdRegS));
SetA32("111100100x1xxxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg));
SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg));
SetA32("111100111x11xx10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ));
SetA32("111100111x11xx10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ));
SetA32("111100111x11xx10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ));
#endregion
FillFastLookupTable(_instA32FastLookup, _allInstA32);

View file

@ -44,6 +44,27 @@ namespace ARMeilleure.Instructions
EmitGenericStore(context, op.RdLo, op.SetFlags, lo);
}
/// <summary>
/// Emits SMULL: multiplies Rn by Rm as sign-extended 64-bit values and stores the upper
/// and lower 32 bits of the product in RdHi and RdLo.
/// </summary>
public static void Smull(ArmEmitterContext context)
{
    OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp;

    Operand lhs = GetIntA32(context, op.Rn);
    lhs = context.SignExtend32(OperandType.I64, lhs);

    Operand rhs = GetIntA32(context, op.Rm);
    rhs = context.SignExtend32(OperandType.I64, rhs);

    Operand product = context.Multiply(lhs, rhs);

    // Split the 64-bit product into its two 32-bit halves.
    Operand upper = context.ShiftRightUI(product, Const(32));
    Operand hi = context.ConvertI64ToI32(upper);
    Operand lo = context.ConvertI64ToI32(product);

    bool setFlags = op.SetFlags;

    if (setFlags)
    {
        // Flags are computed from the full 64-bit result.
        EmitNZFlagsCheck(context, product);
    }

    EmitGenericStore(context, op.RdHi, setFlags, hi);
    EmitGenericStore(context, op.RdLo, setFlags, lo);
}
public static void Smmla(ArmEmitterContext context)
{
EmitSmmul(context, MullFlags.SignedAdd);
@ -124,14 +145,32 @@ namespace ARMeilleure.Instructions
}
public static void Smlal(ArmEmitterContext context)
{
EmitMlal(context, true);
}
public static void Umlal(ArmEmitterContext context)
{
EmitMlal(context, false);
}
public static void EmitMlal(ArmEmitterContext context, bool signed)
{
OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp;
Operand n = GetIntA32(context, op.Rn);
Operand m = GetIntA32(context, op.Rm);
n = context.SignExtend32(OperandType.I64, n);
m = context.SignExtend32(OperandType.I64, m);
if (signed)
{
n = context.SignExtend32(OperandType.I64, n);
m = context.SignExtend32(OperandType.I64, m);
}
else
{
n = context.ZeroExtend32(OperandType.I64, n);
m = context.ZeroExtend32(OperandType.I64, m);
}
Operand res = context.Multiply(n, m);
@ -189,6 +228,36 @@ namespace ARMeilleure.Instructions
EmitGenericStore(context, op.RdLo, false, lo);
}
/// <summary>
/// Emits a signed 16x16 multiply: picks the top or bottom halfword of Rn and Rm
/// (according to NHigh / MHigh), multiplies them and stores the result in Rd
/// without updating flags.
/// </summary>
public static void Smulh(ArmEmitterContext context)
{
    OpCode32AluMla op = (OpCode32AluMla)context.CurrOp;

    Operand n = GetIntA32(context, op.Rn);
    Operand m = GetIntA32(context, op.Rm);

    // Top half: arithmetic shift right by 16. Bottom half: sign-extend the low 16 bits.
    n = op.NHigh
        ? context.ShiftRightSI(n, Const(16))
        : context.SignExtend16(OperandType.I32, n);

    m = op.MHigh
        ? context.ShiftRightSI(m, Const(16))
        : context.SignExtend16(OperandType.I32, m);

    Operand product = context.Multiply(n, m);

    EmitGenericStore(context, op.Rd, false, product);
}
private static void EmitGenericStore(ArmEmitterContext context, int Rd, bool setFlags, Operand value)
{
if (Rd == RegisterAlias.Aarch32Pc)

View file

@ -16,6 +16,31 @@ namespace ARMeilleure.Instructions
//TODO: SSE2 path
static partial class InstEmit32
{
/// <summary>
/// Emits scalar floating-point VABS by calling the managed absolute-value routine.
/// </summary>
public static void Vabs_S(ArmEmitterContext context)
{
    EmitScalarUnaryOpF32(context, (source) =>
    {
        return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, source);
    });
}
/// <summary>
/// Emits vector VABS. Integer elements go through <c>EmitAbs</c> on sign-extended
/// values; floating-point elements use the managed absolute-value routine.
/// </summary>
public static void Vabs_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitVectorUnaryOpSx32(context, (element) => EmitAbs(context, element));

        return;
    }

    EmitVectorUnaryOpF32(context, (element) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, element));
}
/// <summary>
/// Emits an integer absolute value: selects <paramref name="value"/> when it is
/// greater than or equal to zero, otherwise its negation.
/// </summary>
private static Operand EmitAbs(ArmEmitterContext context, Operand value)
{
    Operand zero = Const(value.Type, 0);
    Operand nonNegative = context.ICompareGreaterOrEqual(value, zero);
    Operand negated = context.Negate(value);

    return context.ConditionalSelect(nonNegative, value, negated);
}
public static void Vadd_S(ArmEmitterContext context)
{
EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
@ -65,6 +90,68 @@ namespace ARMeilleure.Instructions
}
}
/// <summary>
/// Emits VDUP (scalar): reads one element from the source register and replicates it
/// across the destination doubleword (and the following doubleword when Q is set).
/// </summary>
/// <exception cref="Exception">Thrown when the decoded element size is not 0, 1 or 2.</exception>
public static void Vdup_1(ArmEmitterContext context)
{
    OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp;

    // Vm numbers a doubleword: the quadword is Vm / 2, and the odd doubleword starts
    // half-way through that quadword's elements.
    int quadword = op.Vm >> 1;
    int elementIndex = ((op.Vm & 1) << (3 - op.Size)) + op.Index;

    Operand element = EmitVectorExtractZx32(context, quadword, elementIndex, op.Size);

    // Zero-extend to 64 bits, then multiply by a 0x..01..01 pattern to copy the element
    // into every lane at once; cheaper than inserting element by element.
    Operand replicated;

    if (op.Size == 2)
    {
        replicated = context.Multiply(context.ZeroExtend32(OperandType.I64, element), Const(0x0000000100000001u));
    }
    else if (op.Size == 1)
    {
        replicated = context.Multiply(context.ZeroExtend16(OperandType.I64, element), Const(0x0001000100010001u));
    }
    else if (op.Size == 0)
    {
        replicated = context.Multiply(context.ZeroExtend8(OperandType.I64, element), Const(0x0101010101010101u));
    }
    else
    {
        throw new Exception("Unknown Vdup Size!");
    }

    InsertScalar(context, op.Vd, replicated);

    if (op.Q)
    {
        InsertScalar(context, op.Vd | 1, replicated);
    }
}
/// <summary>
/// Emits VEXT: fills the destination with consecutive bytes taken from Vn starting at
/// byte offset Immediate, continuing from the start of Vm once the offset passes the
/// end of Vn.
/// </summary>
public static void Vext(ArmEmitterContext context)
{
    OpCode32SimdVext op = (OpCode32SimdVext)context.CurrOp;

    int byteCount = op.GetBytesCount();

    (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
    (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
    (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    Operand res = GetVecA32(vd);

    for (int dstByte = 0; dstByte < byteCount; dstByte++)
    {
        // Position of the wanted byte within the Vn:Vm concatenation.
        int srcByte = op.Immediate + dstByte;

        Operand extract = srcByte >= byteCount
            ? EmitVectorExtractZx32(context, vm, (srcByte - byteCount) + em * byteCount, op.Size)
            : EmitVectorExtractZx32(context, vn, srcByte + en * byteCount, op.Size);

        res = EmitVectorInsert(context, res, extract, dstByte + ed * byteCount, op.Size);
    }

    context.Copy(GetVecA32(vd), res);
}
public static void Vorr_I(ArmEmitterContext context)
{
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
@ -324,6 +411,26 @@ namespace ARMeilleure.Instructions
EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
}
/// <summary>
/// Emits VMUL (by scalar). Integer forms use a plain multiply; floating-point forms use
/// either the native multiply (FastFP) or the soft-float FPMul routine.
/// </summary>
public static void Vmul_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorByScalarOpI32(context, (vec, scalar) => context.Multiply(vec, scalar), false);

        return;
    }

    if (Optimizations.FastFP)
    {
        EmitVectorByScalarOpF32(context, (vec, scalar) => context.Multiply(vec, scalar));

        return;
    }

    EmitVectorByScalarOpF32(context, (vec, scalar) => EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, vec, scalar));
}
public static void Vmla_S(ArmEmitterContext context)
{
if (Optimizations.FastFP)
@ -362,6 +469,26 @@ namespace ARMeilleure.Instructions
EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
}
/// <summary>
/// Emits VMLA (by scalar): accumulator + (vector * scalar). Floating-point forms use the
/// native add/multiply under FastFP, otherwise the soft-float FPMulAdd routine.
/// </summary>
public static void Vmla_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorsByScalarOpI32(context, (acc, vec, scalar) => context.Add(acc, context.Multiply(vec, scalar)), false);

        return;
    }

    if (Optimizations.FastFP)
    {
        EmitVectorsByScalarOpF32(context, (acc, vec, scalar) => context.Add(acc, context.Multiply(vec, scalar)));

        return;
    }

    EmitVectorsByScalarOpF32(context, (acc, vec, scalar) => EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, acc, vec, scalar));
}
public static void Vmls_S(ArmEmitterContext context)
{
if (Optimizations.FastFP)
@ -400,6 +527,26 @@ namespace ARMeilleure.Instructions
EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
}
/// <summary>
/// Emits VMLS (by scalar): accumulator - (vector * scalar). Floating-point forms use the
/// native subtract/multiply under FastFP, otherwise the soft-float FPMulSub routine.
/// </summary>
public static void Vmls_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorsByScalarOpI32(context, (acc, vec, scalar) => context.Subtract(acc, context.Multiply(vec, scalar)), false);

        return;
    }

    if (Optimizations.FastFP)
    {
        EmitVectorsByScalarOpF32(context, (acc, vec, scalar) => context.Subtract(acc, context.Multiply(vec, scalar)));

        return;
    }

    EmitVectorsByScalarOpF32(context, (acc, vec, scalar) => EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, acc, vec, scalar));
}
public static void Vpadd_V(ArmEmitterContext context)
{
EmitVectorPairwiseOpF32(context, (op1, op2) => context.Add(op1, op2));
@ -452,6 +599,26 @@ namespace ARMeilleure.Instructions
});
}
/// <summary>
/// Emits VSHL (register): each element of Vm is shifted left by the signed value held
/// in the low byte of the corresponding Vn element.
/// </summary>
public static void Vshl_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // The binary-op helpers invoke the callback as (Vn element, Vm element). For this
    // instruction the shifted value is the Vm element and the count is the Vn element,
    // so the callback consumes its arguments in the opposite order to most binary ops.
    //
    // IMPORTANT TODO: a negative count must act as a truncating right shift; confirm
    // what ShiftLeft does with a negative count on x86 and handle it explicitly.
    if (op.U)
    {
        EmitVectorBinaryOpZx32(context, (shift, value) => context.ShiftLeft(value, context.SignExtend8(shift.Type, shift)));
    }
    else
    {
        EmitVectorBinaryOpSx32(context, (shift, value) => context.ShiftLeft(value, context.SignExtend8(shift.Type, shift)));
    }
}
/// <summary>
/// Emits VSHL (immediate): shifts every element left by the decoded constant amount.
/// </summary>
public static void Vshl(ArmEmitterContext context)
{
    OpCode32SimdShift op = (OpCode32SimdShift)context.CurrOp;

    EmitVectorUnaryOpZx32(context, (element) =>
    {
        Operand amount = Const(element.Type, op.Shift);

        return context.ShiftLeft(element, amount);
    });
}
public static void Vsqrt_S(ArmEmitterContext context)
{
/*

View file

@ -28,6 +28,57 @@ namespace ARMeilleure.Instructions
}
}
/// <summary>
/// Emits vector VCVT between floating-point and integer. Bit 0 of Opc selects unsigned,
/// bit 1 selects the float-to-integer direction. Only 32-bit elements are supported.
/// </summary>
/// <exception cref="Exception">Thrown when the element size is not 32-bit (Size != 2).</exception>
public static void Vcvt_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    bool unsigned = (op.Opc & 1) != 0;
    bool toInteger = (op.Opc & 2) != 0;

    if (op.Size != 2)
    {
        throw new Exception("CVT vector mode only currently defined for 32-bit");
    }

    // Only Size == 2 gets this far, so the float type is always single precision.
    OperandType floatSize = OperandType.FP32;

    if (!toInteger)
    {
        // Integer -> float: extract with the matching extension, then convert.
        if (unsigned)
        {
            EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
        }
        else
        {
            EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
        }

        return;
    }

    // Float -> integer: call the managed cast helper that matches the operand type
    // and signedness.
    EmitVectorUnaryOpF32(context, (op1) =>
    {
        Delegate cast;

        if (op1.Type == OperandType.FP64)
        {
            cast = unsigned
                ? (Delegate)new _U32_F64(CastDoubleToUInt32)
                : new _S32_F64(CastDoubleToInt32);
        }
        else
        {
            cast = unsigned
                ? (Delegate)new _U32_F32(CastFloatToUInt32)
                : new _S32_F32(CastFloatToInt32);
        }

        return context.Call(cast, op1);
    });
}
public static void Vcvt_FD(ArmEmitterContext context)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
@ -71,7 +122,7 @@ namespace ARMeilleure.Instructions
if (toInteger)
{
bool unsigned = (op.Opc2 & 1) == 0;
bool roundWithFpscr = op.Opc == 1;
bool roundWithFpscr = op.Opc != 1;
Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
@ -148,6 +199,124 @@ namespace ARMeilleure.Instructions
}
}
/// <summary>
/// Scales a floating-point value by 2^<paramref name="fBits"/>, as needed before a
/// float-to-fixed-point conversion. Returns the value unchanged when
/// <paramref name="fBits"/> is zero.
/// </summary>
/// <param name="context">Emitter context used to emit the multiply.</param>
/// <param name="value">FP32 or FP64 operand to scale.</param>
/// <param name="fBits">Number of fractional bits.</param>
/// <returns>The scaled operand, of the same floating-point type as the input.</returns>
private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
{
    Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);

    if (fBits == 0)
    {
        return value;
    }

    // The constant must have the same precision as the value; the FP64 case used to
    // be multiplied by an FP32 constant.
    if (value.Type == OperandType.FP32)
    {
        return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
    }
    else /* if (value.Type == OperandType.FP64) */
    {
        return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
    }
}
/// <summary>
/// Emits a call to the managed rounding routine with the given midpoint mode. The
/// single-precision overload is used when bit 0 of the current opcode's Size is clear,
/// the double-precision overload otherwise.
/// </summary>
public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
{
    IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;

    bool isDouble = (op.Size & 1) != 0;

    Delegate rounder = isDouble
        ? (Delegate)new _F64_F64_MidpointRounding(Math.Round)
        : new _F32_F32_MidpointRounding(MathF.Round);

    Operand mode = Const((int)roundMode);

    return context.Call(rounder, n, mode);
}
/// <summary>
/// Emits a scalar float-to-integer conversion with an explicit rounding mode chosen by
/// Opc2 (0 = away from zero, 1 = to even, 2 = towards +infinity, 3 = towards -infinity).
/// The value is rounded first and then cast to a 32-bit integer.
/// </summary>
public static void Vcvt_R(ArmEmitterContext context)
{
    OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

    bool isDouble = op.RegisterSize == RegisterSize.Simd64;
    OperandType floatSize = isDouble ? OperandType.FP64 : OperandType.FP32;

    bool unsigned = (op.Opc & 1) == 0;

    Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

    int mode = op.Opc2;

    if (mode == 0b00)
    {
        // away
        toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
    }
    else if (mode == 0b01)
    {
        // nearest
        toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
    }
    else if (mode == 0b10)
    {
        // +infinity
        toConvert = EmitRoundMathCall(context, MidpointRounding.ToPositiveInfinity, toConvert);
    }
    else if (mode == 0b11)
    {
        // negative
        toConvert = EmitRoundMathCall(context, MidpointRounding.ToNegativeInfinity, toConvert);
    }

    // Choose the managed cast helper matching source precision and signedness.
    Delegate cast;

    if (floatSize == OperandType.FP64)
    {
        cast = unsigned
            ? (Delegate)new _U32_F64(CastDoubleToUInt32)
            : new _S32_F64(CastDoubleToInt32);
    }
    else
    {
        cast = unsigned
            ? (Delegate)new _U32_F32(CastFloatToUInt32)
            : new _S32_F32(CastFloatToInt32);
    }

    Operand asInteger = context.Call(cast, toConvert);

    InsertScalar(context, op.Vd, asInteger);
}
/// <summary>
/// Emits a scalar round-to-integral with an explicit rounding mode chosen by Opc2
/// (0 = away from zero, 1 = to even, 2 = towards +infinity, 3 = towards -infinity).
/// The result stays in floating-point format.
/// </summary>
public static void Vrint_R(ArmEmitterContext context)
{
    OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

    OperandType floatSize = OperandType.FP32;

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        floatSize = OperandType.FP64;
    }

    Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

    // Indexed by Opc2; any value outside 0..3 leaves the operand unrounded.
    MidpointRounding[] modes =
    {
        MidpointRounding.AwayFromZero,       // 0b00: away
        MidpointRounding.ToEven,             // 0b01: nearest
        MidpointRounding.ToPositiveInfinity, // 0b10: +infinity
        MidpointRounding.ToNegativeInfinity  // 0b11: negative
    };

    int selector = op.Opc2;

    if ((uint)selector < (uint)modes.Length)
    {
        toConvert = EmitRoundMathCall(context, modes[selector], toConvert);
    }

    InsertScalar(context, op.Vd, toConvert);
}
/// <summary>
/// Emits a scalar round-to-integral that always rounds towards zero.
/// </summary>
public static void Vrint_Z(ArmEmitterContext context)
{
    EmitScalarUnaryOpF32(context, (source) =>
    {
        return EmitRoundMathCall(context, MidpointRounding.ToZero, source);
    });
}
private static int CastDoubleToInt32(double value)
{
return (int)value;

View file

@ -1528,7 +1528,7 @@ namespace ARMeilleure.Instructions
{
ThrowIfInvalid(index, size);
if (size < 3)
if (size < 3 && value.Type == OperandType.I64)
{
value = context.ConvertI64ToI32(value);
}
@ -1544,7 +1544,7 @@ namespace ARMeilleure.Instructions
return vector;
}
private static void ThrowIfInvalid(int index, int size)
public static void ThrowIfInvalid(int index, int size)
{
if ((uint)size > 3u)
{

View file

@ -228,7 +228,7 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtractSx(context, vm, index + em * elems, op.Size);
Operand ne = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne), index + ed * elems, op.Size);
}
@ -250,8 +250,8 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtractSx(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractSx(context, vm, index + em * elems, op.Size);
Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size);
}
@ -273,9 +273,9 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand de = EmitVectorExtractSx(context, vd, index + ed * elems, op.Size);
Operand ne = EmitVectorExtractSx(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractSx(context, vm, index + em * elems, op.Size);
Operand de = EmitVectorExtractSx32(context, vd, index + ed * elems, op.Size);
Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size);
}
@ -296,7 +296,7 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtractZx(context, vm, index + em * elems, op.Size);
Operand ne = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne), index + ed * elems, op.Size);
}
@ -318,8 +318,8 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtractZx(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractZx(context, vm, index + em * elems, op.Size);
Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size);
}
@ -341,9 +341,9 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand de = EmitVectorExtractZx(context, vd, index + ed * elems, op.Size);
Operand ne = EmitVectorExtractZx(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractZx(context, vm, index + em * elems, op.Size);
Operand de = EmitVectorExtractZx32(context, vd, index + ed * elems, op.Size);
Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
}
@ -351,6 +351,110 @@ namespace ARMeilleure.Instructions
context.Copy(GetVecA32(vd), res);
}
// VEC BY SCALAR
/// <summary>
/// Emits a floating-point "vector by scalar" operation: each element of Vn is combined
/// with one scalar read from Vm through <paramref name="emit"/>, and the results are
/// written to the matching elements of Vd.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdRegElem.</param>
/// <param name="emit">Callback producing the result for (vector element, scalar).</param>
public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    // Only sizes 2 and 3 are handled here; anything smaller is rejected.
    if (op.Size < 2)
    {
        throw new Exception("FP ops <32 bit unimplemented!");
    }

    // Bit 0 of the size selects FP64 (set) or FP32 (clear).
    bool isDouble = (op.Size & 1) != 0;
    OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

    // Element count = byte count / element width (4 bytes for FP32, 8 for FP64).
    int elemShift = isDouble ? 3 : 2;
    int count = op.GetBytesCount() >> elemShift;

    (int srcQuad, int srcPart) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
    (int dstQuad, int dstPart) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    // First element index of the source/destination within their quadword.
    int srcBase = srcPart * count;
    int dstBase = dstPart * count;

    Operand scalar = ExtractScalar(context, elemType, op.Vm);
    Operand result = GetVecA32(dstQuad);

    for (int lane = 0; lane < count; lane++)
    {
        Operand element = context.VectorExtract(elemType, GetVecA32(srcQuad), srcBase + lane);

        result = context.VectorInsert(result, emit(element, scalar), dstBase + lane);
    }

    context.Copy(GetVecA32(dstQuad), result);
}
/// <summary>
/// Emits an integer "vector by scalar" operation: each element of Vn is combined with
/// one scalar element selected by the Vm field through <paramref name="emit"/>, and the
/// results are inserted into the matching elements of Vd.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdRegElem.</param>
/// <param name="emit">Callback producing the result for (vector element, scalar).</param>
/// <param name="signed">True to sign-extend extracted elements, false to zero-extend.</param>
public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (op.Size < 1)
    {
        throw new Exception("Undefined");
    }

    (int srcQuad, int srcPart) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
    (int dstQuad, int dstPart) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    // The Vm field is split at bit (4 - size): the upper part is passed as the register
    // argument and the lower part as the element index.
    int splitBit = 4 - op.Size;
    int scalarReg = op.Vm >> splitBit;
    int scalarIndex = op.Vm & ((1 << splitBit) - 1);

    Operand scalar = EmitVectorExtract32(context, scalarReg, scalarIndex, op.Size, signed);
    Operand result = GetVecA32(dstQuad);

    int count = op.GetBytesCount() >> op.Size;
    int srcBase = srcPart * count;
    int dstBase = dstPart * count;

    for (int lane = 0; lane < count; lane++)
    {
        Operand element = EmitVectorExtract32(context, srcQuad, srcBase + lane, op.Size, signed);

        result = EmitVectorInsert(context, result, emit(element, scalar), dstBase + lane, op.Size);
    }

    context.Copy(GetVecA32(dstQuad), result);
}
/// <summary>
/// Emits a floating-point "vectors by scalar" operation with three inputs: for each
/// element, <paramref name="emit"/> receives the current Vd element, the Vn element and
/// one scalar read from Vm; its result replaces the Vd element.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdRegElem.</param>
/// <param name="emit">Callback producing the result for (Vd element, Vn element, scalar).</param>
public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    // Only sizes 2 and 3 are handled here; anything smaller is rejected.
    if (op.Size < 2)
    {
        throw new Exception("FP ops <32 bit unimplemented!");
    }

    // Bit 0 of the size selects FP64 (set) or FP32 (clear).
    bool isDouble = (op.Size & 1) != 0;
    OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

    // Element count = byte count / element width (4 bytes for FP32, 8 for FP64).
    int elemShift = isDouble ? 3 : 2;
    int count = op.GetBytesCount() >> elemShift;

    (int srcQuad, int srcPart) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
    (int dstQuad, int dstPart) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    int srcBase = srcPart * count;
    int dstBase = dstPart * count;

    Operand scalar = ExtractScalar(context, elemType, op.Vm);
    Operand result = GetVecA32(dstQuad);

    for (int lane = 0; lane < count; lane++)
    {
        Operand accumulator = context.VectorExtract(elemType, GetVecA32(dstQuad), dstBase + lane);
        Operand element = context.VectorExtract(elemType, GetVecA32(srcQuad), srcBase + lane);

        result = context.VectorInsert(result, emit(accumulator, element, scalar), dstBase + lane);
    }

    context.Copy(GetVecA32(dstQuad), result);
}
/// <summary>
/// Emits an integer "vectors by scalar" operation with three inputs: for each element,
/// <paramref name="emit"/> receives the current Vd element, the Vn element and one scalar
/// element selected by the Vm field; its result replaces the Vd element.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdRegElem.</param>
/// <param name="emit">Callback producing the result for (Vd element, Vn element, scalar).</param>
/// <param name="signed">True to sign-extend extracted elements, false to zero-extend.</param>
public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (op.Size < 1)
    {
        throw new Exception("Undefined");
    }

    (int srcQuad, int srcPart) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
    (int dstQuad, int dstPart) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    // The Vm field is split at bit (4 - size): the upper part is passed as the register
    // argument and the lower part as the element index.
    int splitBit = 4 - op.Size;
    int scalarReg = op.Vm >> splitBit;
    int scalarIndex = op.Vm & ((1 << splitBit) - 1);

    Operand scalar = EmitVectorExtract32(context, scalarReg, scalarIndex, op.Size, signed);
    Operand result = GetVecA32(dstQuad);

    int count = op.GetBytesCount() >> op.Size;
    int srcBase = srcPart * count;
    int dstBase = dstPart * count;

    for (int lane = 0; lane < count; lane++)
    {
        Operand accumulator = EmitVectorExtract32(context, dstQuad, dstBase + lane, op.Size, signed);
        Operand element = EmitVectorExtract32(context, srcQuad, srcBase + lane, op.Size, signed);

        result = EmitVectorInsert(context, result, emit(accumulator, element, scalar), dstBase + lane, op.Size);
    }

    context.Copy(GetVecA32(dstQuad), result);
}
// PAIRWISE
public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit)
@ -411,12 +515,11 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < pairs; index++)
{
int pairIndex = index << 1;
EmitVectorExtract(context, vd, index + ed * elems, op.Size, signed);
Operand n1 = EmitVectorExtract(context, vn, pairIndex + en * elems, op.Size, signed);
Operand n2 = EmitVectorExtract(context, vn, pairIndex + 1 + en * elems, op.Size, signed);
Operand n1 = EmitVectorExtract32(context, vn, pairIndex + en * elems, op.Size, signed);
Operand n2 = EmitVectorExtract32(context, vn, pairIndex + 1 + en * elems, op.Size, signed);
Operand m1 = EmitVectorExtract(context, vm, pairIndex + em * elems, op.Size, signed);
Operand m2 = EmitVectorExtract(context, vm, pairIndex + 1 + em * elems, op.Size, signed);
Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, signed);
Operand m2 = EmitVectorExtract32(context, vm, pairIndex + 1 + em * elems, op.Size, signed);
res = EmitVectorInsert(context, res, emit(n1, n2), index + ed * elems, op.Size);
res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + ed * elems, op.Size);
@ -424,5 +527,62 @@ namespace ARMeilleure.Instructions
context.Copy(GetVecA32(vd), res);
}
// helper func
/// <summary>
/// Extracts one vector element with sign extension; shorthand for
/// EmitVectorExtract32 with <c>signed</c> set to true.
/// </summary>
public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size)
    => EmitVectorExtract32(context, reg, index, size, signed: true);
/// <summary>
/// Extracts one vector element with zero extension; shorthand for
/// EmitVectorExtract32 with <c>signed</c> set to false.
/// </summary>
public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size)
    => EmitVectorExtract32(context, reg, index, size, signed: false);
/// <summary>
/// Extracts element <paramref name="index"/> of vector register <paramref name="reg"/>.
/// 8-bit and 16-bit elements (size 0 and 1) are widened to I32 using sign or zero
/// extension; size 2 is extracted as I32 and size 3 as I64 with no extension step.
/// </summary>
/// <param name="context">Emitter context used to emit the extract/extend operations.</param>
/// <param name="reg">Vector register index, passed to GetVec.</param>
/// <param name="index">Element index within the register.</param>
/// <param name="size">Element size selector (0 = 8-bit, 1 = 16-bit, 2 = I32, 3 = I64).</param>
/// <param name="signed">True to sign-extend narrow elements, false to zero-extend.</param>
/// <returns>The extracted operand, or null when <paramref name="size"/> matches no case.</returns>
public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed)
{
    ThrowIfInvalid(index, size);

    switch (size)
    {
        case 0:
        {
            Operand narrow = context.VectorExtract8(GetVec(reg), index);

            return signed
                ? context.SignExtend8(OperandType.I32, narrow)
                : context.ZeroExtend8(OperandType.I32, narrow);
        }

        case 1:
        {
            Operand narrow = context.VectorExtract16(GetVec(reg), index);

            return signed
                ? context.SignExtend16(OperandType.I32, narrow)
                : context.ZeroExtend16(OperandType.I32, narrow);
        }

        case 2:
            return context.VectorExtract(OperandType.I32, GetVec(reg), index);

        case 3:
            return context.VectorExtract(OperandType.I64, GetVec(reg), index);

        default:
            // Reached only if ThrowIfInvalid lets an unexpected size through.
            return null;
    }
}
}
}

View file

@ -52,6 +52,7 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp;
if (op.Replicate && !load) throw new Exception("Replicate+Store is undefined for LDn");
int eBytes = 1 << op.Size;
Operand n = GetIntA32(context, op.Rn);
@ -81,6 +82,14 @@ namespace ARMeilleure.Instructions
if (load)
{
EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
if (op.Replicate)
{
int limit = index + (1 << (3 - op.Size));
while (++index < limit)
{
EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
}
}
}
else
{

View file

@ -33,7 +33,25 @@ namespace ARMeilleure.Instructions
// from general purpose
Operand value = GetIntA32(context, op.Rt);
context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3));
}
}
/// <summary>
/// Emits a VMOV between a general-purpose register and a single vector element.
/// When <c>op.Op</c> is 1 the element is read into Rt (sign-extended unless <c>op.U</c>
/// is set); otherwise Rt is inserted into the element.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdMovGpElem.</param>
public static void Vmov_G1(ArmEmitterContext context)
{
    OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp;

    // Vd is halved to pick the register; when its low bit is set the element index
    // is offset by (1 << (3 - size)).
    int quad = op.Vd >> 1;
    int upperHalfOffset = (op.Vd & 1) << (3 - op.Size);
    int lane = op.Index + upperHalfOffset;

    if (op.Op != 1)
    {
        // From general purpose register to vector element.
        Operand target = GetVecA32(quad);
        Operand source = GetIntA32(context, op.Rt);

        context.Copy(target, EmitVectorInsert(context, target, source, lane, op.Size));

        return;
    }

    // From vector element to general purpose register.
    Operand extracted = EmitVectorExtract32(context, quad, lane, op.Size, !op.U);

    SetIntA32(context, op.Rt, extracted);
}
@ -86,5 +104,101 @@ namespace ARMeilleure.Instructions
context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1));
}
}
/// <summary>
/// Emits VTRN: for every pair of elements, the odd element of Vd is swapped with the
/// even element of Vm. Both registers are written back.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdCmpZ.</param>
public static void Vtrn(ArmEmitterContext context)
{
    OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

    int elems = op.GetBytesCount() >> op.Size;
    int pairs = elems >> 1;

    (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
    (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    // Vd and Vm may resolve to the same backing vector register (e.g. two halves of one
    // quadword). In that case both results must be accumulated in a single value,
    // otherwise the second write-back would discard the first.
    bool overlap = vm == vd;

    Operand resD = GetVecA32(vd);
    Operand resM = GetVecA32(vm);

    for (int index = 0; index < pairs; index++)
    {
        int pairIndex = index << 1;

        // Both extracts read the register contents from before this instruction,
        // since nothing is copied back until the loop has finished.
        Operand d2 = EmitVectorExtract32(context, vd, pairIndex + 1 + ed * elems, op.Size, false);
        Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, false);

        resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + ed * elems, op.Size);

        if (overlap)
        {
            resM = resD;
        }

        resM = EmitVectorInsert(context, resM, d2, pairIndex + em * elems, op.Size);

        if (overlap)
        {
            resD = resM;
        }
    }

    context.Copy(GetVecA32(vd), resD);

    if (!overlap)
    {
        context.Copy(GetVecA32(vm), resM);
    }
}
/// <summary>
/// Emits VZIP: interleaves the elements of Vd and Vm. The lower halves of both inputs
/// are interleaved into Vd and the upper halves into Vm.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdCmpZ.</param>
public static void Vzip(ArmEmitterContext context)
{
    OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

    int elems = op.GetBytesCount() >> op.Size;
    int pairs = elems >> 1;

    (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
    (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    // Vd and Vm may resolve to the same backing vector register (e.g. two halves of one
    // quadword). In that case both results must be accumulated in a single value,
    // otherwise the second write-back would discard the first.
    bool overlap = vm == vd;

    Operand resD = GetVecA32(vd);
    Operand resM = GetVecA32(vm);

    for (int index = 0; index < pairs; index++)
    {
        int pairIndex = index << 1;

        // All extracts read the register contents from before this instruction,
        // since nothing is copied back until the loop has finished.
        Operand dRowD = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, false);
        Operand mRowD = EmitVectorExtract32(context, vm, index + em * elems, op.Size, false);

        Operand dRowM = EmitVectorExtract32(context, vd, index + ed * elems + pairs, op.Size, false);
        Operand mRowM = EmitVectorExtract32(context, vm, index + em * elems + pairs, op.Size, false);

        resD = EmitVectorInsert(context, resD, dRowD, pairIndex + ed * elems, op.Size);
        resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + ed * elems, op.Size);

        if (overlap)
        {
            resM = resD;
        }

        resM = EmitVectorInsert(context, resM, dRowM, pairIndex + em * elems, op.Size);
        resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + em * elems, op.Size);

        if (overlap)
        {
            resD = resM;
        }
    }

    context.Copy(GetVecA32(vd), resD);

    if (!overlap)
    {
        context.Copy(GetVecA32(vm), resM);
    }
}
/// <summary>
/// Emits VUZP: de-interleaves the elements of Vd and Vm. The even-indexed elements of
/// Vd followed by those of Vm go to Vd; the odd-indexed elements go to Vm.
/// </summary>
/// <param name="context">Emitter context; the current opcode must be an OpCode32SimdCmpZ.</param>
public static void Vuzp(ArmEmitterContext context)
{
    OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

    int elems = op.GetBytesCount() >> op.Size;
    int pairs = elems >> 1;

    (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
    (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);

    // Vd and Vm may resolve to the same backing vector register (e.g. two halves of one
    // quadword). In that case both results must be accumulated in a single value,
    // otherwise the second write-back would discard the first.
    bool overlap = vm == vd;

    Operand resD = GetVecA32(vd);
    Operand resM = GetVecA32(vm);

    for (int index = 0; index < elems; index++)
    {
        Operand dIns, mIns;

        // The first half of each result is sourced from Vd, the second half from Vm.
        // Extracts read the register contents from before this instruction.
        if (index >= pairs)
        {
            int pind = index - pairs;

            dIns = EmitVectorExtract32(context, vm, (pind << 1) + em * elems, op.Size, false);
            mIns = EmitVectorExtract32(context, vm, ((pind << 1) | 1) + em * elems, op.Size, false);
        }
        else
        {
            dIns = EmitVectorExtract32(context, vd, (index << 1) + ed * elems, op.Size, false);
            mIns = EmitVectorExtract32(context, vd, ((index << 1) | 1) + ed * elems, op.Size, false);
        }

        resD = EmitVectorInsert(context, resD, dIns, index + ed * elems, op.Size);

        if (overlap)
        {
            resM = resD;
        }

        resM = EmitVectorInsert(context, resM, mIns, index + em * elems, op.Size);

        if (overlap)
        {
            resD = resM;
        }
    }

    context.Copy(GetVecA32(vd), resD);

    if (!overlap)
    {
        context.Copy(GetVecA32(vm), resM);
    }
}
}
}

View file

@ -82,6 +82,7 @@ namespace ARMeilleure.Instructions
Smaddl,
Smsubl,
Smulh,
Smull,
Stlr,
Stlxp,
Stlxr,
@ -512,12 +513,14 @@ namespace ARMeilleure.Instructions
Trap,
Tst,
Ubfx,
Umlal,
Umull,
Uxtb,
Uxtb16,
Uxth,
// FP & SIMD (AArch32)
Vabs,
Vadd,
Vand,
Vbif,
@ -533,6 +536,7 @@ namespace ARMeilleure.Instructions
Vcvt,
Vdiv,
Vdup,
Vext,
Vld1,
Vld2,
Vld3,
@ -553,7 +557,9 @@ namespace ARMeilleure.Instructions
Vnmls,
Vorr,
Vpadd,
Vrint,
Vsel,
Vshl,
Vst1,
Vst2,
Vst3,
@ -564,6 +570,9 @@ namespace ARMeilleure.Instructions
Vrsqrte,
Vrsqrts,
Vsub,
Vtrn,
Vuzp,
Vzip,
Vmov
}

View file

@ -11,6 +11,6 @@ namespace ARMeilleure.Translation
Lsra = 1 << 2,
MediumCq = SsaForm | Optimize,
HighCq = SsaForm | Optimize | Lsra
HighCq = SsaForm | Optimize
}
}

View file

@ -24,7 +24,7 @@ namespace ARMeilleure.Translation
public bool ShouldRejit()
{
return false && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
return Interlocked.Increment(ref _callCount) == MinCallsForRejit;
}
}
}

View file

@ -634,6 +634,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
}
// Zero out the remaining unused registers.
/*
for (int i = 0; i < SvcFuncMaxArguments32; i++)
{
if (IsRegisterInUse(i))
@ -649,6 +650,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
generator.Emit(OpCodes.Call, info);
}
*/
generator.Emit(OpCodes.Ret);