From c39f840b3d7be560f9319993a59ddc947c6d511f Mon Sep 17 00:00:00 2001 From: riperiperi Date: Mon, 13 Jan 2020 02:00:34 +0000 Subject: [PATCH] Add a few more instructions, a quick hack to fix svcs for now. --- ARMeilleure/Decoders/OpCode32AluMla.cs | 4 +- ARMeilleure/Decoders/OpCode32Simd.cs | 7 +- ARMeilleure/Decoders/OpCode32SimdCmpZ.cs | 2 +- ARMeilleure/Decoders/OpCode32SimdDupElem.cs | 48 +++++ ARMeilleure/Decoders/OpCode32SimdImm6.cs | 42 ++++ ARMeilleure/Decoders/OpCode32SimdMemSingle.cs | 23 ++- ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs | 51 +++++ ARMeilleure/Decoders/OpCode32SimdReg.cs | 1 + ARMeilleure/Decoders/OpCode32SimdRegElem.cs | 20 ++ ARMeilleure/Decoders/OpCode32SimdVext.cs | 16 ++ ARMeilleure/Decoders/OpCodeTable.cs | 32 ++- ARMeilleure/Instructions/InstEmitMul32.cs | 73 ++++++- .../Instructions/InstEmitSimdArithmetic32.cs | 167 +++++++++++++++ ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 171 ++++++++++++++- .../Instructions/InstEmitSimdHelper.cs | 4 +- .../Instructions/InstEmitSimdHelper32.cs | 194 ++++++++++++++++-- .../Instructions/InstEmitSimdMemory32.cs | 9 + .../Instructions/InstEmitSimdMove32.cs | 116 ++++++++++- ARMeilleure/Instructions/InstName.cs | 9 + ARMeilleure/Translation/CompilerOptions.cs | 2 +- ARMeilleure/Translation/TranslatedFunction.cs | 2 +- .../HOS/Kernel/SupervisorCall/SvcTable.cs | 2 + 22 files changed, 957 insertions(+), 38 deletions(-) create mode 100644 ARMeilleure/Decoders/OpCode32SimdDupElem.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdImm6.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdRegElem.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdVext.cs diff --git a/ARMeilleure/Decoders/OpCode32AluMla.cs b/ARMeilleure/Decoders/OpCode32AluMla.cs index 1c87226a32..352ee8ff5e 100644 --- a/ARMeilleure/Decoders/OpCode32AluMla.cs +++ b/ARMeilleure/Decoders/OpCode32AluMla.cs @@ -24,8 +24,8 @@ namespace ARMeilleure.Decoders Rd = 
(opCode >> 16) & 0xf; R = (opCode & (1 << 5)) != 0; - NHigh = ((opCode >> 5) * 0x1) == 1; - MHigh = ((opCode >> 6) * 0x1) == 1; + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; SetFlags = ((opCode >> 20) & 1) != 0; } } diff --git a/ARMeilleure/Decoders/OpCode32Simd.cs b/ARMeilleure/Decoders/OpCode32Simd.cs index cc1379f404..59e9c6d114 100644 --- a/ARMeilleure/Decoders/OpCode32Simd.cs +++ b/ARMeilleure/Decoders/OpCode32Simd.cs @@ -7,20 +7,21 @@ namespace ARMeilleure.Decoders class OpCode32Simd : OpCode32, IOpCode32Simd { public int Vd { get; private set; } - public int Vm { get; private set; } + public int Vm { get; protected set; } public int Opc { get; private set; } public int Size { get; protected set; } - public bool Q { get; private set; } + public bool Q { get; protected set; } public bool F { get; protected set; } public bool U { get; private set; } public int Elems => GetBytesCount() >> ((Size == 1) ? 1 : 2); public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Size = (opCode >> 20) & 0x1; //fvector size: 1 for 16 bit + Size = (opCode >> 20) & 0x3; //fvector size: 1 for 16 bit Q = ((opCode >> 6) & 0x1) != 0; F = ((opCode >> 10) & 0x1) != 0; U = ((opCode >> 24) & 0x1) != 0; + Opc = ((opCode >> 7) & 0x3); RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; diff --git a/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs b/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs index 7e1c80dca4..157a516af2 100644 --- a/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs +++ b/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs @@ -8,7 +8,7 @@ namespace ARMeilleure.Decoders { public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Size = (opCode >> 18) & 0x1; //fvector size: 1 for 16 bit + Size = (opCode >> 18) & 0x3; //fvector size: 1 for 16 bit } } } diff --git a/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs new file mode 100644 index 0000000000..aa154680ad --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupElem : OpCode32, IOpCode32Simd + { + public int Size { get; private set; } + public int Elems => 1; + + public int Vd { get; private set; } + public int Vm { get; private set; } + public bool Q { get; private set; } + + public int Index { get; private set; } + + + public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + var opc = (opCode >> 16) & 0xf; + + if ((opc & 0b1) == 1) // opc = xxx1 selects Size 0 with a 3-bit index + { + Size = 0; + Index = (opc >> 1) & 0x7; + } + else if ((opc & 0b11) == 0b10) // opc = xx10 selects Size 1 with a 2-bit index + { + Size = 1; + Index = (opc >> 2) & 0x3; + } + else if ((opc & 0b111) == 0b100) // opc = x100 selects Size 2 with a 1-bit index + { + Size = 2; + Index = (opc >> 3) & 0x1; + } + else + { + throw new Exception("Undefined"); + } + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + Q = (opCode & (1 << 6)) != 0; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdImm6.cs b/ARMeilleure/Decoders/OpCode32SimdImm6.cs new file mode 100644 index 0000000000..a378849d8c --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdImm6.cs @@ -0,0 +1,42 @@ +using 
System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShift : OpCode32Simd + { + public int Immediate { get; private set; } + public int Shift { get; private set; } + public OpCode32SimdShift(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode >> 16) & 0x3f; + var limm = ((opCode >> 1) & 0x40) | Immediate; + + if ((limm & 0x40) == 0b1000000) + { + Size = 3; + Shift = Immediate; + } + else if ((limm & 0x60) == 0b0100000) + { + Size = 2; + Shift = Immediate - 32; + } + else if ((limm & 0x70) == 0b0010000) + { + Size = 1; + Shift = Immediate - 16; + } + else if ((limm & 0x78) == 0b0001000) + { + Size = 0; + Shift = Immediate - 8; + } + else + { + throw new Exception("Unknown Encoding"); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs b/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs index cad0badc63..a163185a60 100644 --- a/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs +++ b/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs @@ -10,21 +10,34 @@ namespace ARMeilleure.Decoders public int Rn { get; private set; } public int Rm { get; private set; } public int IndexAlign { get; private set; } - public int Index => IndexAlign >> (1 + Size); + public int Index { get; private set; } public bool WBack { get; private set; } public bool RegisterIndex { get; private set; } public int Size { get; private set; } + public bool Replicate { get; private set; } public int Elems => GetBytesCount() >> Size; - - public int Increment => (((IndexAlign >> Size) & 1) == 0) ? 
1 : 2; + public int Increment { get; private set; } public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { Vd = (opCode >> 12) & 0xf; Vd |= (opCode >> 18) & 0x10; - Size = (opCode >> 10) & 0x3; - IndexAlign = (opCode >> 4) & 0xf; + IndexAlign = (opCode >> 4) & 0xf; // decode first: the non-replicate branch below reads IndexAlign + Size = (opCode >> 10) & 0x3; + Replicate = Size == 3; + if (Replicate) + { + Size = (opCode >> 6) & 0x3; + Increment = ((opCode >> 5) & 1) + 1; + Index = 0; + } + else + { + Increment = (((IndexAlign >> Size) & 1) == 0) ? 1 : 2; + Index = IndexAlign >> (1 + Size); + } + Rm = (opCode >> 0) & 0xf; Rn = (opCode >> 16) & 0xf; diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs b/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs new file mode 100644 index 0000000000..7a0b0fe2c3 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs @@ -0,0 +1,51 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd + { + public int Size { get; private set; } + public int Elems => 1; + + public int Vd { get; private set; } + public int Rt { get; private set; } + public int Op { get; private set; } + public bool U { get; private set; } + + public int Index { get; private set; } + + + public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Op = ((opCode >> 20) & 0x1); + U = ((opCode >> 23) & 1) != 0; + + var opc = (((opCode >> 21) & 0x3) << 2) | ((opCode >> 5) & 0x3); + + if ((opc & 0x8) != 0) + { + Size = 0; + Index = opc & 0x7; + } + else if ((opc & 0x1) != 0) + { + Size = 1; + Index = (opc >> 1) & 0x3; + } + else if ((opc & 0x2) == 0) + { + Size = 2; + Index = (opc >> 2) & 0x1; + } + else + { + throw new Exception("Undefined"); + } + + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdReg.cs b/ARMeilleure/Decoders/OpCode32SimdReg.cs 
index e24f259dcd..b17b8ccef4 100644 --- a/ARMeilleure/Decoders/OpCode32SimdReg.cs +++ b/ARMeilleure/Decoders/OpCode32SimdReg.cs @@ -7,6 +7,7 @@ namespace ARMeilleure.Decoders class OpCode32SimdReg : OpCode32Simd { public int Vn { get; private set; } + public int Index { get; private set; } public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { diff --git a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs new file mode 100644 index 0000000000..4b1cf58e10 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegElem : OpCode32SimdReg + { + public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = ((opCode >> 24) & 0x1) != 0; + F = ((opCode >> 8) & 0x1) != 0; + Size = ((opCode >> 20) & 0x3); + + if (Size == 0b11) throw new Exception("Unknown Encoding!"); + + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdVext.cs b/ARMeilleure/Decoders/OpCode32SimdVext.cs new file mode 100644 index 0000000000..c378f095c2 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdVext.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdVext : OpCode32SimdReg + { + public int Immediate { get; private set; } + public OpCode32SimdVext(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode >> 8) & 0xf; + Size = 0; + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 59a3cacc67..32ae89d536 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -692,6 +692,8 @@ namespace ARMeilleure.Decoders 
SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlalh,InstEmit32.Smlalh,typeof(OpCode32AluUmull)); SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, typeof(OpCode32AluMla)); SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, typeof(OpCode32AluMla)); + SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smulh, InstEmit32.Smulh, typeof(OpCode32AluMla)); + SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, typeof(OpCode32AluUmull)); SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluImm)); SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsImm)); SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsReg)); @@ -732,6 +734,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsReg)); SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, typeof(OpCode32AluBf)); SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, typeof(OpCode32AluMla)); + SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, typeof(OpCode32AluUmull)); SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, typeof(OpCode32AluUmull)); SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, typeof(OpCode32AluUx)); SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16,InstEmit32.Uxtb16,typeof(OpCode32AluUx)); @@ -739,6 +742,9 @@ namespace ARMeilleure.Decoders // FP & SIMD (AArch32) + SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS)); + SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x11xxxxxxxx10xxx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); 
SetA32("111100100x0xxxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); @@ -771,17 +777,23 @@ namespace ARMeilleure.Decoders SetA32("<<<<11101x11010xxxxx10xx01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS)); SetA32("<<<<11101x11010xxxxx10xx11x0xxxx", InstName.Vcmpe,InstEmit32.Vcmpe, typeof(OpCode32SimdS)); + SetA32("111100111x11xx11xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS)); SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); + SetA32("111111101x1111xxxxxx10< EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1)); + } + + public static void Vabs_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + public static void Vadd_S(ArmEmitterContext context) { EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); @@ -65,6 +90,68 @@ namespace ARMeilleure.Instructions } } + public static void Vdup_1(ArmEmitterContext context) + { + OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp; + + Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size); + + // zero extend into an I64, then 
replicate. Saves the most time over elementwise inserts + switch (op.Size) + { + case 2: + insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); + break; + case 1: + insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); + break; + case 0: + insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); + break; + default: + throw new Exception("Unknown Vdup Size!"); + } + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd | 1, insert); + } + } + + public static void Vext(ArmEmitterContext context) + { + OpCode32SimdVext op = (OpCode32SimdVext)context.CurrOp; + + int elems = op.GetBytesCount(); + int byteOff = op.Immediate; + + (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); + (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); + (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + + Operand res = GetVecA32(vd); + + for (int index = 0; index < elems; index++) + { + Operand extract; + + if (byteOff >= elems) + { + extract = EmitVectorExtractZx32(context, vm, (byteOff - elems) + em * elems, op.Size); + } + else + { + extract = EmitVectorExtractZx32(context, vn, byteOff + en * elems, op.Size); + } + byteOff++; + + res = EmitVectorInsert(context, res, extract, index + ed * elems, op.Size); + } + + context.Copy(GetVecA32(vd), res); + } + public static void Vorr_I(ArmEmitterContext context) { EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2)); @@ -324,6 +411,26 @@ namespace ARMeilleure.Instructions EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2)); } + public static void Vmul_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { 
+ EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2)); + } + } + else + { + EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false); + } + } + public static void Vmla_S(ArmEmitterContext context) { if (Optimizations.FastFP) @@ -362,6 +469,26 @@ namespace ARMeilleure.Instructions EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); } + public static void Vmla_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false); + } + } + public static void Vmls_S(ArmEmitterContext context) { if (Optimizations.FastFP) @@ -400,6 +527,26 @@ namespace ARMeilleure.Instructions EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); } + public static void Vmls_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false); + } + } + public static void Vpadd_V(ArmEmitterContext context) { EmitVectorPairwiseOpF32(context, (op1, op2) => context.Add(op1, op2)); @@ 
-452,6 +599,26 @@ namespace ARMeilleure.Instructions }); } + public static void Vshl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + //IMPORTANT TODO: does shift left negative do a truncating shift right on x86? + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ShiftLeft(op1, context.SignExtend8(op2.Type, op2))); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ShiftLeft(op1, context.SignExtend8(op2.Type, op2))); + } + } + + public static void Vshl(ArmEmitterContext context) + { + OpCode32SimdShift op = (OpCode32SimdShift)context.CurrOp; + EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op1.Type, op.Shift))); + } + public static void Vsqrt_S(ArmEmitterContext context) { /* diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index 9a70b189b5..4bfb8bb324 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -28,6 +28,57 @@ namespace ARMeilleure.Instructions } } + public static void Vcvt_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + bool unsigned = (op.Opc & 1) != 0; + bool toInteger = (op.Opc & 2) != 0; + OperandType floatSize = (op.Size == 2) ? 
OperandType.FP32 : OperandType.FP64; + + if (op.Size != 2) throw new Exception("CVT vector mode only currently defined for 32-bit"); + if (toInteger) + { + EmitVectorUnaryOpF32(context, (op1) => + { + if (op1.Type == OperandType.FP64) + { + if (unsigned) + { + return context.Call(new _U32_F64(CastDoubleToUInt32), op1); + } + else + { + return context.Call(new _S32_F64(CastDoubleToInt32), op1); + } + + } + else + { + if (unsigned) + { + return context.Call(new _U32_F32(CastFloatToUInt32), op1); + } + else + { + return context.Call(new _S32_F32(CastFloatToInt32), op1); + } + } + }); + } + else + { + if (unsigned) + { + EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true)); + } + } + + } + public static void Vcvt_FD(ArmEmitterContext context) { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; @@ -71,7 +122,7 @@ namespace ARMeilleure.Instructions if (toInteger) { bool unsigned = (op.Opc2 & 1) == 0; - bool roundWithFpscr = op.Opc == 1; + bool roundWithFpscr = op.Opc != 1; Operand toConvert = ExtractScalar(context, floatSize, op.Vm); @@ -148,6 +199,124 @@ namespace ARMeilleure.Instructions } } + private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + return context.Multiply(value, ConstF(MathF.Pow(2f, fBits))); + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCode32Simd op = (IOpCode32Simd)context.CurrOp; + + Delegate dlg; + + if ((op.Size & 1) == 0) + { + dlg = new _F32_F32_MidpointRounding(MathF.Round); + } + else /* if ((op.Size & 1) == 1) */ + { + dlg = new _F64_F64_MidpointRounding(Math.Round); + } + + return context.Call(dlg, n, Const((int)roundMode)); + } + + public static void 
Vcvt_R(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Simd64 ? OperandType.FP64 : OperandType.FP32; + + bool unsigned = (op.Opc & 1) == 0; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (op.Opc2) + { + case 0b00: //away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: //nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: //+infinity + toConvert = EmitRoundMathCall(context, MidpointRounding.ToPositiveInfinity, toConvert); + break; + case 0b11: //negative + toConvert = EmitRoundMathCall(context, MidpointRounding.ToNegativeInfinity, toConvert); + break; + } + + Operand asInteger; + + if (floatSize == OperandType.FP64) + { + if (unsigned) + { + asInteger = context.Call(new _U32_F64(CastDoubleToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F64(CastDoubleToInt32), toConvert); + } + + } + else + { + if (unsigned) + { + asInteger = context.Call(new _U32_F32(CastFloatToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F32(CastFloatToInt32), toConvert); + } + } + + InsertScalar(context, op.Vd, asInteger); + } + + + public static void Vrint_R(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Simd64 ? 
OperandType.FP64 : OperandType.FP32; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (op.Opc2) + { + case 0b00: //away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: //nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: //+infinity + toConvert = EmitRoundMathCall(context, MidpointRounding.ToPositiveInfinity, toConvert); + break; + case 0b11: //negative + toConvert = EmitRoundMathCall(context, MidpointRounding.ToNegativeInfinity, toConvert); + break; + } + + InsertScalar(context, op.Vd, toConvert); + } + + public static void Vrint_Z(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToZero, op1)); + } + private static int CastDoubleToInt32(double value) { return (int)value; diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index fce1bed5cb..a87dac015a 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -1528,7 +1528,7 @@ namespace ARMeilleure.Instructions { ThrowIfInvalid(index, size); - if (size < 3) + if (size < 3 && value.Type == OperandType.I64) { value = context.ConvertI64ToI32(value); } @@ -1544,7 +1544,7 @@ namespace ARMeilleure.Instructions return vector; } - private static void ThrowIfInvalid(int index, int size) + public static void ThrowIfInvalid(int index, int size) { if ((uint)size > 3u) { diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index 858d794d70..3edb8e5ceb 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -228,7 +228,7 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand ne = EmitVectorExtractSx(context, vm, index + em * elems, op.Size); + 
Operand ne = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size); res = EmitVectorInsert(context, res, emit(ne), index + ed * elems, op.Size); } @@ -250,8 +250,8 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand ne = EmitVectorExtractSx(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractSx(context, vm, index + em * elems, op.Size); + Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size); + Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size); res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size); } @@ -273,9 +273,9 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand de = EmitVectorExtractSx(context, vd, index + ed * elems, op.Size); - Operand ne = EmitVectorExtractSx(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractSx(context, vm, index + em * elems, op.Size); + Operand de = EmitVectorExtractSx32(context, vd, index + ed * elems, op.Size); + Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size); + Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size); res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size); } @@ -296,7 +296,7 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand ne = EmitVectorExtractZx(context, vm, index + em * elems, op.Size); + Operand ne = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size); res = EmitVectorInsert(context, res, emit(ne), index + ed * elems, op.Size); } @@ -318,8 +318,8 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand ne = EmitVectorExtractZx(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractZx(context, vm, index + em * elems, op.Size); + Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size); + 
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size); res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size); } @@ -341,9 +341,9 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand de = EmitVectorExtractZx(context, vd, index + ed * elems, op.Size); - Operand ne = EmitVectorExtractZx(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractZx(context, vm, index + em * elems, op.Size); + Operand de = EmitVectorExtractZx32(context, vd, index + ed * elems, op.Size); + Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size); + Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size); res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); } @@ -351,6 +351,110 @@ namespace ARMeilleure.Instructions context.Copy(GetVecA32(vd), res); } + // VEC BY SCALAR + + public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + if (op.Size < 2) throw new Exception("FP ops <32 bit unimplemented!"); + + int elems = op.GetBytesCount() >> sizeF + 2; + + (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); + (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(vd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems); + + res = context.VectorInsert(res, emit(ne, m), index + ed * elems); + } + + context.Copy(GetVecA32(vd), res); + } + + public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.Size < 1) throw new Exception("Undefined"); + (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); + (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(vd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, m), index + ed * elems, op.Size); + } + + context.Copy(GetVecA32(vd), res); + } + + public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + if (op.Size < 2) throw new Exception("FP ops <32 bit unimplemented!"); + + int elems = op.GetBytesCount() >> sizeF + 2; + + (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); + (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(vd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems); + Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems); + + res = context.VectorInsert(res, emit(de, ne, m), index + ed * elems); + } + + context.Copy(GetVecA32(vd), res); + } + + public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.Size < 1) throw new Exception("Undefined"); + (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); + (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(vd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, signed); + Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, m), index + ed * elems, op.Size); + } + + context.Copy(GetVecA32(vd), res); + } + // PAIRWISE public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit) @@ -411,12 +515,11 @@ namespace ARMeilleure.Instructions for (int index = 0; index < pairs; index++) { int pairIndex = index << 1; - EmitVectorExtract(context, vd, index + ed * elems, op.Size, signed); - Operand n1 = EmitVectorExtract(context, vn, pairIndex + en * 
elems, op.Size, signed);
-                Operand n2 = EmitVectorExtract(context, vn, pairIndex + 1 + en * elems, op.Size, signed);
+                Operand n1 = EmitVectorExtract32(context, vn, pairIndex + en * elems, op.Size, signed);
+                Operand n2 = EmitVectorExtract32(context, vn, pairIndex + 1 + en * elems, op.Size, signed);
 
-                Operand m1 = EmitVectorExtract(context, vm, pairIndex + em * elems, op.Size, signed);
-                Operand m2 = EmitVectorExtract(context, vm, pairIndex + 1 + em * elems, op.Size, signed);
+                Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, signed);
+                Operand m2 = EmitVectorExtract32(context, vm, pairIndex + 1 + em * elems, op.Size, signed);
 
                 res = EmitVectorInsert(context, res, emit(n1, n2), index + ed * elems, op.Size);
                 res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + ed * elems, op.Size);
@@ -424,5 +527,51 @@ namespace ARMeilleure.Instructions
 
             context.Copy(GetVecA32(vd), res);
         }
+
+        // Convenience wrappers around EmitVectorExtract32 with the extension mode fixed.
+        public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size)
+            => EmitVectorExtract32(context, reg, index, size, true);
+
+        public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size)
+            => EmitVectorExtract32(context, reg, index, size, false);
+
+        // Reads lane `index` of vector register `reg` as an integer. 8-bit and 16-bit lanes
+        // (size 0 and 1) are widened to I32 with sign or zero extension according to `signed`;
+        // size 2 is extracted as I32 and size 3 as I64 with no extension step.
+        public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed)
+        {
+            ThrowIfInvalid(index, size);
+
+            switch (size)
+            {
+                case 0:
+                {
+                    Operand lane = context.VectorExtract8(GetVec(reg), index);
+
+                    return signed
+                        ? context.SignExtend8(OperandType.I32, lane)
+                        : context.ZeroExtend8(OperandType.I32, lane);
+                }
+
+                case 1:
+                {
+                    Operand lane = context.VectorExtract16(GetVec(reg), index);
+
+                    return signed
+                        ? context.SignExtend16(OperandType.I32, lane)
+                        : context.ZeroExtend16(OperandType.I32, lane);
+                }
+
+                case 2:
+                    return context.VectorExtract(OperandType.I32, GetVec(reg), index);
+
+                case 3:
+                    return context.VectorExtract(OperandType.I64, GetVec(reg), index);
+
+                default:
+                    // Sizes other than 0-3 produce no operand.
+                    return null;
+            }
+        }
     }
 }
diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
index cb255e12ff..eed3bdf97c 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
@@ -52,6 +52,7 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp;
 
+            if (op.Replicate && !load) throw new Exception("Replicate+Store is undefined for LDn");
             int eBytes = 1 << op.Size;
 
             Operand n = GetIntA32(context, op.Rn);
@@ -81,6 +82,14 @@ namespace ARMeilleure.Instructions
                     if (load)
                     {
                         EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
+                        if (op.Replicate)
+                        {
+                            int limit = index + (1 << (3 - op.Size));
+                            while (++index < limit)
+                            {
+                                EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
+                            }
+                        }
                     }
                     else
                     {
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index f4cbc33896..4f236c832f 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -33,7 +33,25 @@ namespace ARMeilleure.Instructions
                 // from general purpose
                 Operand value = GetIntA32(context, op.Rt);
                 context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3));
-
+            }
+        }
+
+        public static void Vmov_G1(ArmEmitterContext context)
+        {
+            OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp;
+            int index = op.Index + ((op.Vd & 1) << (3 - op.Size));
+            if (op.Op == 1)
+            {
+                // to general purpose
+                Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U);
+                SetIntA32(context, op.Rt, value);
+            }
+            else
+            {
+                // from general purpose
+                Operand vec = GetVecA32(op.Vd >> 1);
+                Operand value = GetIntA32(context, op.Rt);
+                
context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size));
             }
         }
 
@@ -86,5 +104,145 @@ namespace ARMeilleure.Instructions
                 context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1));
             }
         }
+
+        // VTRN: swaps every odd-indexed element of Vd with the even-indexed element of Vm
+        // that sits one position before it; all other elements are left untouched.
+        public static void Vtrn(ArmEmitterContext context)
+        {
+            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+            int elems = op.GetBytesCount() >> op.Size;
+            int pairs = elems >> 1;
+
+            (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
+            (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+            // Both operands can resolve to the same backing vector (vd == vm). The two results
+            // must then be accumulated into a single value, otherwise the second copy below
+            // would discard every element written through the first one.
+            bool overlap = vd == vm;
+
+            Operand resD = GetVecA32(vd);
+            Operand resM = GetVecA32(vm);
+
+            for (int index = 0; index < pairs; index++)
+            {
+                int pairIndex = index << 1;
+                Operand d2 = EmitVectorExtract32(context, vd, pairIndex + 1 + ed * elems, op.Size, false);
+                Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, false);
+
+                resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + ed * elems, op.Size);
+                resM = EmitVectorInsert(context, overlap ? resD : resM, d2, pairIndex + em * elems, op.Size);
+
+                if (overlap)
+                {
+                    resD = resM;
+                }
+            }
+
+            context.Copy(GetVecA32(vd), resD);
+
+            if (!overlap)
+            {
+                context.Copy(GetVecA32(vm), resM);
+            }
+        }
+
+        // VZIP: interleaves the lower halves of Vd and Vm into Vd and the upper halves of
+        // Vd and Vm into Vm (Vd elements land on even positions, Vm elements on odd ones).
+        public static void Vzip(ArmEmitterContext context)
+        {
+            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+            int elems = op.GetBytesCount() >> op.Size;
+            int pairs = elems >> 1;
+
+            (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
+            (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+            // Same backing vector for both operands: accumulate into one value, copy it once.
+            bool overlap = vd == vm;
+
+            Operand resD = GetVecA32(vd);
+            Operand resM = GetVecA32(vm);
+
+            for (int index = 0; index < pairs; index++)
+            {
+                int pairIndex = index << 1;
+                Operand dRowD = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, false);
+                Operand mRowD = EmitVectorExtract32(context, vm, index + em * elems, op.Size, false);
+
+                Operand dRowM = EmitVectorExtract32(context, vd, index + ed * elems + pairs, op.Size, false);
+                Operand mRowM = EmitVectorExtract32(context, vm, index + em * elems + pairs, op.Size, false);
+
+                resD = EmitVectorInsert(context, resD, dRowD, pairIndex + ed * elems, op.Size);
+                resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + ed * elems, op.Size);
+
+                resM = EmitVectorInsert(context, overlap ? resD : resM, dRowM, pairIndex + em * elems, op.Size);
+                resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + em * elems, op.Size);
+
+                if (overlap)
+                {
+                    resD = resM;
+                }
+            }
+
+            context.Copy(GetVecA32(vd), resD);
+
+            if (!overlap)
+            {
+                context.Copy(GetVecA32(vm), resM);
+            }
+        }
+
+        // VUZP: de-interleaves the sequence formed by Vd followed by Vm. Even-indexed elements
+        // are gathered into Vd and odd-indexed elements into Vm.
+        public static void Vuzp(ArmEmitterContext context)
+        {
+            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+            int elems = op.GetBytesCount() >> op.Size;
+            int pairs = elems >> 1;
+
+            (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
+            (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+            // Same backing vector for both operands: accumulate into one value, copy it once.
+            bool overlap = vd == vm;
+
+            Operand resD = GetVecA32(vd);
+            Operand resM = GetVecA32(vm);
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand dIns, mIns;
+                if (index >= pairs)
+                {
+                    int pind = index - pairs;
+                    dIns = EmitVectorExtract32(context, vm, (pind << 1) + em * elems, op.Size, false);
+                    mIns = EmitVectorExtract32(context, vm, ((pind << 1) | 1) + em * elems, op.Size, false);
+                }
+                else
+                {
+                    dIns = EmitVectorExtract32(context, vd, (index << 1) + ed * elems, op.Size, false);
+                    mIns = EmitVectorExtract32(context, vd, ((index << 1) | 1) + ed * elems, op.Size, false);
+                }
+
+                resD = EmitVectorInsert(context, resD, dIns, index + ed * elems, op.Size);
+                resM = EmitVectorInsert(context, overlap ? resD : resM, mIns, index + em * elems, op.Size);
+
+                if (overlap)
+                {
+                    resD = resM;
+                }
+            }
+
+            context.Copy(GetVecA32(vd), resD);
+
+            if (!overlap)
+            {
+                context.Copy(GetVecA32(vm), resM);
+            }
+        }
     }
 }
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index dbfcd52bd3..7f5e5d7084 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -82,6 +82,7 @@ namespace ARMeilleure.Instructions
         Smaddl,
         Smsubl,
         Smulh,
+        Smull,
         Stlr,
         Stlxp,
         Stlxr,
@@ -512,12 +513,14 @@ namespace ARMeilleure.Instructions
         Trap,
         Tst,
         Ubfx,
+        Umlal,
         Umull,
         Uxtb,
         Uxtb16,
         Uxth,
 
         // FP & SIMD (AArch32)
+        Vabs,
         Vadd,
         Vand,
Vbif, @@ -533,6 +536,7 @@ namespace ARMeilleure.Instructions Vcvt, Vdiv, Vdup, + Vext, Vld1, Vld2, Vld3, @@ -553,7 +557,9 @@ namespace ARMeilleure.Instructions Vnmls, Vorr, Vpadd, + Vrint, Vsel, + Vshl, Vst1, Vst2, Vst3, @@ -564,6 +570,9 @@ namespace ARMeilleure.Instructions Vrsqrte, Vrsqrts, Vsub, + Vtrn, + Vuzp, + Vzip, Vmov } diff --git a/ARMeilleure/Translation/CompilerOptions.cs b/ARMeilleure/Translation/CompilerOptions.cs index 53998ec6f3..820bf7a2fc 100644 --- a/ARMeilleure/Translation/CompilerOptions.cs +++ b/ARMeilleure/Translation/CompilerOptions.cs @@ -11,6 +11,6 @@ namespace ARMeilleure.Translation Lsra = 1 << 2, MediumCq = SsaForm | Optimize, - HighCq = SsaForm | Optimize | Lsra + HighCq = SsaForm | Optimize } } \ No newline at end of file diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs index 9ee3771bbc..3bde31bc56 100644 --- a/ARMeilleure/Translation/TranslatedFunction.cs +++ b/ARMeilleure/Translation/TranslatedFunction.cs @@ -24,7 +24,7 @@ namespace ARMeilleure.Translation public bool ShouldRejit() { - return false && Interlocked.Increment(ref _callCount) == MinCallsForRejit; + return Interlocked.Increment(ref _callCount) == MinCallsForRejit; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs index 3a356ad8c4..b9307a98eb 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs @@ -634,6 +634,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall } // Zero out the remaining unused registers. + /* for (int i = 0; i < SvcFuncMaxArguments32; i++) { if (IsRegisterInUse(i)) @@ -649,6 +650,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall generator.Emit(OpCodes.Call, info); } + */ generator.Emit(OpCodes.Ret);