diff --git a/ARMeilleure/Decoders/BaseOpCode32Simd.cs b/ARMeilleure/Decoders/BaseOpCode32Simd.cs new file mode 100644 index 0000000000..d5ae51c039 --- /dev/null +++ b/ARMeilleure/Decoders/BaseOpCode32Simd.cs @@ -0,0 +1,61 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Decoders +{ + abstract class BaseOpCode32Simd : OpCode32, IOpCode32Simd + { + public int Vd { get; protected set; } + public int Vm { get; protected set; } + public int Size { get; protected set; } + + // Helpers to index doublewords within quadwords. Looping over a vector starts at quadword Qx and at element index Fx or Ix within it, + // depending on whether the instruction is floating point or integer. + // + // Qx: The quadword register that the target vector is contained in. + // Ix: The starting element index of the target vector within the quadword, with Size treated as an integer element size. + // Fx: The starting element index of the target vector within the quadword, with Size treated as floating point (32 or 64 bit, selected by Size & 1). + public int Qd => GetQuadwordIndex(Vd); + public int Id => GetQuadwordSubindex(Vd) << (3 - Size); + public int Fd => GetQuadwordSubindex(Vd) << (1 - (Size & 1)); // Only the low bit of Size is used. A value of 1 SHOULD mean fp16, but switch does not support it, so we always assume 64 bit.
+ + public int Qm => GetQuadwordIndex(Vm); + public int Im => GetQuadwordSubindex(Vm) << (3 - Size); + public int Fm => GetQuadwordSubindex(Vm) << (1 - (Size & 1)); + + protected int GetQuadwordIndex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + return index >> 1; + case RegisterSize.Simd64: + return index >> 1; + case RegisterSize.Simd32: + return index >> 2; + } + + throw new InvalidOperationException(); + } + + protected int GetQuadwordSubindex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + return 0; + case RegisterSize.Simd64: + return index & 1; + case RegisterSize.Simd32: + return index & 3; + } + + throw new InvalidOperationException(); + } + + public BaseOpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32Simd.cs b/ARMeilleure/Decoders/IOpCode32Simd.cs index 9c43906426..4a00d56ac9 100644 --- a/ARMeilleure/Decoders/IOpCode32Simd.cs +++ b/ARMeilleure/Decoders/IOpCode32Simd.cs @@ -6,6 +6,5 @@ namespace ARMeilleure.Decoders { interface IOpCode32Simd : IOpCode32, IOpCodeSimd { - int Elems { get; } } } diff --git a/ARMeilleure/Decoders/IOpCode32SimdImm.cs b/ARMeilleure/Decoders/IOpCode32SimdImm.cs index 9c5f59b845..81cae5d309 100644 --- a/ARMeilleure/Decoders/IOpCode32SimdImm.cs +++ b/ARMeilleure/Decoders/IOpCode32SimdImm.cs @@ -8,5 +8,6 @@ namespace ARMeilleure.Decoders { public int Vd { get; } public long Immediate { get; } + int Elems { get; } } } diff --git a/ARMeilleure/Decoders/OpCode32Simd.cs b/ARMeilleure/Decoders/OpCode32Simd.cs index a9027a19e3..558b67dc48 100644 --- a/ARMeilleure/Decoders/OpCode32Simd.cs +++ b/ARMeilleure/Decoders/OpCode32Simd.cs @@ -4,16 +4,12 @@ using System.Text; namespace ARMeilleure.Decoders { - class OpCode32Simd : OpCode32, IOpCode32Simd + class OpCode32Simd : BaseOpCode32Simd { - public int Vd { get; private set; } - public int Vm { get; protected set; } public int Opc { get; 
protected set; } - public int Size { get; protected set; } public bool Q { get; protected set; } public bool F { get; protected set; } public bool U { get; private set; } - public int Elems => GetBytesCount() >> ((Size == 1) ? 1 : 2); public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { diff --git a/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs index 99a4f65a0a..3c1683f82b 100644 --- a/ARMeilleure/Decoders/OpCode32SimdDupElem.cs +++ b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs @@ -4,18 +4,12 @@ using System.Text; namespace ARMeilleure.Decoders { - class OpCode32SimdDupElem : OpCode32, IOpCode32Simd + class OpCode32SimdDupElem : BaseOpCode32Simd { - public int Size { get; private set; } - public int Elems => 1; - - public int Vd { get; private set; } - public int Vm { get; private set; } public bool Q { get; private set; } public int Index { get; private set; } - public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { var opc = (opCode >> 16) & 0xf; diff --git a/ARMeilleure/Decoders/OpCode32SimdReg.cs b/ARMeilleure/Decoders/OpCode32SimdReg.cs index e24f259dcd..7a1351a075 100644 --- a/ARMeilleure/Decoders/OpCode32SimdReg.cs +++ b/ARMeilleure/Decoders/OpCode32SimdReg.cs @@ -8,6 +8,10 @@ namespace ARMeilleure.Decoders { public int Vn { get; private set; } + public int Qn => GetQuadwordIndex(Vn); + public int In => GetQuadwordSubindex(Vn) << (3 - Size); + public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1)); + public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs index 2a26193993..37cd1f7585 100644 --- a/ARMeilleure/Instructions/InstEmitHelper.cs +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -52,32 +52,6 @@ 
namespace ARMeilleure.Instructions return Register(regIndex, RegisterType.Vector, OperandType.V128); } - public static Operand GetVecA32(ArmEmitterContext context, int regIndex, int registerSizeLog) - { - // vector registers in A32 all overlap each other - eg. Q0 = D0:D1 = S0:S1:S2:S3 - // so we need to select the relevant part of a quad vector based on register size. - int elemSizeLog = (4 - registerSizeLog); - int quadIndex = regIndex >> elemSizeLog; - int subIndex = regIndex & ((1 << elemSizeLog) - 1); - Operand result = Register(quadIndex, RegisterType.Vector, OperandType.V128); - if (subIndex != 0) - { - result = context.RotateRight(result, Const(subIndex << elemSizeLog)); - } - - switch (registerSizeLog) - { - case 4: // quad word - return result; - case 3: // double word - return context.VectorZeroUpper64(result); - case 2: // single word - return context.VectorZeroUpper96(result); - } - - return result; - } - public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value) { if (regIndex == RegisterAlias.Aarch32Pc) diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index 01db978376..af50435794 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -117,11 +117,7 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount(); int byteOff = op.Immediate; - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < elems; index++) { @@ -129,18 +125,18 @@ namespace ARMeilleure.Instructions if (byteOff >= elems) { - extract = EmitVectorExtractZx32(context, vm, (byteOff - elems) + em * elems, op.Size); + extract = EmitVectorExtractZx32(context, op.Qm, op.Im + 
(byteOff - elems), op.Size); } else { - extract = EmitVectorExtractZx32(context, vn, byteOff + en * elems, op.Size); + extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size); } byteOff++; - res = EmitVectorInsert(context, res, extract, index + ed * elems, op.Size); + res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void Vmov_S(ArmEmitterContext context) diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index e3573baf59..1ed05ffb2d 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -143,19 +143,16 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> sizeF + 2; - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < elems; index++) { - Operand ne = context.VectorExtract(type, GetVecA32(vm), index + em * elems); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); - res = context.VectorInsert(res, emit(ne), index + ed * elems); + res = context.VectorInsert(res, emit(me), op.Fd + index); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit) @@ -168,21 +165,17 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> (sizeF + 2); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < elems; index++) { - Operand ne = 
context.VectorExtract(type, GetVecA32(vn), index + en * elems); - Operand me = context.VectorExtract(type, GetVecA32(vm), index + em * elems); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); - res = context.VectorInsert(res, emit(ne, me), index + ed * elems); + res = context.VectorInsert(res, emit(ne, me), op.Fd + index); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit) @@ -195,160 +188,107 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> sizeF + 2; - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < elems; index++) { - Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems); - Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems); - Operand me = context.VectorExtract(type, GetVecA32(vm), index + em * elems); + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); - res = context.VectorInsert(res, emit(de, ne, me), index); + res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } // INTEGER - public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit) + public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - 
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); int elems = op.GetBytesCount() >> op.Size; for (int index = 0; index < elems; index++) { - Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); - res = EmitVectorInsert(context, res, emit(me), index + ed * elems, op.Size); + res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index + op.Id, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit) + { + 
EmitVectorUnaryOpI32(context, emit, true); } public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit) { - OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; - - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size); - - res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size); - } - - context.Copy(GetVecA32(vd), res); + EmitVectorBinaryOpI32(context, emit, true); } public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit) { - OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; - - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand de = EmitVectorExtractSx32(context, vd, index + ed * elems, op.Size); - Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size); - - res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size); - } - - context.Copy(GetVecA32(vd), res); + EmitVectorTernaryOpI32(context, emit, true); } public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit) { - OpCode32Simd op = (OpCode32Simd)context.CurrOp; - - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = 
GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size); - - res = EmitVectorInsert(context, res, emit(me), index + ed * elems, op.Size); - } - - context.Copy(GetVecA32(vd), res); + EmitVectorUnaryOpI32(context, emit, false); } public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit) { - OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; - - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size); - - res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size); - } - - context.Copy(GetVecA32(vd), res); + EmitVectorBinaryOpI32(context, emit, false); } public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit) { - OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; - - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand de = EmitVectorExtractZx32(context, vd, index + ed * elems, op.Size); - Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size); - Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size); - - res = 
EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size); - } - - context.Copy(GetVecA32(vd), res); + EmitVectorTernaryOpI32(context, emit, false); } // VEC BY SCALAR @@ -364,20 +304,18 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> sizeF + 2; - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); Operand m = ExtractScalar(context, type, op.Vm); - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < elems; index++) { - Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), index + op.Fn); - res = context.VectorInsert(res, emit(ne, m), index + ed * elems); + res = context.VectorInsert(res, emit(ne, m), index + op.Fd); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed) @@ -385,22 +323,20 @@ namespace ARMeilleure.Instructions OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; if (op.Size < 1) throw new Exception("Undefined"); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); int elems = op.GetBytesCount() >> op.Size; for (int index = 0; index < elems; index++) { - Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, index + op.In, op.Size, signed); - res = EmitVectorInsert(context, res, emit(ne, m), index + ed * elems, op.Size); + res = EmitVectorInsert(context, res, emit(ne, m), index + op.Id, op.Size); } - context.Copy(GetVecA32(vd), res); + 
context.Copy(GetVecA32(op.Qd), res); } public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit) @@ -414,21 +350,19 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> sizeF + 2; - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); Operand m = ExtractScalar(context, type, op.Vm); - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < elems; index++) { - Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems); - Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems); + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), index + op.Fd); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), index + op.Fn); - res = context.VectorInsert(res, emit(de, ne, m), index + ed * elems); + res = context.VectorInsert(res, emit(de, ne, m), index + op.Fd); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed) @@ -436,23 +370,21 @@ namespace ARMeilleure.Instructions OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; if (op.Size < 1) throw new Exception("Undefined"); - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); int elems = op.GetBytesCount() >> op.Size; for (int index = 0; index < elems; index++) { - Operand de = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, signed); - Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed); + Operand de = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, signed); + Operand ne 
= EmitVectorExtract32(context, op.Qn, index + op.In, op.Size, signed); - res = EmitVectorInsert(context, res, emit(de, ne, m), index + ed * elems, op.Size); + res = EmitVectorInsert(context, res, emit(de, ne, m), index + op.Id, op.Size); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } // PAIRWISE @@ -472,30 +404,26 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> (sizeF + 2); int pairs = elems >> 1; - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); - Operand mvec = GetVecA32(vm); - Operand nvec = GetVecA32(vn); + Operand res = GetVecA32(op.Qd); + Operand mvec = GetVecA32(op.Qm); + Operand nvec = GetVecA32(op.Qn); for (int index = 0; index < pairs; index++) { int pairIndex = index << 1; - Operand n1 = context.VectorExtract(type, nvec, pairIndex + en * elems); - Operand n2 = context.VectorExtract(type, nvec, pairIndex + 1 + en * elems); + Operand n1 = context.VectorExtract(type, nvec, pairIndex + op.Fn); + Operand n2 = context.VectorExtract(type, nvec, pairIndex + 1 + op.Fn); - res = context.VectorInsert(res, emit(n1, n2), index + ed * elems); + res = context.VectorInsert(res, emit(n1, n2), index + op.Fd); - Operand m1 = context.VectorExtract(type, mvec, pairIndex + em * elems); - Operand m2 = context.VectorExtract(type, mvec, pairIndex + 1 + em * elems); + Operand m1 = context.VectorExtract(type, mvec, pairIndex + op.Fm); + Operand m2 = context.VectorExtract(type, mvec, pairIndex + 1 + op.Fm); - res = context.VectorInsert(res, emit(m1, m2), index + pairs + ed * elems); + res = context.VectorInsert(res, emit(m1, m2), index + pairs + op.Fd); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed) @@ -510,26 
+438,22 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> op.Size; int pairs = elems >> 1; - (int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize); - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - - Operand res = GetVecA32(vd); + Operand res = GetVecA32(op.Qd); for (int index = 0; index < pairs; index++) { int pairIndex = index << 1; - Operand n1 = EmitVectorExtract32(context, vn, pairIndex + en * elems, op.Size, signed); - Operand n2 = EmitVectorExtract32(context, vn, pairIndex + 1 + en * elems, op.Size, signed); + Operand n1 = EmitVectorExtract32(context, op.Qn, pairIndex + op.In, op.Size, signed); + Operand n2 = EmitVectorExtract32(context, op.Qn, pairIndex + 1 + op.In, op.Size, signed); - Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, signed); - Operand m2 = EmitVectorExtract32(context, vm, pairIndex + 1 + em * elems, op.Size, signed); + Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, pairIndex + 1 + op.Im, op.Size, signed); - res = EmitVectorInsert(context, res, emit(n1, n2), index + ed * elems, op.Size); - res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + ed * elems, op.Size); + res = EmitVectorInsert(context, res, emit(n1, n2), index + op.Id, op.Size); + res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + op.Id, op.Size); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } // helper func diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs index 8ba26f3887..7c2c419f34 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -131,9 +131,6 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> op.Size; - (int vm, int em) = 
GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); - int length = op.Length + 1; Tuple[] tableTuples = new Tuple[length]; @@ -145,12 +142,12 @@ namespace ARMeilleure.Instructions int byteLength = length * 8; - Operand res = GetVecA32(vd); - Operand m = GetVecA32(vm); + Operand res = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); for (int index = 0; index < elems; index++) { - Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems)); + Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im)); Operand end = Label(); Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength)); @@ -174,7 +171,7 @@ namespace ARMeilleure.Instructions Tuple vectorLocation = tableTuples[i]; // get the whole vector, we'll get a byte out of it Operand lookupResult; - if (vectorLocation.Item1 == vd) + if (vectorLocation.Item1 == op.Qd) { // result contains the current state of the vector lookupResult = context.VectorExtract(OperandType.I64, res, vectorLocation.Item2); @@ -199,14 +196,14 @@ namespace ARMeilleure.Instructions if (!extension) context.MarkLabel(end); - Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false)) : Const(0L); + Operand fallback = (extension) ? 
context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L); - res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0); + res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0); if (extension) context.MarkLabel(end); } - context.Copy(GetVecA32(vd), res); + context.Copy(GetVecA32(op.Qd), res); } public static void Vtrn(ArmEmitterContext context) @@ -216,28 +213,25 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> op.Size; int pairs = elems >> 1; - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + bool overlap = op.Qm == op.Qd; - bool overlap = vm == vd; - - Operand resD = GetVecA32(vd); - Operand resM = GetVecA32(vm); + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); for (int index = 0; index < pairs; index++) { int pairIndex = index << 1; - Operand d2 = EmitVectorExtract32(context, vd, pairIndex + 1 + ed * elems, op.Size, false); - Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, false); + Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false); + Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false); - resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + ed * elems, op.Size); + resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size); if (overlap) resM = resD; - resM = EmitVectorInsert(context, resM, d2, pairIndex + em * elems, op.Size); + resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size); if (overlap) resD = resM; } - context.Copy(GetVecA32(vd), resD); - if (!overlap) context.Copy(GetVecA32(vm), resM); + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) context.Copy(GetVecA32(op.Qm), resM); } public static void Vzip(ArmEmitterContext 
context) @@ -247,36 +241,33 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> op.Size; int pairs = elems >> 1; - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + bool overlap = op.Qm == op.Qd; - bool overlap = vm == vd; - - Operand resD = GetVecA32(vd); - Operand resM = GetVecA32(vm); + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); for (int index = 0; index < pairs; index++) { int pairIndex = index << 1; - Operand dRowD = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, false); - Operand mRowD = EmitVectorExtract32(context, vm, index + em * elems, op.Size, false); + Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false); + Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false); - Operand dRowM = EmitVectorExtract32(context, vd, index + ed * elems + pairs, op.Size, false); - Operand mRowM = EmitVectorExtract32(context, vm, index + em * elems + pairs, op.Size, false); + Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false); + Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false); - resD = EmitVectorInsert(context, resD, dRowD, pairIndex + ed * elems, op.Size); - resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + ed * elems, op.Size); + resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size); + resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size); if (overlap) resM = resD; - resM = EmitVectorInsert(context, resM, dRowM, pairIndex + em * elems, op.Size); - resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + em * elems, op.Size); + resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size); + resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size); if (overlap) resD = resM; } - 
context.Copy(GetVecA32(vd), resD); - if (!overlap) context.Copy(GetVecA32(vm), resM); + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) context.Copy(GetVecA32(op.Qm), resM); } public static void Vuzp(ArmEmitterContext context) @@ -286,13 +277,10 @@ namespace ARMeilleure.Instructions int elems = op.GetBytesCount() >> op.Size; int pairs = elems >> 1; - (int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize); - (int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + bool overlap = op.Qm == op.Qd; - bool overlap = vm == vd; - - Operand resD = GetVecA32(vd); - Operand resM = GetVecA32(vm); + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); for (int index = 0; index < elems; index++) { @@ -300,23 +288,23 @@ namespace ARMeilleure.Instructions if (index >= pairs) { int pind = index - pairs; - dIns = EmitVectorExtract32(context, vm, (pind << 1) + em * elems, op.Size, false); - mIns = EmitVectorExtract32(context, vm, ((pind << 1) | 1) + em * elems, op.Size, false); + dIns = EmitVectorExtract32(context, op.Qm, (pind << 1) + op.Im, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qm, ((pind << 1) | 1) + op.Im, op.Size, false); } else { - dIns = EmitVectorExtract32(context, vd, (index << 1) + ed * elems, op.Size, false); - mIns = EmitVectorExtract32(context, vd, ((index << 1) | 1) + ed * elems, op.Size, false); + dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false); } - resD = EmitVectorInsert(context, resD, dIns, index + ed * elems, op.Size); + resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size); if (overlap) resM = resD; - resM = EmitVectorInsert(context, resM, mIns, index + em * elems, op.Size); + resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size); if (overlap) resD = resM; } - context.Copy(GetVecA32(vd), resD); - if (!overlap) context.Copy(GetVecA32(vm), resM); + 
context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) context.Copy(GetVecA32(op.Qm), resM); } } }