Refactor part 2: Move index/subindex logic to Operand

May have inadvertently fixed one (1) bug
This commit is contained in:
riperiperi 2020-01-17 00:59:04 +00:00
parent c246ed3daf
commit 56db1a1c45
10 changed files with 218 additions and 281 deletions

View file

@ -0,0 +1,61 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ARMeilleure.Decoders
{
// Shared base for A32 SIMD opcodes. It holds the Vd / Vm register fields and the
// element Size field, and derives from them the quadword register index plus the
// starting element index inside that quadword.
abstract class BaseOpCode32Simd : OpCode32, IOpCode32Simd
{
    // Register fields and element size; derived decoders assign them.
    public int Vd { get; protected set; }
    public int Vm { get; protected set; }
    public int Size { get; protected set; }

    // Naming scheme for the derived properties (x is the operand letter, d or m):
    //
    //   Qx - index of the quadword register that contains the target vector.
    //   Ix - starting element index inside that quadword when Size is treated as an
    //        integer element size: the subindex is shifted left by (3 - Size).
    //   Fx - starting element index inside that quadword when Size is treated as a
    //        floating point size: the subindex is shifted left by (1 - (Size & 1)),
    //        i.e. by one when the low bit of Size is clear and by zero when it is set.
    //        Original author's note: with the top bit truncated, 1 SHOULD mean fp16,
    //        but the Switch does not support it, so 64-bit is always assumed.
    public int Qd
    {
        get { return GetQuadwordIndex(Vd); }
    }

    public int Id
    {
        get { return IntegerElementBase(Vd); }
    }

    public int Fd
    {
        get { return FloatElementBase(Vd); }
    }

    public int Qm
    {
        get { return GetQuadwordIndex(Vm); }
    }

    public int Im
    {
        get { return IntegerElementBase(Vm); }
    }

    public int Fm
    {
        get { return FloatElementBase(Vm); }
    }

    // Maps a register field value to the index of the quadword register holding it.
    // Throws InvalidOperationException when RegisterSize is not Simd128, Simd64 or Simd32.
    protected int GetQuadwordIndex(int index)
    {
        int shift;

        switch (RegisterSize)
        {
            case RegisterSize.Simd128:
            case RegisterSize.Simd64:
                shift = 1;
                break;

            case RegisterSize.Simd32:
                shift = 2;
                break;

            default:
                throw new InvalidOperationException();
        }

        return index >> shift;
    }

    // Maps a register field value to its position inside the containing quadword:
    // always 0 for Simd128, the low bit for Simd64, the low two bits for Simd32.
    // Throws InvalidOperationException for any other RegisterSize.
    protected int GetQuadwordSubindex(int index)
    {
        int mask;

        switch (RegisterSize)
        {
            case RegisterSize.Simd128:
                mask = 0;
                break;

            case RegisterSize.Simd64:
                mask = 1;
                break;

            case RegisterSize.Simd32:
                mask = 3;
                break;

            default:
                throw new InvalidOperationException();
        }

        return index & mask;
    }

    public BaseOpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
    }

    // Starting element index for the given register with Size read as an integer size.
    private int IntegerElementBase(int register)
    {
        return GetQuadwordSubindex(register) << (3 - Size);
    }

    // Starting element index for the given register with Size read as a float size.
    private int FloatElementBase(int register)
    {
        return GetQuadwordSubindex(register) << (1 - (Size & 1));
    }
}
}

View file

@ -6,6 +6,5 @@ namespace ARMeilleure.Decoders
{
interface IOpCode32Simd : IOpCode32, IOpCodeSimd
{
int Elems { get; }
}
}

View file

@ -8,5 +8,6 @@ namespace ARMeilleure.Decoders
{
public int Vd { get; }
public long Immediate { get; }
int Elems { get; }
}
}

View file

@ -4,16 +4,12 @@ using System.Text;
namespace ARMeilleure.Decoders
{
class OpCode32Simd : OpCode32, IOpCode32Simd
class OpCode32Simd : BaseOpCode32Simd
{
public int Vd { get; private set; }
public int Vm { get; protected set; }
public int Opc { get; protected set; }
public int Size { get; protected set; }
public bool Q { get; protected set; }
public bool F { get; protected set; }
public bool U { get; private set; }
public int Elems => GetBytesCount() >> ((Size == 1) ? 1 : 2);
public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{

View file

@ -4,18 +4,12 @@ using System.Text;
namespace ARMeilleure.Decoders
{
class OpCode32SimdDupElem : OpCode32, IOpCode32Simd
class OpCode32SimdDupElem : BaseOpCode32Simd
{
public int Size { get; private set; }
public int Elems => 1;
public int Vd { get; private set; }
public int Vm { get; private set; }
public bool Q { get; private set; }
public int Index { get; private set; }
public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
var opc = (opCode >> 16) & 0xf;

View file

@ -8,6 +8,10 @@ namespace ARMeilleure.Decoders
{
public int Vn { get; private set; }
public int Qn => GetQuadwordIndex(Vn);
public int In => GetQuadwordSubindex(Vn) << (3 - Size);
public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1));
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);

View file

@ -52,32 +52,6 @@ namespace ARMeilleure.Instructions
return Register(regIndex, RegisterType.Vector, OperandType.V128);
}
// Builds an operand for an A32 vector register of the given size.
//
// A32 vector registers alias one another (e.g. Q0 = D0:D1 = S0:S1:S2:S3), so the
// requested register is located as a slice of its containing 128-bit register:
// the containing register is rotated right when the slice is not the first one,
// and the upper 64 or 96 bits are zeroed for registerSizeLog 3 and 2 respectively.
// Any other registerSizeLog returns the (possibly rotated) vector unchanged.
public static Operand GetVecA32(ArmEmitterContext context, int regIndex, int registerSizeLog)
{
    int shift = 4 - registerSizeLog;
    int slice = regIndex & ((1 << shift) - 1);

    Operand vector = Register(regIndex >> shift, RegisterType.Vector, OperandType.V128);

    if (slice != 0)
    {
        vector = context.RotateRight(vector, Const(slice << shift));
    }

    if (registerSizeLog == 3)
    {
        // Double word.
        return context.VectorZeroUpper64(vector);
    }

    if (registerSizeLog == 2)
    {
        // Single word.
        return context.VectorZeroUpper96(vector);
    }

    // Quad word (4) and every other value.
    return vector;
}
public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
{
if (regIndex == RegisterAlias.Aarch32Pc)

View file

@ -117,11 +117,7 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount();
int byteOff = op.Immediate;
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < elems; index++)
{
@ -129,18 +125,18 @@ namespace ARMeilleure.Instructions
if (byteOff >= elems)
{
extract = EmitVectorExtractZx32(context, vm, (byteOff - elems) + em * elems, op.Size);
extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
}
else
{
extract = EmitVectorExtractZx32(context, vn, byteOff + en * elems, op.Size);
extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
}
byteOff++;
res = EmitVectorInsert(context, res, extract, index + ed * elems, op.Size);
res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void Vmov_S(ArmEmitterContext context)

View file

@ -143,19 +143,16 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> sizeF + 2;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < elems; index++)
{
Operand ne = context.VectorExtract(type, GetVecA32(vm), index + em * elems);
Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
res = context.VectorInsert(res, emit(ne), index + ed * elems);
res = context.VectorInsert(res, emit(me), op.Fd + index);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit)
@ -168,21 +165,17 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> (sizeF + 2);
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < elems; index++)
{
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
Operand me = context.VectorExtract(type, GetVecA32(vm), index + em * elems);
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
res = context.VectorInsert(res, emit(ne, me), index + ed * elems);
res = context.VectorInsert(res, emit(ne, me), op.Fd + index);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit)
@ -195,160 +188,107 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> sizeF + 2;
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < elems; index++)
{
Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems);
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
Operand me = context.VectorExtract(type, GetVecA32(vm), index + em * elems);
Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
res = context.VectorInsert(res, emit(de, ne, me), index);
res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
// INTEGER
public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
res = EmitVectorInsert(context, res, emit(me), index + ed * elems, op.Size);
res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
// Emits an element-wise integer operation with two source vectors.
// For each element, the n and m operands are extracted (via EmitVectorExtract32,
// forwarding the "signed" flag), combined with "emit", and the result is inserted
// at the matching destination element. The accumulated vector is then copied to
// the destination quadword register.
public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand result = GetVecA32(op.Qd);

    int count = op.GetBytesCount() >> op.Size;
    int i = 0;

    while (i < count)
    {
        Operand nElem = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);
        Operand mElem = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);
        Operand value = emit(nElem, mElem);

        result = EmitVectorInsert(context, result, value, op.Id + i, op.Size);
        i++;
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Emits an element-wise integer operation with three inputs: the current
// destination element plus the n and m source elements. Each input is extracted
// via EmitVectorExtract32 (forwarding the "signed" flag), combined with "emit",
// and the result is inserted back at the destination element. The accumulated
// vector is then copied to the destination quadword register.
public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand result = GetVecA32(op.Qd);

    int count = op.GetBytesCount() >> op.Size;
    int i = 0;

    while (i < count)
    {
        Operand dElem = EmitVectorExtract32(context, op.Qd, op.Id + i, op.Size, signed);
        Operand nElem = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);
        Operand mElem = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);
        Operand value = emit(dElem, nElem, mElem);

        result = EmitVectorInsert(context, result, value, i + op.Id, op.Size);
        i++;
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Unary element-wise operation; forwards to EmitVectorUnaryOpI32 with signed = true.
public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
{
    const bool signedExtract = true;

    EmitVectorUnaryOpI32(context, emit, signedExtract);
}
public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size);
}
context.Copy(GetVecA32(vd), res);
EmitVectorBinaryOpI32(context, emit, true);
}
public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand de = EmitVectorExtractSx32(context, vd, index + ed * elems, op.Size);
Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size);
}
context.Copy(GetVecA32(vd), res);
EmitVectorTernaryOpI32(context, emit, true);
}
public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(me), index + ed * elems, op.Size);
}
context.Copy(GetVecA32(vd), res);
EmitVectorUnaryOpI32(context, emit, false);
}
public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size);
}
context.Copy(GetVecA32(vd), res);
EmitVectorBinaryOpI32(context, emit, false);
}
public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand de = EmitVectorExtractZx32(context, vd, index + ed * elems, op.Size);
Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size);
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size);
}
context.Copy(GetVecA32(vd), res);
EmitVectorTernaryOpI32(context, emit, false);
}
// VEC BY SCALAR
@ -364,20 +304,18 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> sizeF + 2;
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand m = ExtractScalar(context, type, op.Vm);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < elems; index++)
{
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), index + op.Fn);
res = context.VectorInsert(res, emit(ne, m), index + ed * elems);
res = context.VectorInsert(res, emit(ne, m), index + op.Fd);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
@ -385,22 +323,20 @@ namespace ARMeilleure.Instructions
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
if (op.Size < 1) throw new Exception("Undefined");
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed);
Operand ne = EmitVectorExtract32(context, op.Qn, index + op.In, op.Size, signed);
res = EmitVectorInsert(context, res, emit(ne, m), index + ed * elems, op.Size);
res = EmitVectorInsert(context, res, emit(ne, m), index + op.Id, op.Size);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit)
@ -414,21 +350,19 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> sizeF + 2;
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand m = ExtractScalar(context, type, op.Vm);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < elems; index++)
{
Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems);
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
Operand de = context.VectorExtract(type, GetVecA32(op.Qd), index + op.Fd);
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), index + op.Fn);
res = context.VectorInsert(res, emit(de, ne, m), index + ed * elems);
res = context.VectorInsert(res, emit(de, ne, m), index + op.Fd);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
@ -436,23 +370,21 @@ namespace ARMeilleure.Instructions
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
if (op.Size < 1) throw new Exception("Undefined");
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand de = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, signed);
Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed);
Operand de = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, signed);
Operand ne = EmitVectorExtract32(context, op.Qn, index + op.In, op.Size, signed);
res = EmitVectorInsert(context, res, emit(de, ne, m), index + ed * elems, op.Size);
res = EmitVectorInsert(context, res, emit(de, ne, m), index + op.Id, op.Size);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
// PAIRWISE
@ -472,30 +404,26 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> (sizeF + 2);
int pairs = elems >> 1;
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand mvec = GetVecA32(vm);
Operand nvec = GetVecA32(vn);
Operand res = GetVecA32(op.Qd);
Operand mvec = GetVecA32(op.Qm);
Operand nvec = GetVecA32(op.Qn);
for (int index = 0; index < pairs; index++)
{
int pairIndex = index << 1;
Operand n1 = context.VectorExtract(type, nvec, pairIndex + en * elems);
Operand n2 = context.VectorExtract(type, nvec, pairIndex + 1 + en * elems);
Operand n1 = context.VectorExtract(type, nvec, pairIndex + op.Fn);
Operand n2 = context.VectorExtract(type, nvec, pairIndex + 1 + op.Fn);
res = context.VectorInsert(res, emit(n1, n2), index + ed * elems);
res = context.VectorInsert(res, emit(n1, n2), index + op.Fd);
Operand m1 = context.VectorExtract(type, mvec, pairIndex + em * elems);
Operand m2 = context.VectorExtract(type, mvec, pairIndex + 1 + em * elems);
Operand m1 = context.VectorExtract(type, mvec, pairIndex + op.Fm);
Operand m2 = context.VectorExtract(type, mvec, pairIndex + 1 + op.Fm);
res = context.VectorInsert(res, emit(m1, m2), index + pairs + ed * elems);
res = context.VectorInsert(res, emit(m1, m2), index + pairs + op.Fd);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
@ -510,26 +438,22 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> op.Size;
int pairs = elems >> 1;
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
Operand res = GetVecA32(vd);
Operand res = GetVecA32(op.Qd);
for (int index = 0; index < pairs; index++)
{
int pairIndex = index << 1;
Operand n1 = EmitVectorExtract32(context, vn, pairIndex + en * elems, op.Size, signed);
Operand n2 = EmitVectorExtract32(context, vn, pairIndex + 1 + en * elems, op.Size, signed);
Operand n1 = EmitVectorExtract32(context, op.Qn, pairIndex + op.In, op.Size, signed);
Operand n2 = EmitVectorExtract32(context, op.Qn, pairIndex + 1 + op.In, op.Size, signed);
Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, signed);
Operand m2 = EmitVectorExtract32(context, vm, pairIndex + 1 + em * elems, op.Size, signed);
Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, signed);
Operand m2 = EmitVectorExtract32(context, op.Qm, pairIndex + 1 + op.Im, op.Size, signed);
res = EmitVectorInsert(context, res, emit(n1, n2), index + ed * elems, op.Size);
res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + ed * elems, op.Size);
res = EmitVectorInsert(context, res, emit(n1, n2), index + op.Id, op.Size);
res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + op.Id, op.Size);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
// helper func

View file

@ -131,9 +131,6 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> op.Size;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
int length = op.Length + 1;
Tuple<int, int>[] tableTuples = new Tuple<int, int>[length];
@ -145,12 +142,12 @@ namespace ARMeilleure.Instructions
int byteLength = length * 8;
Operand res = GetVecA32(vd);
Operand m = GetVecA32(vm);
Operand res = GetVecA32(op.Qd);
Operand m = GetVecA32(op.Qm);
for (int index = 0; index < elems; index++)
{
Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems));
Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im));
Operand end = Label();
Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
@ -174,7 +171,7 @@ namespace ARMeilleure.Instructions
Tuple<int, int> vectorLocation = tableTuples[i];
// get the whole vector, we'll get a byte out of it
Operand lookupResult;
if (vectorLocation.Item1 == vd)
if (vectorLocation.Item1 == op.Qd)
{
// result contains the current state of the vector
lookupResult = context.VectorExtract(OperandType.I64, res, vectorLocation.Item2);
@ -199,14 +196,14 @@ namespace ARMeilleure.Instructions
if (!extension) context.MarkLabel(end);
Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false)) : Const(0L);
Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L);
res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0);
res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0);
if (extension) context.MarkLabel(end);
}
context.Copy(GetVecA32(vd), res);
context.Copy(GetVecA32(op.Qd), res);
}
public static void Vtrn(ArmEmitterContext context)
@ -216,28 +213,25 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> op.Size;
int pairs = elems >> 1;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
bool overlap = op.Qm == op.Qd;
bool overlap = vm == vd;
Operand resD = GetVecA32(vd);
Operand resM = GetVecA32(vm);
Operand resD = GetVecA32(op.Qd);
Operand resM = GetVecA32(op.Qm);
for (int index = 0; index < pairs; index++)
{
int pairIndex = index << 1;
Operand d2 = EmitVectorExtract32(context, vd, pairIndex + 1 + ed * elems, op.Size, false);
Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, false);
Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false);
Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false);
resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + ed * elems, op.Size);
resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size);
if (overlap) resM = resD;
resM = EmitVectorInsert(context, resM, d2, pairIndex + em * elems, op.Size);
resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size);
if (overlap) resD = resM;
}
context.Copy(GetVecA32(vd), resD);
if (!overlap) context.Copy(GetVecA32(vm), resM);
context.Copy(GetVecA32(op.Qd), resD);
if (!overlap) context.Copy(GetVecA32(op.Qm), resM);
}
public static void Vzip(ArmEmitterContext context)
@ -247,36 +241,33 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> op.Size;
int pairs = elems >> 1;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
bool overlap = op.Qm == op.Qd;
bool overlap = vm == vd;
Operand resD = GetVecA32(vd);
Operand resM = GetVecA32(vm);
Operand resD = GetVecA32(op.Qd);
Operand resM = GetVecA32(op.Qm);
for (int index = 0; index < pairs; index++)
{
int pairIndex = index << 1;
Operand dRowD = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, false);
Operand mRowD = EmitVectorExtract32(context, vm, index + em * elems, op.Size, false);
Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false);
Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false);
Operand dRowM = EmitVectorExtract32(context, vd, index + ed * elems + pairs, op.Size, false);
Operand mRowM = EmitVectorExtract32(context, vm, index + em * elems + pairs, op.Size, false);
Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false);
Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false);
resD = EmitVectorInsert(context, resD, dRowD, pairIndex + ed * elems, op.Size);
resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + ed * elems, op.Size);
resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size);
resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size);
if (overlap) resM = resD;
resM = EmitVectorInsert(context, resM, dRowM, pairIndex + em * elems, op.Size);
resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + em * elems, op.Size);
resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size);
resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size);
if (overlap) resD = resM;
}
context.Copy(GetVecA32(vd), resD);
if (!overlap) context.Copy(GetVecA32(vm), resM);
context.Copy(GetVecA32(op.Qd), resD);
if (!overlap) context.Copy(GetVecA32(op.Qm), resM);
}
public static void Vuzp(ArmEmitterContext context)
@ -286,13 +277,10 @@ namespace ARMeilleure.Instructions
int elems = op.GetBytesCount() >> op.Size;
int pairs = elems >> 1;
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
bool overlap = op.Qm == op.Qd;
bool overlap = vm == vd;
Operand resD = GetVecA32(vd);
Operand resM = GetVecA32(vm);
Operand resD = GetVecA32(op.Qd);
Operand resM = GetVecA32(op.Qm);
for (int index = 0; index < elems; index++)
{
@ -300,23 +288,23 @@ namespace ARMeilleure.Instructions
if (index >= pairs)
{
int pind = index - pairs;
dIns = EmitVectorExtract32(context, vm, (pind << 1) + em * elems, op.Size, false);
mIns = EmitVectorExtract32(context, vm, ((pind << 1) | 1) + em * elems, op.Size, false);
dIns = EmitVectorExtract32(context, op.Qm, (pind << 1) + op.Im, op.Size, false);
mIns = EmitVectorExtract32(context, op.Qm, ((pind << 1) | 1) + op.Im, op.Size, false);
}
else
{
dIns = EmitVectorExtract32(context, vd, (index << 1) + ed * elems, op.Size, false);
mIns = EmitVectorExtract32(context, vd, ((index << 1) | 1) + ed * elems, op.Size, false);
dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false);
mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false);
}
resD = EmitVectorInsert(context, resD, dIns, index + ed * elems, op.Size);
resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size);
if (overlap) resM = resD;
resM = EmitVectorInsert(context, resM, mIns, index + em * elems, op.Size);
resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size);
if (overlap) resD = resM;
}
context.Copy(GetVecA32(vd), resD);
if (!overlap) context.Copy(GetVecA32(vm), resM);
context.Copy(GetVecA32(op.Qd), resD);
if (!overlap) context.Copy(GetVecA32(op.Qm), resM);
}
}
}