Refactor part 2: Move index/subindex logic to Operand
May have inadvertently fixed one (1) bug
This commit is contained in:
parent
c246ed3daf
commit
56db1a1c45
10 changed files with 218 additions and 281 deletions
61
ARMeilleure/Decoders/BaseOpCode32Simd.cs
Normal file
61
ARMeilleure/Decoders/BaseOpCode32Simd.cs
Normal file
|
@ -0,0 +1,61 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace ARMeilleure.Decoders
|
||||
{
|
||||
/// <summary>
/// Shared base for A32 SIMD opcodes. Holds the destination (<see cref="Vd"/>) and source
/// (<see cref="Vm"/>) vector register numbers plus the element <see cref="Size"/>, and
/// translates those register numbers into a backing quadword register and a starting
/// element index inside it.
/// </summary>
abstract class BaseOpCode32Simd : OpCode32, IOpCode32Simd
{
    /// <summary>Destination vector register number; assigned by derived decoders.</summary>
    public int Vd { get; protected set; }

    /// <summary>Source vector register number; assigned by derived decoders.</summary>
    public int Vm { get; protected set; }

    /// <summary>Element size field; used as a shift amount when computing element indices.</summary>
    public int Size { get; protected set; }

    // Helpers to index doublewords within quadwords. A loop over a vector starts at
    // quadword Qx and at element index Ix (integer ops) or Fx (floating point ops).
    //
    // Qx: the quadword register that contains the target vector.
    // Ix: starting element index of the target vector within that quadword, with Size
    //     interpreted as an integer element size (element width = 8 << Size bits).
    // Fx: starting element index of the target vector within that quadword, with Size
    //     interpreted as a floating point size; see GetFloatElementIndex.

    /// <summary>Quadword register containing <see cref="Vd"/>.</summary>
    public int Qd => GetQuadwordIndex(Vd);

    /// <summary>Starting integer element index of <see cref="Vd"/> within <see cref="Qd"/>.</summary>
    public int Id => GetIntegerElementIndex(Vd);

    /// <summary>Starting floating point element index of <see cref="Vd"/> within <see cref="Qd"/>.</summary>
    public int Fd => GetFloatElementIndex(Vd);

    /// <summary>Quadword register containing <see cref="Vm"/>.</summary>
    public int Qm => GetQuadwordIndex(Vm);

    /// <summary>Starting integer element index of <see cref="Vm"/> within <see cref="Qm"/>.</summary>
    public int Im => GetIntegerElementIndex(Vm);

    /// <summary>Starting floating point element index of <see cref="Vm"/> within <see cref="Qm"/>.</summary>
    public int Fm => GetFloatElementIndex(Vm);

    /// <summary>
    /// Forwards the decoded instruction data to <see cref="OpCode32"/>. The constructor is
    /// protected because this type is abstract and can only be reached through a derived class.
    /// </summary>
    protected BaseOpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
    {
    }

    /// <summary>
    /// Maps a register number to the index of the quadword register that contains it,
    /// according to the current <c>RegisterSize</c>.
    /// </summary>
    /// <param name="index">Register number, expressed in units of the current register size.</param>
    /// <returns>The quadword register index.</returns>
    /// <exception cref="InvalidOperationException">The register size is not a SIMD size.</exception>
    protected int GetQuadwordIndex(int index)
    {
        switch (RegisterSize)
        {
            // Both of these halve the register number, i.e. it is treated as a
            // doubleword register number.
            case RegisterSize.Simd128:
            case RegisterSize.Simd64:
                return index >> 1;

            // Four single word registers share one quadword.
            case RegisterSize.Simd32:
                return index >> 2;
        }

        throw new InvalidOperationException($"Register size {RegisterSize} is not a valid SIMD register size.");
    }

    /// <summary>
    /// Maps a register number to its position inside the containing quadword register,
    /// according to the current <c>RegisterSize</c>.
    /// </summary>
    /// <param name="index">Register number, expressed in units of the current register size.</param>
    /// <returns>
    /// 0 for quadword registers, 0-1 (doubleword position) for doubleword registers and
    /// 0-3 (single word position) for single word registers.
    /// </returns>
    /// <exception cref="InvalidOperationException">The register size is not a SIMD size.</exception>
    protected int GetQuadwordSubindex(int index)
    {
        switch (RegisterSize)
        {
            case RegisterSize.Simd128:
                return 0;

            case RegisterSize.Simd64:
                return index & 1;

            case RegisterSize.Simd32:
                return index & 3;
        }

        throw new InvalidOperationException($"Register size {RegisterSize} is not a valid SIMD register size.");
    }

    /// <summary>
    /// Computes the starting element index of a register within its quadword for integer
    /// operations: the sub-index is scaled by the number of elements per doubleword
    /// (<c>1 &lt;&lt; (3 - Size)</c>).
    /// </summary>
    /// <param name="index">Register number, expressed in units of the current register size.</param>
    protected int GetIntegerElementIndex(int index)
    {
        // NOTE(review): for RegisterSize.Simd32 the sub-index counts single words (0-3) but is
        // still scaled as if it counted doublewords, which can yield indices past the end of
        // the quadword. Confirm that Ix is never consumed for single word operands.
        return GetQuadwordSubindex(index) << (3 - Size);
    }

    /// <summary>
    /// Computes the starting element index of a register within its quadword for floating
    /// point operations. Only the low bit of <see cref="Size"/> is consulted: 0 gives two
    /// elements per doubleword (32-bit), 1 gives one element per doubleword (64-bit).
    /// </summary>
    /// <param name="index">Register number, expressed in units of the current register size.</param>
    protected int GetFloatElementIndex(int index)
    {
        // When the top bit of the size field is truncated, a value of 1 should mean fp16.
        // That is deliberately not handled, so a low bit of 1 is always treated as 64-bit.
        // NOTE(review): the Simd32 scaling caveat on GetIntegerElementIndex applies here too.
        return GetQuadwordSubindex(index) << (1 - (Size & 1));
    }
}
|
||||
}
|
|
@ -6,6 +6,5 @@ namespace ARMeilleure.Decoders
|
|||
{
|
||||
interface IOpCode32Simd : IOpCode32, IOpCodeSimd
|
||||
{
|
||||
int Elems { get; }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,5 +8,6 @@ namespace ARMeilleure.Decoders
|
|||
{
|
||||
public int Vd { get; }
|
||||
public long Immediate { get; }
|
||||
int Elems { get; }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,16 +4,12 @@ using System.Text;
|
|||
|
||||
namespace ARMeilleure.Decoders
|
||||
{
|
||||
class OpCode32Simd : OpCode32, IOpCode32Simd
|
||||
class OpCode32Simd : BaseOpCode32Simd
|
||||
{
|
||||
public int Vd { get; private set; }
|
||||
public int Vm { get; protected set; }
|
||||
public int Opc { get; protected set; }
|
||||
public int Size { get; protected set; }
|
||||
public bool Q { get; protected set; }
|
||||
public bool F { get; protected set; }
|
||||
public bool U { get; private set; }
|
||||
public int Elems => GetBytesCount() >> ((Size == 1) ? 1 : 2);
|
||||
|
||||
public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
||||
{
|
||||
|
|
|
@ -4,18 +4,12 @@ using System.Text;
|
|||
|
||||
namespace ARMeilleure.Decoders
|
||||
{
|
||||
class OpCode32SimdDupElem : OpCode32, IOpCode32Simd
|
||||
class OpCode32SimdDupElem : BaseOpCode32Simd
|
||||
{
|
||||
public int Size { get; private set; }
|
||||
public int Elems => 1;
|
||||
|
||||
public int Vd { get; private set; }
|
||||
public int Vm { get; private set; }
|
||||
public bool Q { get; private set; }
|
||||
|
||||
public int Index { get; private set; }
|
||||
|
||||
|
||||
public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
||||
{
|
||||
var opc = (opCode >> 16) & 0xf;
|
||||
|
|
|
@ -8,6 +8,10 @@ namespace ARMeilleure.Decoders
|
|||
{
|
||||
public int Vn { get; private set; }
|
||||
|
||||
public int Qn => GetQuadwordIndex(Vn);
|
||||
public int In => GetQuadwordSubindex(Vn) << (3 - Size);
|
||||
public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1));
|
||||
|
||||
public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
||||
{
|
||||
Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
|
||||
|
|
|
@ -52,32 +52,6 @@ namespace ARMeilleure.Instructions
|
|||
return Register(regIndex, RegisterType.Vector, OperandType.V128);
|
||||
}
|
||||
|
||||
public static Operand GetVecA32(ArmEmitterContext context, int regIndex, int registerSizeLog)
|
||||
{
|
||||
// vector registers in A32 all overlap each other - eg. Q0 = D0:D1 = S0:S1:S2:S3
|
||||
// so we need to select the relevant part of a quad vector based on register size.
|
||||
int elemSizeLog = (4 - registerSizeLog);
|
||||
int quadIndex = regIndex >> elemSizeLog;
|
||||
int subIndex = regIndex & ((1 << elemSizeLog) - 1);
|
||||
Operand result = Register(quadIndex, RegisterType.Vector, OperandType.V128);
|
||||
if (subIndex != 0)
|
||||
{
|
||||
result = context.RotateRight(result, Const(subIndex << elemSizeLog));
|
||||
}
|
||||
|
||||
switch (registerSizeLog)
|
||||
{
|
||||
case 4: // quad word
|
||||
return result;
|
||||
case 3: // double word
|
||||
return context.VectorZeroUpper64(result);
|
||||
case 2: // single word
|
||||
return context.VectorZeroUpper96(result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
|
||||
{
|
||||
if (regIndex == RegisterAlias.Aarch32Pc)
|
||||
|
|
|
@ -117,11 +117,7 @@ namespace ARMeilleure.Instructions
|
|||
int elems = op.GetBytesCount();
|
||||
int byteOff = op.Immediate;
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
|
@ -129,18 +125,18 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
if (byteOff >= elems)
|
||||
{
|
||||
extract = EmitVectorExtractZx32(context, vm, (byteOff - elems) + em * elems, op.Size);
|
||||
extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
|
||||
}
|
||||
else
|
||||
{
|
||||
extract = EmitVectorExtractZx32(context, vn, byteOff + en * elems, op.Size);
|
||||
extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
|
||||
}
|
||||
byteOff++;
|
||||
|
||||
res = EmitVectorInsert(context, res, extract, index + ed * elems, op.Size);
|
||||
res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void Vmov_S(ArmEmitterContext context)
|
||||
|
|
|
@ -143,19 +143,16 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int elems = op.GetBytesCount() >> sizeF + 2;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(vm), index + em * elems);
|
||||
Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
|
||||
|
||||
res = context.VectorInsert(res, emit(ne), index + ed * elems);
|
||||
res = context.VectorInsert(res, emit(me), op.Fd + index);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit)
|
||||
|
@ -168,21 +165,17 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int elems = op.GetBytesCount() >> (sizeF + 2);
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
|
||||
Operand me = context.VectorExtract(type, GetVecA32(vm), index + em * elems);
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
|
||||
Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
|
||||
|
||||
res = context.VectorInsert(res, emit(ne, me), index + ed * elems);
|
||||
res = context.VectorInsert(res, emit(ne, me), op.Fd + index);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit)
|
||||
|
@ -195,160 +188,107 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int elems = op.GetBytesCount() >> sizeF + 2;
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems);
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
|
||||
Operand me = context.VectorExtract(type, GetVecA32(vm), index + em * elems);
|
||||
Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
|
||||
Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
|
||||
|
||||
res = context.VectorInsert(res, emit(de, ne, me), index);
|
||||
res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
// INTEGER
|
||||
|
||||
public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
|
||||
public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
|
||||
{
|
||||
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
|
||||
Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(me), index + ed * elems, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits an element-wise integer operation with two vector sources for the current
/// <c>OpCode32SimdReg</c>: each element pair taken from the N and M operands is passed to
/// <paramref name="emit"/> and the result is written to the matching element of the
/// destination, after which the updated quadword is copied back to the register.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and receiving the generated operations.</param>
/// <param name="emit">Callback producing the result for one pair of extracted elements (N first, M second).</param>
/// <param name="signed">Forwarded to <c>EmitVectorExtract32</c>; the Sx wrappers pass true and the Zx wrappers pass false.</param>
public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Start from the current contents of the destination quadword so that elements
    // outside the target vector are preserved.
    Operand result = GetVecA32(op.Qd);

    int size = op.Size;
    int count = op.GetBytesCount() >> size;

    for (int i = 0; i < count; i++)
    {
        Operand n = EmitVectorExtract32(context, op.Qn, op.In + i, size, signed);
        Operand m = EmitVectorExtract32(context, op.Qm, op.Im + i, size, signed);

        Operand value = emit(n, m);

        result = EmitVectorInsert(context, result, value, op.Id + i, size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
|
||||
|
||||
/// <summary>
/// Emits an element-wise integer operation with three inputs for the current
/// <c>OpCode32SimdReg</c>: the existing destination element and the matching N and M
/// elements are passed to <paramref name="emit"/>, and the result replaces the destination
/// element. The updated quadword is copied back to the register at the end.
/// </summary>
/// <param name="context">Emitter context providing the current opcode and receiving the generated operations.</param>
/// <param name="emit">Callback producing the result for one element triple (destination, N, M — in that order).</param>
/// <param name="signed">Forwarded to <c>EmitVectorExtract32</c>; the Sx wrappers pass true and the Zx wrappers pass false.</param>
public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Start from the current contents of the destination quadword so that elements
    // outside the target vector are preserved.
    Operand result = GetVecA32(op.Qd);

    int size = op.Size;
    int count = op.GetBytesCount() >> size;

    for (int i = 0; i < count; i++)
    {
        Operand d = EmitVectorExtract32(context, op.Qd, op.Id + i, size, signed);
        Operand n = EmitVectorExtract32(context, op.Qn, op.In + i, size, signed);
        Operand m = EmitVectorExtract32(context, op.Qm, op.Im + i, size, signed);

        Operand value = emit(d, n, m);

        result = EmitVectorInsert(context, result, value, i + op.Id, size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
|
||||
|
||||
/// <summary>
/// Emits an element-wise unary integer operation by delegating to
/// <c>EmitVectorUnaryOpI32</c> with its <c>signed</c> argument set to true.
/// </summary>
/// <param name="context">Emitter context for the instruction being translated.</param>
/// <param name="emit">Callback producing the result for one extracted element.</param>
public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
{
    const bool signedElements = true;

    EmitVectorUnaryOpI32(context, emit, signedElements);
}
|
||||
|
||||
public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size);
|
||||
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
EmitVectorBinaryOpI32(context, emit, true);
|
||||
}
|
||||
|
||||
public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand de = EmitVectorExtractSx32(context, vd, index + ed * elems, op.Size);
|
||||
Operand ne = EmitVectorExtractSx32(context, vn, index + en * elems, op.Size);
|
||||
Operand me = EmitVectorExtractSx32(context, vm, index + em * elems, op.Size);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
EmitVectorTernaryOpI32(context, emit, true);
|
||||
}
|
||||
|
||||
public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit)
|
||||
{
|
||||
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(me), index + ed * elems, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
EmitVectorUnaryOpI32(context, emit, false);
|
||||
}
|
||||
|
||||
public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size);
|
||||
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(ne, me), index + ed * elems, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
EmitVectorBinaryOpI32(context, emit, false);
|
||||
}
|
||||
|
||||
public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand de = EmitVectorExtractZx32(context, vd, index + ed * elems, op.Size);
|
||||
Operand ne = EmitVectorExtractZx32(context, vn, index + en * elems, op.Size);
|
||||
Operand me = EmitVectorExtractZx32(context, vm, index + em * elems, op.Size);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(de, ne, me), index + ed * elems, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
EmitVectorTernaryOpI32(context, emit, false);
|
||||
}
|
||||
|
||||
// VEC BY SCALAR
|
||||
|
@ -364,20 +304,18 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int elems = op.GetBytesCount() >> sizeF + 2;
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
Operand m = ExtractScalar(context, type, op.Vm);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), index + op.Fn);
|
||||
|
||||
res = context.VectorInsert(res, emit(ne, m), index + ed * elems);
|
||||
res = context.VectorInsert(res, emit(ne, m), index + op.Fd);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
|
||||
|
@ -385,22 +323,20 @@ namespace ARMeilleure.Instructions
|
|||
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
|
||||
|
||||
if (op.Size < 1) throw new Exception("Undefined");
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed);
|
||||
Operand ne = EmitVectorExtract32(context, op.Qn, index + op.In, op.Size, signed);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(ne, m), index + ed * elems, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(ne, m), index + op.Id, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit)
|
||||
|
@ -414,21 +350,19 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int elems = op.GetBytesCount() >> sizeF + 2;
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
Operand m = ExtractScalar(context, type, op.Vm);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand de = context.VectorExtract(type, GetVecA32(vd), index + ed * elems);
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(vn), index + en * elems);
|
||||
Operand de = context.VectorExtract(type, GetVecA32(op.Qd), index + op.Fd);
|
||||
Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), index + op.Fn);
|
||||
|
||||
res = context.VectorInsert(res, emit(de, ne, m), index + ed * elems);
|
||||
res = context.VectorInsert(res, emit(de, ne, m), index + op.Fd);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
|
||||
|
@ -436,23 +370,21 @@ namespace ARMeilleure.Instructions
|
|||
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
|
||||
|
||||
if (op.Size < 1) throw new Exception("Undefined");
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand de = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, signed);
|
||||
Operand ne = EmitVectorExtract32(context, vn, index + en * elems, op.Size, signed);
|
||||
Operand de = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, signed);
|
||||
Operand ne = EmitVectorExtract32(context, op.Qn, index + op.In, op.Size, signed);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(de, ne, m), index + ed * elems, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(de, ne, m), index + op.Id, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
// PAIRWISE
|
||||
|
@ -472,30 +404,26 @@ namespace ARMeilleure.Instructions
|
|||
int elems = op.GetBytesCount() >> (sizeF + 2);
|
||||
int pairs = elems >> 1;
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand mvec = GetVecA32(vm);
|
||||
Operand nvec = GetVecA32(vn);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
Operand mvec = GetVecA32(op.Qm);
|
||||
Operand nvec = GetVecA32(op.Qn);
|
||||
|
||||
for (int index = 0; index < pairs; index++)
|
||||
{
|
||||
int pairIndex = index << 1;
|
||||
|
||||
Operand n1 = context.VectorExtract(type, nvec, pairIndex + en * elems);
|
||||
Operand n2 = context.VectorExtract(type, nvec, pairIndex + 1 + en * elems);
|
||||
Operand n1 = context.VectorExtract(type, nvec, pairIndex + op.Fn);
|
||||
Operand n2 = context.VectorExtract(type, nvec, pairIndex + 1 + op.Fn);
|
||||
|
||||
res = context.VectorInsert(res, emit(n1, n2), index + ed * elems);
|
||||
res = context.VectorInsert(res, emit(n1, n2), index + op.Fd);
|
||||
|
||||
Operand m1 = context.VectorExtract(type, mvec, pairIndex + em * elems);
|
||||
Operand m2 = context.VectorExtract(type, mvec, pairIndex + 1 + em * elems);
|
||||
Operand m1 = context.VectorExtract(type, mvec, pairIndex + op.Fm);
|
||||
Operand m2 = context.VectorExtract(type, mvec, pairIndex + 1 + op.Fm);
|
||||
|
||||
res = context.VectorInsert(res, emit(m1, m2), index + pairs + ed * elems);
|
||||
res = context.VectorInsert(res, emit(m1, m2), index + pairs + op.Fd);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
|
||||
|
@ -510,26 +438,22 @@ namespace ARMeilleure.Instructions
|
|||
int elems = op.GetBytesCount() >> op.Size;
|
||||
int pairs = elems >> 1;
|
||||
|
||||
(int vn, int en) = GetQuadwordAndSubindex(op.Vn, op.RegisterSize);
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
for (int index = 0; index < pairs; index++)
|
||||
{
|
||||
int pairIndex = index << 1;
|
||||
Operand n1 = EmitVectorExtract32(context, vn, pairIndex + en * elems, op.Size, signed);
|
||||
Operand n2 = EmitVectorExtract32(context, vn, pairIndex + 1 + en * elems, op.Size, signed);
|
||||
Operand n1 = EmitVectorExtract32(context, op.Qn, pairIndex + op.In, op.Size, signed);
|
||||
Operand n2 = EmitVectorExtract32(context, op.Qn, pairIndex + 1 + op.In, op.Size, signed);
|
||||
|
||||
Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, signed);
|
||||
Operand m2 = EmitVectorExtract32(context, vm, pairIndex + 1 + em * elems, op.Size, signed);
|
||||
Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, signed);
|
||||
Operand m2 = EmitVectorExtract32(context, op.Qm, pairIndex + 1 + op.Im, op.Size, signed);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(n1, n2), index + ed * elems, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + ed * elems, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(n1, n2), index + op.Id, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(m1, m2), index + pairs + op.Id, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
// helper func
|
||||
|
|
|
@ -131,9 +131,6 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
|
||||
int length = op.Length + 1;
|
||||
|
||||
Tuple<int, int>[] tableTuples = new Tuple<int, int>[length];
|
||||
|
@ -145,12 +142,12 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int byteLength = length * 8;
|
||||
|
||||
Operand res = GetVecA32(vd);
|
||||
Operand m = GetVecA32(vm);
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
Operand m = GetVecA32(op.Qm);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + em * elems));
|
||||
Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im));
|
||||
|
||||
Operand end = Label();
|
||||
Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
|
||||
|
@ -174,7 +171,7 @@ namespace ARMeilleure.Instructions
|
|||
Tuple<int, int> vectorLocation = tableTuples[i];
|
||||
// get the whole vector, we'll get a byte out of it
|
||||
Operand lookupResult;
|
||||
if (vectorLocation.Item1 == vd)
|
||||
if (vectorLocation.Item1 == op.Qd)
|
||||
{
|
||||
// result contains the current state of the vector
|
||||
lookupResult = context.VectorExtract(OperandType.I64, res, vectorLocation.Item2);
|
||||
|
@ -199,14 +196,14 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
if (!extension) context.MarkLabel(end);
|
||||
|
||||
Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, vd, index + ed * elems, 0, false)) : Const(0L);
|
||||
Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L);
|
||||
|
||||
res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + ed * elems, 0);
|
||||
res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0);
|
||||
|
||||
if (extension) context.MarkLabel(end);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), res);
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void Vtrn(ArmEmitterContext context)
|
||||
|
@ -216,28 +213,25 @@ namespace ARMeilleure.Instructions
|
|||
int elems = op.GetBytesCount() >> op.Size;
|
||||
int pairs = elems >> 1;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
bool overlap = op.Qm == op.Qd;
|
||||
|
||||
bool overlap = vm == vd;
|
||||
|
||||
Operand resD = GetVecA32(vd);
|
||||
Operand resM = GetVecA32(vm);
|
||||
Operand resD = GetVecA32(op.Qd);
|
||||
Operand resM = GetVecA32(op.Qm);
|
||||
|
||||
for (int index = 0; index < pairs; index++)
|
||||
{
|
||||
int pairIndex = index << 1;
|
||||
Operand d2 = EmitVectorExtract32(context, vd, pairIndex + 1 + ed * elems, op.Size, false);
|
||||
Operand m1 = EmitVectorExtract32(context, vm, pairIndex + em * elems, op.Size, false);
|
||||
Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false);
|
||||
Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false);
|
||||
|
||||
resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + ed * elems, op.Size);
|
||||
resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size);
|
||||
if (overlap) resM = resD;
|
||||
resM = EmitVectorInsert(context, resM, d2, pairIndex + em * elems, op.Size);
|
||||
resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size);
|
||||
if (overlap) resD = resM;
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), resD);
|
||||
if (!overlap) context.Copy(GetVecA32(vm), resM);
|
||||
context.Copy(GetVecA32(op.Qd), resD);
|
||||
if (!overlap) context.Copy(GetVecA32(op.Qm), resM);
|
||||
}
|
||||
|
||||
public static void Vzip(ArmEmitterContext context)
|
||||
|
@ -247,36 +241,33 @@ namespace ARMeilleure.Instructions
|
|||
int elems = op.GetBytesCount() >> op.Size;
|
||||
int pairs = elems >> 1;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
bool overlap = op.Qm == op.Qd;
|
||||
|
||||
bool overlap = vm == vd;
|
||||
|
||||
Operand resD = GetVecA32(vd);
|
||||
Operand resM = GetVecA32(vm);
|
||||
Operand resD = GetVecA32(op.Qd);
|
||||
Operand resM = GetVecA32(op.Qm);
|
||||
|
||||
for (int index = 0; index < pairs; index++)
|
||||
{
|
||||
int pairIndex = index << 1;
|
||||
Operand dRowD = EmitVectorExtract32(context, vd, index + ed * elems, op.Size, false);
|
||||
Operand mRowD = EmitVectorExtract32(context, vm, index + em * elems, op.Size, false);
|
||||
Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false);
|
||||
Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false);
|
||||
|
||||
Operand dRowM = EmitVectorExtract32(context, vd, index + ed * elems + pairs, op.Size, false);
|
||||
Operand mRowM = EmitVectorExtract32(context, vm, index + em * elems + pairs, op.Size, false);
|
||||
Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false);
|
||||
Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false);
|
||||
|
||||
resD = EmitVectorInsert(context, resD, dRowD, pairIndex + ed * elems, op.Size);
|
||||
resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + ed * elems, op.Size);
|
||||
resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size);
|
||||
resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size);
|
||||
|
||||
if (overlap) resM = resD;
|
||||
|
||||
resM = EmitVectorInsert(context, resM, dRowM, pairIndex + em * elems, op.Size);
|
||||
resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + em * elems, op.Size);
|
||||
resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size);
|
||||
resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size);
|
||||
|
||||
if (overlap) resD = resM;
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), resD);
|
||||
if (!overlap) context.Copy(GetVecA32(vm), resM);
|
||||
context.Copy(GetVecA32(op.Qd), resD);
|
||||
if (!overlap) context.Copy(GetVecA32(op.Qm), resM);
|
||||
}
|
||||
|
||||
public static void Vuzp(ArmEmitterContext context)
|
||||
|
@ -286,13 +277,10 @@ namespace ARMeilleure.Instructions
|
|||
int elems = op.GetBytesCount() >> op.Size;
|
||||
int pairs = elems >> 1;
|
||||
|
||||
(int vm, int em) = GetQuadwordAndSubindex(op.Vm, op.RegisterSize);
|
||||
(int vd, int ed) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
|
||||
bool overlap = op.Qm == op.Qd;
|
||||
|
||||
bool overlap = vm == vd;
|
||||
|
||||
Operand resD = GetVecA32(vd);
|
||||
Operand resM = GetVecA32(vm);
|
||||
Operand resD = GetVecA32(op.Qd);
|
||||
Operand resM = GetVecA32(op.Qm);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
|
@ -300,23 +288,23 @@ namespace ARMeilleure.Instructions
|
|||
if (index >= pairs)
|
||||
{
|
||||
int pind = index - pairs;
|
||||
dIns = EmitVectorExtract32(context, vm, (pind << 1) + em * elems, op.Size, false);
|
||||
mIns = EmitVectorExtract32(context, vm, ((pind << 1) | 1) + em * elems, op.Size, false);
|
||||
dIns = EmitVectorExtract32(context, op.Qm, (pind << 1) + op.Im, op.Size, false);
|
||||
mIns = EmitVectorExtract32(context, op.Qm, ((pind << 1) | 1) + op.Im, op.Size, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
dIns = EmitVectorExtract32(context, vd, (index << 1) + ed * elems, op.Size, false);
|
||||
mIns = EmitVectorExtract32(context, vd, ((index << 1) | 1) + ed * elems, op.Size, false);
|
||||
dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false);
|
||||
mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false);
|
||||
}
|
||||
|
||||
resD = EmitVectorInsert(context, resD, dIns, index + ed * elems, op.Size);
|
||||
resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size);
|
||||
if (overlap) resM = resD;
|
||||
resM = EmitVectorInsert(context, resM, mIns, index + em * elems, op.Size);
|
||||
resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size);
|
||||
if (overlap) resD = resM;
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(vd), resD);
|
||||
if (!overlap) context.Copy(GetVecA32(vm), resM);
|
||||
context.Copy(GetVecA32(op.Qd), resD);
|
||||
if (!overlap) context.Copy(GetVecA32(op.Qm), resM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue