opt_p2
This commit is contained in:
parent
5c581ac24b
commit
844954abb7
6 changed files with 110 additions and 98 deletions
|
@ -321,32 +321,61 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
if (op.RegisterSize == RegisterSize.Int32)
|
||||
{
|
||||
d = context.Call(new _U32_U32(SoftFallback.ReverseBytes16_32), n);
|
||||
d = EmitReverseBytes16_32Op(context, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
d = context.Call(new _U64_U64(SoftFallback.ReverseBytes16_64), n);
|
||||
d = EmitReverseBytes16_64Op(context, n);
|
||||
}
|
||||
|
||||
SetAluDOrZR(context, d);
|
||||
}
|
||||
|
||||
// Emits IR that swaps the two bytes inside every 16-bit halfword of a 32-bit operand.
// `op` is asserted to be I32; the returned operand is the 64-bit helper's result
// narrowed back to 32 bits.
private static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op)
{
    Debug.Assert(op.Type == OperandType.I32);

    // Widen so the 64-bit helper can be reused, then narrow the result again.
    Operand widened = context.ZeroExtend32(OperandType.I64, op);
    Operand swapped = EmitReverseBytes16_64Op(context, widened);

    return context.ConvertI64ToI32(swapped);
}
|
||||
|
||||
// Emits IR that swaps the two bytes inside every 16-bit halfword of a 64-bit operand.
// `op` is asserted to be I64.
private static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op)
{
    Debug.Assert(op.Type == OperandType.I64);

    // Upper byte of each halfword moves down by one byte.
    Operand upperBytes = context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul));
    Operand movedDown = context.ShiftRightUI(upperBytes, Const(8));

    // Lower byte of each halfword moves up by one byte.
    Operand lowerBytes = context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful));
    Operand movedUp = context.ShiftLeft(lowerBytes, Const(8));

    return context.BitwiseOr(movedDown, movedUp);
}
|
||||
|
||||
// Emits code for the current ALU opcode: reads Rn through GetIntOrZR, reverses its bytes
// (context.ByteSwap for the Int32 register size, a 64-bit reversal otherwise) and writes
// the result through SetAluDOrZR.
// NOTE(review): this listing appears to interleave the pre-change and post-change lines of
// the commit (`d` is declared twice and SetAluDOrZR is called both inside the branches and
// after them) — confirm against the real file before relying on the statement order here.
public static void Rev32(ArmEmitterContext context)
|
||||
{
|
||||
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
|
||||
|
||||
Operand n = GetIntOrZR(context, op.Rn);
|
||||
Operand d;
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Int32)
|
||||
{
|
||||
SetAluDOrZR(context, context.ByteSwap(n));
|
||||
d = context.ByteSwap(n);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Managed fallback call (SoftFallback.ReverseBytes32_64) ...
Operand d = context.Call(new _U64_U64(SoftFallback.ReverseBytes32_64), n);
|
||||
|
||||
SetAluDOrZR(context, d);
|
||||
// ... versus the inline IR sequence produced by EmitReverseBytes32_64Op.
d = EmitReverseBytes32_64Op(context, n);
|
||||
}
|
||||
|
||||
SetAluDOrZR(context, d);
|
||||
}
|
||||
|
||||
// Emits IR that reverses the byte order inside each 32-bit word of a 64-bit operand.
// `op` is asserted to be I64.
private static Operand EmitReverseBytes32_64Op(ArmEmitterContext context, Operand op)
{
    Debug.Assert(op.Type == OperandType.I64);

    // First swap the bytes inside every halfword...
    Operand bytesSwapped = EmitReverseBytes16_64Op(context, op);

    // ...then swap the two halfwords inside every word.
    Operand upperHalves = context.BitwiseAnd(bytesSwapped, Const(0xffff0000ffff0000ul));
    Operand movedDown = context.ShiftRightUI(upperHalves, Const(16));

    Operand lowerHalves = context.BitwiseAnd(bytesSwapped, Const(0x0000ffff0000fffful));
    Operand movedUp = context.ShiftLeft(lowerHalves, Const(16));

    return context.BitwiseOr(movedDown, movedUp);
}
|
||||
|
||||
public static void Rev64(ArmEmitterContext context)
|
||||
|
|
|
@ -671,7 +671,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
|
@ -679,7 +679,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -717,7 +717,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
|
@ -728,7 +728,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -771,7 +771,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
|
@ -779,7 +779,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -817,7 +817,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
|
@ -828,7 +828,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -3055,7 +3055,9 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
|
||||
Operand d = GetVec(op.Rd);
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
||||
|
||||
long roundConst = 1L << (eSize - 1);
|
||||
|
||||
|
@ -3076,7 +3078,7 @@ namespace ARMeilleure.Instructions
|
|||
res = EmitVectorInsert(context, res, de, part + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||
|
|
|
@ -1034,9 +1034,7 @@ namespace ARMeilleure.Instructions
|
|||
Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
|
||||
Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
|
||||
|
||||
Operand res = context.AddIntrinsic(inst[op.Size], left, right);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
|
||||
}
|
||||
else if (op.Size < 3)
|
||||
{
|
||||
|
@ -1048,18 +1046,14 @@ namespace ARMeilleure.Instructions
|
|||
Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
|
||||
Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
|
||||
|
||||
Operand res = context.AddIntrinsic(inst[op.Size], left, right);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
|
||||
Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
|
||||
|
||||
Operand res = context.AddIntrinsic(inst[3], left, right);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1303,8 +1297,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
if (op.Size <= 2)
|
||||
{
|
||||
Operand temp = add ? context.Add (ne, me)
|
||||
: context.Subtract(ne, me);
|
||||
Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);
|
||||
|
||||
de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
|
||||
}
|
||||
|
@ -1388,7 +1381,9 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
|
||||
Operand d = GetVec(op.Rd);
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
|
@ -1399,7 +1394,7 @@ namespace ARMeilleure.Instructions
|
|||
res = EmitVectorInsert(context, res, temp, part + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
using ARMeilleure.Decoders;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.Translation;
|
||||
using System.Diagnostics;
|
||||
|
||||
using static ARMeilleure.Instructions.InstEmitHelper;
|
||||
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
||||
|
@ -107,7 +108,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -158,7 +159,7 @@ namespace ARMeilleure.Instructions
|
|||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -292,11 +293,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
|
||||
|
||||
ne = context.ConvertI64ToI32(ne);
|
||||
|
||||
Operand de = context.Call(new _U32_U32(SoftFallback.ReverseBits8), ne);
|
||||
|
||||
de = context.ZeroExtend32(OperandType.I64, de);
|
||||
Operand de = EmitReverseBits8Op(context, ne);
|
||||
|
||||
res = EmitVectorInsert(context, res, de, index, 0);
|
||||
}
|
||||
|
@ -304,6 +301,20 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
|
||||
// Emits IR that reverses the order of the low eight bits of `op` (asserted to be I64)
// using three swap stages: adjacent bits, bit pairs, then nibbles.
private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
{
    Debug.Assert(op.Type == OperandType.I64);

    // Stage 1: exchange neighbouring bits.
    Operand oddDown = context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1));
    Operand evenUp = context.ShiftLeft(context.BitwiseAnd(op, Const(0x55ul)), Const(1));
    Operand bitsSwapped = context.BitwiseOr(oddDown, evenUp);

    // Stage 2: exchange neighbouring bit pairs.
    Operand pairsDown = context.ShiftRightUI(context.BitwiseAnd(bitsSwapped, Const(0xccul)), Const(2));
    Operand pairsUp = context.ShiftLeft(context.BitwiseAnd(bitsSwapped, Const(0x33ul)), Const(2));
    Operand pairsSwapped = context.BitwiseOr(pairsDown, pairsUp);

    // Stage 3: exchange the nibbles. The high side needs no mask because the masks
    // applied in the earlier stages leave nothing above bit 7.
    Operand highDown = context.ShiftRightUI(pairsSwapped, Const(4));
    Operand lowUp = context.ShiftLeft(context.BitwiseAnd(pairsSwapped, Const(0x0ful)), Const(4));

    return context.BitwiseOr(highDown, lowUp);
}
|
||||
|
||||
public static void Rev16_V(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.UseSsse3)
|
||||
|
|
|
@ -4,6 +4,7 @@ using ARMeilleure.Decoders;
|
|||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.Translation;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
using static ARMeilleure.Instructions.InstEmitHelper;
|
||||
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
||||
|
@ -35,7 +36,7 @@ namespace ARMeilleure.Instructions
|
|||
Operand d = GetVec(op.Rd);
|
||||
Operand n = GetVec(op.Rn);
|
||||
|
||||
Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
|
||||
Operand dLow = context.VectorZeroUpper64(d);
|
||||
|
||||
Operand mask = null;
|
||||
|
||||
|
@ -150,7 +151,7 @@ namespace ARMeilleure.Instructions
|
|||
Operand d = GetVec(op.Rd);
|
||||
Operand n = GetVec(op.Rn);
|
||||
|
||||
Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
|
||||
Operand dLow = context.VectorZeroUpper64(d);
|
||||
|
||||
Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
|
||||
|
||||
|
@ -695,9 +696,9 @@ namespace ARMeilleure.Instructions
|
|||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
|
||||
Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
|
||||
Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0);
|
||||
|
||||
Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(0), Const(op.Size));
|
||||
Operand e = EmitUnsignedShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size);
|
||||
|
||||
res = EmitVectorInsert(context, res, e, index, op.Size);
|
||||
}
|
||||
|
@ -872,9 +873,7 @@ namespace ARMeilleure.Instructions
|
|||
e = context.Add(e, Const(roundConst));
|
||||
}
|
||||
|
||||
e = signed
|
||||
? context.ShiftRightSI(e, Const(shift))
|
||||
: context.ShiftRightUI(e, Const(shift));
|
||||
e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
|
||||
}
|
||||
else /* if (op.Size == 3) */
|
||||
{
|
||||
|
@ -894,6 +893,28 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
|
||||
// Emits IR for a shift of `op` (I64) by a signed, register-supplied amount `shiftLsB` (I32):
// a non-negative amount selects the left shift, a negative amount selects an unsigned right
// shift by its negation. When either the amount or its negation is at least the element
// width (8 << size bits) the selected result is replaced by zero. `size` must be 0..3.
private static Operand EmitUnsignedShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size)
{
    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(shiftLsB.Type == OperandType.I32);
    Debug.Assert((uint)size < 4u);

    Operand negatedShift = context.Negate(shiftLsB);

    Operand shiftsLeft = context.ICompareGreaterOrEqual(shiftLsB, Const(0));

    // Both directions are emitted unconditionally; the select below picks one.
    Operand leftShifted = context.ShiftLeft(op, shiftLsB);
    Operand rightShifted = context.ShiftRightUI(op, negatedShift);

    Operand inRangeResult = context.ConditionalSelect(shiftsLeft, leftShifted, rightShifted);

    Operand tooFarLeft = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size));
    Operand tooFarRight = context.ICompareGreaterOrEqual(negatedShift, Const(8 << size));
    Operand outOfRange = context.BitwiseOr(tooFarLeft, tooFarRight);

    return context.ConditionalSelect(outOfRange, Const(0UL), inRangeResult);
}
|
||||
|
||||
private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
|
||||
{
|
||||
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
|
||||
|
@ -906,7 +927,9 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
|
||||
Operand d = GetVec(op.Rd);
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
|
@ -922,7 +945,7 @@ namespace ARMeilleure.Instructions
|
|||
res = EmitVectorInsert(context, res, e, part + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
[Flags]
|
||||
|
@ -965,7 +988,9 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
|
||||
Operand d = GetVec(op.Rd);
|
||||
|
||||
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
|
@ -978,9 +1003,7 @@ namespace ARMeilleure.Instructions
|
|||
e = context.Add(e, Const(roundConst));
|
||||
}
|
||||
|
||||
e = signedSrc
|
||||
? context.ShiftRightSI(e, Const(shift))
|
||||
: context.ShiftRightUI(e, Const(shift));
|
||||
e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
|
||||
}
|
||||
else /* if (op.Size == 2 && round) */
|
||||
{
|
||||
|
@ -992,7 +1015,7 @@ namespace ARMeilleure.Instructions
|
|||
res = EmitVectorInsert(context, res, e, part + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
context.Copy(d, res);
|
||||
}
|
||||
|
||||
// dst64 = (Int(src64, signed) + roundConst) >> shift;
|
||||
|
|
|
@ -1240,53 +1240,5 @@ namespace ARMeilleure.Instructions
|
|||
: (uint)(value >> 32);
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region "Reverse"
|
||||
// Reverses the order of the low eight bits of `value`. Bits above bit 7 are discarded
// by the first masking stage, so the result always fits in eight bits.
public static uint ReverseBits8(uint value)
{
    // Exchange neighbouring bits, then neighbouring bit pairs, then the two nibbles.
    uint bitsSwapped = ((value & 0x55) << 1) | ((value & 0xaa) >> 1);
    uint pairsSwapped = ((bitsSwapped & 0x33) << 2) | ((bitsSwapped & 0xcc) >> 2);

    return ((pairsSwapped & 0x0f) << 4) | (pairsSwapped >> 4);
}
|
||||
|
||||
// 32-bit form of the halfword byte swap: delegates to the 64-bit variant and
// truncates the result back to 32 bits.
public static uint ReverseBytes16_32(uint value)
{
    ulong swapped = ReverseBytes16_64(value);

    return (uint)swapped;
}
|
||||
|
||||
// Swaps the bytes inside every 16-bit halfword of `value` (ReverseBytes with RevSize.Rev16).
public static ulong ReverseBytes16_64(ulong value)
{
    return ReverseBytes(value, RevSize.Rev16);
}
|
||||
// Reverses the bytes inside every 32-bit word of `value` (ReverseBytes with RevSize.Rev32).
public static ulong ReverseBytes32_64(ulong value)
{
    return ReverseBytes(value, RevSize.Rev32);
}
|
||||
|
||||
// Selects the group width within which ReverseBytes reverses byte order.
// The explicit values match the ones the compiler assigns implicitly.
private enum RevSize
{
    Rev16 = 0, // within each 16-bit halfword
    Rev32 = 1, // within each 32-bit word
    Rev64 = 2  // across the whole 64-bit value
}
|
||||
|
||||
// Reverses the byte order of `value` inside groups whose width is chosen by `size`:
// each 16-bit halfword (Rev16), each 32-bit word (Rev32) or the whole 64-bit value (Rev64).
// The swap is built in stages, returning as soon as the requested width is reached.
// Throws ArgumentOutOfRangeException (an ArgumentException) when `size` is not one of
// the three RevSize members.
private static ulong ReverseBytes(ulong value, RevSize size)
{
    // Stage 1: swap the two bytes of every halfword.
    value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);

    if (size == RevSize.Rev16)
    {
        return value;
    }

    // Stage 2: swap the two halfwords of every word.
    value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);

    if (size == RevSize.Rev32)
    {
        return value;
    }

    // Stage 3: swap the two words.
    value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);

    if (size == RevSize.Rev64)
    {
        return value;
    }

    // ArgumentException's single-string constructor treats its argument as the message,
    // so passing nameof(size) there lost the parameter name. Report the parameter name,
    // the offending value and a real message instead.
    throw new ArgumentOutOfRangeException(nameof(size), size, "Unsupported byte-reversal size.");
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue