Cleanup, add SSE2 support for scalar insert.

Works similarly to the IR scalar insert, but this one operates directly on V128.
parent e719af48f0
commit 621c5a26c1

4 changed files with 49 additions and 34 deletions
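The SSE2 path implements the insert as swap, MOVSS, swap: the destination lane is shuffled into element 0, MOVSS (register form) replaces element 0 only, and the same shuffle moves it back. A minimal standalone sketch of the trick using .NET's System.Runtime.Intrinsics — illustrative only; the type and method names below are hypothetical and not part of ARMeilleure:

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class ScalarInsertSketch
{
    // Inserts element 0 of 'value' into lane 'index' (0-3) of 'target'.
    public static Vector128<float> ScalarInsert(Vector128<float> target, Vector128<float> value, int index)
    {
        if (Sse41.IsSupported)
        {
            // INSERTPS: imm[5:4] selects the destination lane; the source lane is 0.
            return Sse41.Insert(target, value, (byte)(index << 4));
        }

        // SSE2 fallback: swap lane 'index' into lane 0, MOVSS the value in, swap back.
        target = SwapScalar(target, index);
        target = Sse.MoveScalar(target, value);
        return SwapScalar(target, index);
    }

    // SHUFPS with an identity mask whose lane 0 and lane 'index' selectors are exchanged.
    private static Vector128<float> SwapScalar(Vector128<float> target, int index)
    {
        if (index == 0) return target;

        int mask = (3 << 6) | (2 << 4) | (1 << 2) | index; // Lane 0 now selects 'index'.
        mask &= ~(3 << (index * 2));                       // Lane 'index' now selects 0.

        return Sse.Shuffle(target, target, (byte)mask);
    }
}

The swap is an involution, so applying it twice restores the original lane order with the new value in place.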
@@ -63,6 +63,7 @@ namespace ARMeilleure.CodeGen.X86
         Add(Intrinsic.X86Minss,   new IntrinsicInfo(X86Instruction.Minss,   IntrinsicType.Binary));
         Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
         Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
+        Add(Intrinsic.X86Movss,   new IntrinsicInfo(X86Instruction.Movss,   IntrinsicType.Binary));
         Add(Intrinsic.X86Mulpd,   new IntrinsicInfo(X86Instruction.Mulpd,   IntrinsicType.Binary));
         Add(Intrinsic.X86Mulps,   new IntrinsicInfo(X86Instruction.Mulps,   IntrinsicType.Binary));
         Add(Intrinsic.X86Mulsd,   new IntrinsicInfo(X86Instruction.Mulsd,   IntrinsicType.Binary));
@@ -173,7 +173,6 @@ namespace ARMeilleure.Instructions
         // TODO: Fast Path.
         if (roundWithFpscr)
         {
-            // These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point.
             if (floatSize == OperandType.FP64)
             {
                 if (unsigned)
@@ -363,7 +362,6 @@ namespace ARMeilleure.Instructions
        {
            EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1));
        }
-
    }

    private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
@@ -382,7 +380,7 @@ namespace ARMeilleure.Instructions

    private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
    {
-        // a port of the similar round function in InstEmitSimdCvt
+        // A port of the similar round function in InstEmitSimdCvt.
        OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

        bool doubleSize = (op.Size & 1) != 0;
@@ -457,7 +455,7 @@ namespace ARMeilleure.Instructions
            nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
        }

-        long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
+        long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)

        Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

@@ -473,10 +473,12 @@ namespace ARMeilleure.Instructions
        context.Copy(GetVecA32(op.Qd), res);
    }

-    // Intrinsic Emits
+    // Intrinsic Helpers

    public static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
    {
+        Debug.Assert(input.Type == OperandType.V128);
+
        int originalSide = originalV & 1;
        int targetSide = targetV & 1;

@@ -487,16 +489,18 @@ namespace ARMeilleure.Instructions

        if (targetSide == 1)
        {
-            return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // low to high
+            return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // Low to high.
        }
        else
        {
-            return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // high to low
+            return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // High to low.
        }
    }

    public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
    {
+        Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
        int targetSide = targetV & 1;
        int shuffleMask = 2 | 0;

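MOVLHPS copies the low 64 bits of its second operand into the high half of the result, and MOVHLPS copies the high 64 bits into the low half, which is how a D register is moved between the two halves of a Q vector. A hedged sketch of the two cases above with .NET intrinsics — names are illustrative, and the early return for matching sides is assumed from the elided context:

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class DoubleWordSwapSketch
{
    // Moves the 64-bit half holding a D register to the other half of the Q vector.
    public static Vector128<float> SwapDoubleWordToSide(Vector128<float> input, int originalV, int targetV)
    {
        int originalSide = originalV & 1;
        int targetSide = targetV & 1;

        if (originalSide == targetSide) return input; // Assumed early-out; already on the right side.

        return targetSide == 1
            ? Sse.MoveLowToHigh(input, input)  // MOVLHPS: low half copied to the high half.
            : Sse.MoveHighToLow(input, input); // MOVHLPS: high half copied to the low half.
    }
}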
@@ -510,45 +514,56 @@ namespace ARMeilleure.Instructions
        }
    }

-    public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+    public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
    {
-        // index into 0, 0 into index. This swap happens at the start and end of an A32 scalar op if required.
-        int index = reg & (doubleWidth ? 1 : 3);
-        if (index == 0) return target;
+        Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);

-        if (doubleWidth)
-        {
-            int shuffleMask = 1; // swap top and bottom (b0 = 1, b1 = 0)
-            return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask));
-        }
-        else
-        {
-            int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // swap index and 0 (others remain)
-            shuffleMask &= ~(3 << (index * 2));
-
-            return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask));
-        }
-    }
-
-    public static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
-    {
-        // insert from index 0 in value to index in target
+        // Insert from index 0 in value to index in target.
        int index = reg & (doubleWidth ? 1 : 3);

        if (doubleWidth)
        {
            if (index == 1)
            {
-                return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // low to high
+                return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // Low to high.
            }
            else
            {
-                return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // low to low, keep high from original
+                return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // Low to low, keep high from original.
            }
        }
        else
        {
-            return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
+            if (Optimizations.UseSse41)
+            {
+                return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
+            }
+            else
+            {
+                target = EmitSwapScalar(context, target, index, doubleWidth); // Swap value to replace into element 0.
+                target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector.
+                return EmitSwapScalar(context, target, index, doubleWidth); // Swap new value back to the correct index.
+            }
        }
    }

+    public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+    {
+        // Index into 0, 0 into index. This swap happens at the start of an A32 scalar op if required.
+        int index = reg & (doubleWidth ? 1 : 3);
+        if (index == 0) return target;
+
+        if (doubleWidth)
+        {
+            int shuffleMask = 1; // Swap top and bottom. (b0 = 1, b1 = 0)
+            return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask));
+        }
+        else
+        {
+            int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // Swap index and 0. (others remain)
+            shuffleMask &= ~(3 << (index * 2));
+
+            return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask));
+        }
+    }
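The double-width branch never needs the swap trick: with only two elements, MOVLHPS places the value in the high half directly, and SHUFPD can take the low element from the value while keeping the high element of the target. A sketch of those two cases with .NET intrinsics, under the same illustrative naming as above:

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class DoubleInsertSketch
{
    // Inserts element 0 of 'value' into element 'index' (0 or 1) of 'target'.
    public static Vector128<double> ScalarInsert(Vector128<double> target, Vector128<double> value, int index)
    {
        if (index == 1)
        {
            // MOVLHPS: low half of 'value' becomes the high half of the result.
            return Sse.MoveLowToHigh(target.AsSingle(), value.AsSingle()).AsDouble();
        }

        // SHUFPD, control 0b10: element 0 from 'value', element 1 from 'target'.
        return Sse2.Shuffle(value, target, 0b10);
    }
}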
@@ -679,7 +694,7 @@ namespace ARMeilleure.Instructions
        Operand res = scalarFunc(m);

        // Insert scalar into vector.
-        res = EmitInsertScalar(context, d, res, op.Vd, doubleSize);
+        res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);

        context.Copy(d, res);
    }
@@ -709,7 +724,7 @@ namespace ARMeilleure.Instructions
        Operand res = scalarFunc(n, m);

        // Insert scalar into vector.
-        res = EmitInsertScalar(context, d, res, op.Vd, doubleSize);
+        res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);

        context.Copy(d, res);
    }
@@ -741,7 +756,7 @@ namespace ARMeilleure.Instructions
        Operand res = scalarFunc(d, n, m);

        // Insert scalar into vector.
-        res = EmitInsertScalar(context, initialD, res, op.Vd, doubleSize);
+        res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);

        context.Copy(initialD, res);
    }
@@ -52,6 +52,7 @@ namespace ARMeilleure.IntermediateRepresentation
        X86Minss,
        X86Movhlps,
        X86Movlhps,
+        X86Movss,
        X86Mulpd,
        X86Mulps,
        X86Mulsd,