Cleanup, add SSE2 support for scalar insert.

Works similarly to the IR scalar insert, but obviously this one works
directly on V128.
This commit is contained in:
riperiperi 2020-02-26 18:13:21 +00:00
parent e719af48f0
commit 621c5a26c1
4 changed files with 49 additions and 34 deletions

View file

@ -63,6 +63,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary));
Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
Add(Intrinsic.X86Movss, new IntrinsicInfo(X86Instruction.Movss, IntrinsicType.Binary));
Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary));
Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));

View file

@ -173,7 +173,6 @@ namespace ARMeilleure.Instructions
// TODO: Fast Path.
if (roundWithFpscr)
{
// These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point.
if (floatSize == OperandType.FP64)
{
if (unsigned)
@ -363,7 +362,6 @@ namespace ARMeilleure.Instructions
{
EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1));
}
}
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
@ -382,7 +380,7 @@ namespace ARMeilleure.Instructions
private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
{
// a port of the similar round function in InstEmitSimdCvt
// A port of the similar round function in InstEmitSimdCvt.
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
@ -457,7 +455,7 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
}
long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

View file

@ -473,10 +473,12 @@ namespace ARMeilleure.Instructions
context.Copy(GetVecA32(op.Qd), res);
}
// Intrinsic Emits
// Intrinsic Helpers
public static Operand EmitSwapDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
{
Debug.Assert(input.Type == OperandType.V128);
int originalSide = originalV & 1;
int targetSide = targetV & 1;
@ -487,16 +489,18 @@ namespace ARMeilleure.Instructions
if (targetSide == 1)
{
return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // low to high
return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // Low to high.
}
else
{
return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // high to low
return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // High to low.
}
}
public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
{
Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
int targetSide = targetV & 1;
int shuffleMask = 2 | 0;
@ -510,45 +514,56 @@ namespace ARMeilleure.Instructions
}
}
public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
{
// index into 0, 0 into index. This swap happens at the start and end of an A32 scalar op if required.
int index = reg & (doubleWidth ? 1 : 3);
if (index == 0) return target;
Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
if (doubleWidth)
{
int shuffleMask = 1; // swap top and bottom (b0 = 1, b1 = 0)
return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask));
}
else
{
int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // swap index and 0 (others remain)
shuffleMask &= ~(3 << (index * 2));
return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask));
}
}
public static Operand EmitInsertScalar(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
{
// insert from index 0 in value to index in target
// Insert from index 0 in value to index in target.
int index = reg & (doubleWidth ? 1 : 3);
if (doubleWidth)
{
if (index == 1)
{
return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // low to high
return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // Low to high.
}
else
{
return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // low to low, keep high from original
return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // Low to low, keep high from original.
}
}
else
{
return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
if (Optimizations.UseSse41)
{
return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
}
else
{
target = EmitSwapScalar(context, target, index, doubleWidth); // Swap value to replace into element 0.
target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector.
return EmitSwapScalar(context, target, index, doubleWidth); // Swap new value back to the correct index.
}
}
}
public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
{
    // Exchanges element 0 of the vector with the element selected by reg; every other element keeps
    // its position. This swap happens at the start of an A32 scalar op if required.
    int lane = reg & (doubleWidth ? 1 : 3);

    if (lane == 0)
    {
        // The selected element already sits in element 0, so no shuffle is emitted.
        return target;
    }

    if (doubleWidth)
    {
        // Immediate 1 swaps top and bottom. (b0 = 1, b1 = 0)
        return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(1));
    }

    // Identity selector (3, 2, 1, _) with position 0 reading from the selected lane; the 2-bit field
    // belonging to the selected lane is then cleared so that position reads from element 0.
    int identitySelector = (3 << 6) | (2 << 4) | (1 << 2);
    int selector = (identitySelector | lane) & ~(3 << (lane * 2));

    return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(selector));
}
@ -679,7 +694,7 @@ namespace ARMeilleure.Instructions
Operand res = scalarFunc(m);
// Insert scalar into vector.
res = EmitInsertScalar(context, d, res, op.Vd, doubleSize);
res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
context.Copy(d, res);
}
@ -709,7 +724,7 @@ namespace ARMeilleure.Instructions
Operand res = scalarFunc(n, m);
// Insert scalar into vector.
res = EmitInsertScalar(context, d, res, op.Vd, doubleSize);
res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
context.Copy(d, res);
}
@ -741,7 +756,7 @@ namespace ARMeilleure.Instructions
Operand res = scalarFunc(d, n, m);
// Insert scalar into vector.
res = EmitInsertScalar(context, initialD, res, op.Vd, doubleSize);
res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
context.Copy(initialD, res);
}

View file

@ -52,6 +52,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Minss,
X86Movhlps,
X86Movlhps,
X86Movss,
X86Mulpd,
X86Mulps,
X86Mulsd,