diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index ced61882b2..2fa7702d90 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -437,6 +437,7 @@ namespace ARMeilleure.Decoders SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, typeof(OpCodeSimd)); SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, typeof(OpCodeSimdReg)); + SetA64("0111111101xxxxxx010101xxxxxxxxxx", InstName.Sli_S, InstEmit.Sli_S, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, typeof(OpCodeSimdReg)); @@ -485,6 +486,7 @@ namespace ARMeilleure.Decoders SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, typeof(OpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, typeof(OpCodeSimd)); SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, typeof(OpCodeSimdReg)); + SetA64("0111111101xxxxxx010001xxxxxxxxxx", InstName.Sri_S, InstEmit.Sri_S, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, typeof(OpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, typeof(OpCodeSimdReg)); diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs index 77c08ff07f..17f43c812c 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -181,59 +181,14 @@ namespace ARMeilleure.Instructions } } + public static void Sli_S(ArmEmitterContext context) + { + EmitSli(context, scalar: true); + } + public static void Sli_V(ArmEmitterContext context) { - OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - - int shift = GetImmShl(op); - - ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL; - - if (Optimizations.UseSse2 && op.Size > 0) - { - Operand d = GetVec(op.Rd); - Operand n = GetVec(op.Rn); - - Intrinsic sllInst = X86PsllInstruction[op.Size]; - - Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift)); - - Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); - - Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); - - Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); - - if (op.RegisterSize == RegisterSize.Simd64) - { - res = context.VectorZeroUpper64(res); - } - - context.Copy(d, res); - } - else - { - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - - Operand neShifted = context.ShiftLeft(ne, Const(shift)); - - Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); - - Operand deMasked = context.BitwiseAnd(de, Const(mask)); - - Operand e = context.BitwiseOr(neShifted, deMasked); - - res = EmitVectorInsert(context, res, e, index, op.Size); - } - - context.Copy(GetVec(op.Rd), res); - } + EmitSli(context, scalar: false); } public static void Sqrshl_V(ArmEmitterContext context) @@ -318,60 +273,14 @@ namespace ARMeilleure.Instructions EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); } + public static void Sri_S(ArmEmitterContext context) + { + EmitSri(context, scalar: true); + } + public static void Sri_V(ArmEmitterContext context) { - OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - - int shift = GetImmShr(op); - int eSize = 8 << op.Size; - - ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize)); - - if (Optimizations.UseSse2 && op.Size > 0) - { - Operand d = GetVec(op.Rd); - Operand n = GetVec(op.Rn); - - Intrinsic srlInst = X86PsrlInstruction[op.Size]; - - Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); - - Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); - - Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); - - Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); - - if (op.RegisterSize == RegisterSize.Simd64) - { - res = context.VectorZeroUpper64(res); - } - - context.Copy(d, res); - } - else - { - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - - Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL); - - Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); - - Operand deMasked = context.BitwiseAnd(de, Const(mask)); - - Operand e = context.BitwiseOr(neShifted, deMasked); - - res = EmitVectorInsert(context, res, e, index, op.Size); - } - - context.Copy(GetVec(op.Rd), res); - } + EmitSri(context, scalar: false); } public static void Srshl_V(ArmEmitterContext context) @@ -1137,5 +1046,116 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } + + private static void EmitSli(ArmEmitterContext context, bool scalar) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = context.ShiftLeft(ne, Const(shift)); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSri(ArmEmitterContext context, bool scalar) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize)); + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } } } diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 943a9674f5..c81484a6f4 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -313,6 +313,7 @@ namespace ARMeilleure.Instructions Shll_V, Shrn_V, Shsub_V, + Sli_S, Sli_V, Smax_V, Smaxp_V, @@ -354,6 +355,7 @@ namespace ARMeilleure.Instructions Sqxtun_S, Sqxtun_V, Srhadd_V, + Sri_S, Sri_V, Srshl_V, Srshr_S, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs index 327900c92b..0f1b0dac25 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs @@ -218,7 +218,7 @@ namespace Ryujinx.Tests.Cpu return new uint[] { 0x5F405400u, // SHL D0, D0, #0 - //0x7F405400u // SLI D0, D0, #0 + 0x7F405400u // SLI D0, D0, #0 }; } @@ -289,7 +289,7 @@ namespace Ryujinx.Tests.Cpu { return new uint[] { - //0x7F404400u, // SRI D0, D0, #64 + 0x7F404400u, // SRI D0, D0, #64 0x5F402400u, // SRSHR D0, D0, #64 0x5F403400u, // SRSRA D0, D0, #64 0x5F400400u, // SSHR D0, D0, #64