diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 9034528a0e..d54a61af81 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -180,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Pshufd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Pslld, new InstInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Pslldq, new InstInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Psllq, new InstInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Psllw, new InstInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Psrad, new InstInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Psraw, new InstInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstFlags.Vex | InstFlags.Prefix66)); @@ -951,6 +952,11 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(source1, source, X86Instruction.Pslldq, dest); } + public void Psllq(Operand dest, Operand source, Operand source1) + { + WriteInstruction(source1, source, X86Instruction.Psllq, dest); + } + public void Psllw(Operand dest, Operand source, Operand source1) { WriteInstruction(source1, source, X86Instruction.Psllw, dest); diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 40f509c3eb..47a80fee64 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -159,6 +159,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.X86Pshufb, GenerateX86Pshufb); Add(Instruction.X86Pslld, GenerateX86Pslld); Add(Instruction.X86Pslldq, GenerateX86Pslldq); + Add(Instruction.X86Psllq, GenerateX86Psllq); Add(Instruction.X86Psllw, 
GenerateX86Psllw); Add(Instruction.X86Psrad, GenerateX86Psrad); Add(Instruction.X86Psraw, GenerateX86Psraw); @@ -447,50 +448,6 @@ namespace ARMeilleure.CodeGen.X86 } } - private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation) - { - Operand dest = operation.Dest; - Operand source = operation.GetSource(0); - - Debug.Assert(dest.Type == OperandType.FP32 || - dest.Type == OperandType.FP64); - - if (dest.Type == OperandType.FP32) - { - Debug.Assert(source.Type == OperandType.I32 || - source.Type == OperandType.FP64); - - if (source.Type == OperandType.I32) - { - context.Assembler.Xorps(dest, dest, dest); - context.Assembler.Cvtsi2ss(dest, source, dest); - } - else /* if (source.Type == OperandType.FP64) */ - { - context.Assembler.Cvtsd2ss(dest, source, dest); - - ZeroUpper96(context, dest, dest); - } - } - else /* if (dest.Type == OperandType.FP64) */ - { - Debug.Assert(source.Type == OperandType.I64 || - source.Type == OperandType.FP32); - - if (source.Type == OperandType.I64) - { - context.Assembler.Xorps(dest, dest, dest); - context.Assembler.Cvtsi2sd(dest, source, dest); - } - else /* if (source.Type == OperandType.FP32) */ - { - context.Assembler.Cvtss2sd(dest, source, dest); - - ZeroUpper64(context, dest, dest); - } - } - } - private static void GenerateCopy(CodeGenContext context, Operation operation) { Operand dest = operation.Dest; @@ -1428,6 +1385,11 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Pslldq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); } + private static void GenerateX86Psllq(CodeGenContext context, Operation operation) + { + context.Assembler.Psllq(operation.Dest, operation.GetSource(1), operation.GetSource(0)); + } + private static void GenerateX86Psllw(CodeGenContext context, Operation operation) { context.Assembler.Psllw(operation.Dest, operation.GetSource(1), operation.GetSource(0)); diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs 
index 7a59805216..d0559aa924 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -120,6 +120,7 @@ namespace ARMeilleure.CodeGen.X86 Pshufd, Pslld, Pslldq, + Psllq, Psllw, Psrad, Psraw, diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 903e836472..16c3f2f73f 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -397,7 +397,7 @@ namespace ARMeilleure.Decoders SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, typeof(OpCodeSimd)); SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V, InstEmit.Rev32_V, typeof(OpCodeSimd)); SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V, InstEmit.Rev64_V, typeof(OpCodeSimd)); - SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, null, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, InstEmit.Rshrn_V, typeof(OpCodeSimdShImm)); SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, typeof(OpCodeSimdReg)); @@ -424,13 +424,13 @@ namespace ARMeilleure.Decoders SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, null, typeof(OpCodeSimd)); SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, null, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, typeof(OpCodeSimdReg)); - SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, null, typeof(OpCodeSimdShImm)); - SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, null, typeof(OpCodeSimdShImm)); - SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, null, 
typeof(OpCodeSimd)); - SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, null, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm)); + SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, typeof(OpCodeSimd)); + SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, typeof(OpCodeSimdReg)); - SetA64("0x1011110>>>>xxx010101xxxxxxxxxx", InstName.Sli_V, null, typeof(OpCodeSimdShImm)); + SetA64("0x1011110>>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, typeof(OpCodeSimdReg)); SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd)); @@ -460,16 +460,16 @@ namespace ARMeilleure.Decoders SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg)); SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg)); SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg)); - SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, null, typeof(OpCodeSimdShImm)); - SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, null, typeof(OpCodeSimdShImm)); - 
SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, null, typeof(OpCodeSimdShImm)); - SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, null, typeof(OpCodeSimdShImm)); - SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, null, typeof(OpCodeSimdShImm)); + SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, InstEmit.Sqrshl_V, typeof(OpCodeSimdReg)); + SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, InstEmit.Sqrshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, typeof(OpCodeSimdShImm)); + SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, InstEmit.Sqshl_V, typeof(OpCodeSimdReg)); + SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, InstEmit.Sqshrun_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, InstEmit.Sqshrun_V, typeof(OpCodeSimdShImm)); SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, typeof(OpCodeSimdReg)); SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, typeof(OpCodeSimdReg)); SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, typeof(OpCodeSimd)); @@ -477,21 +477,21 
@@ namespace ARMeilleure.Decoders SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, typeof(OpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, typeof(OpCodeSimd)); SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, typeof(OpCodeSimdReg)); - SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, null, typeof(OpCodeSimdShImm)); - SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, null, typeof(OpCodeSimdShImm)); + SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, typeof(OpCodeSimdReg)); + SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, typeof(OpCodeSimdShImm)); + 
SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm)); + SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, typeof(OpCodeSimdReg)); + SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, InstEmit.Ssra_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, typeof(OpCodeSimdReg)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, null, typeof(OpCodeSimdMemMs)); @@ -545,34 +545,34 @@ namespace ARMeilleure.Decoders SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, typeof(OpCodeSimdRegElem)); SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, typeof(OpCodeSimdReg)); SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, 
InstEmit.Uqadd_V, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, null, typeof(OpCodeSimdShImm)); - SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, null, typeof(OpCodeSimdShImm)); + SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, InstEmit.Uqrshl_V, typeof(OpCodeSimdReg)); + SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, InstEmit.Uqrshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, InstEmit.Uqrshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, InstEmit.Uqshl_V, typeof(OpCodeSimdReg)); + SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, InstEmit.Uqshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, InstEmit.Uqshrn_V, typeof(OpCodeSimdShImm)); SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, typeof(OpCodeSimdReg)); SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, typeof(OpCodeSimdReg)); SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, typeof(OpCodeSimd)); SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, typeof(OpCodeSimd)); SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, null, typeof(OpCodeSimdReg)); - SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, null, typeof(OpCodeSimdShImm)); - 
SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, null, typeof(OpCodeSimdReg)); - SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, null, typeof(OpCodeSimdShImm)); - SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, null, typeof(OpCodeSimdShImm)); - SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, null, typeof(OpCodeSimdShImm)); + SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, InstEmit.Urshl_V, typeof(OpCodeSimdReg)); + SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, InstEmit.Urshr_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, typeof(OpCodeSimdShImm)); + SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm)); + SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, typeof(OpCodeSimdReg)); + SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, typeof(OpCodeSimdShImm)); + SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, 
typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, typeof(OpCodeSimdShImm)); SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, typeof(OpCodeSimd)); SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, typeof(OpCodeSimd)); - SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, null, typeof(OpCodeSimdShImm)); - SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, null, typeof(OpCodeSimdShImm)); - SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, null, typeof(OpCodeSimdShImm)); + SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, InstEmit.Usra_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, typeof(OpCodeSimdShImm)); SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, typeof(OpCodeSimdReg)); SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, typeof(OpCodeSimdReg)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, typeof(OpCodeSimdReg)); diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index e77a2c8abe..a0478bfbf0 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -84,6 +84,29 @@ namespace ARMeilleure.Instructions Instruction.X86Pmovzxdq }; + public static readonly Instruction[] X86PsllInstruction = new Instruction[] + { + 0, + Instruction.X86Psllw, + Instruction.X86Pslld, + Instruction.X86Psllq + }; + + public static readonly Instruction[] X86PsraInstruction = new Instruction[] + { + 0, + Instruction.X86Psraw, + Instruction.X86Psrad + }; + + public static 
readonly Instruction[] X86PsrlInstruction = new Instruction[] + { + 0, + Instruction.X86Psrlw, + Instruction.X86Psrld, + Instruction.X86Psrlq + }; + public static readonly Instruction[] X86PsubInstruction = new Instruction[] { Instruction.X86Psubb, diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs index 905fcc2fdf..c260e42b63 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs @@ -107,20 +107,31 @@ namespace ARMeilleure.Instructions if (op.Size == 0) { - res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex)); + if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex)); + } + res = context.AddIntrinsic(Instruction.X86Punpcklbw, res, res); res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(0)); } else if (op.Size == 1) { - res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex * 2)); - res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res); - } + if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Instruction.X86Psrldq, res, Const(op.DstIndex * 2)); + } - if (op.Size < 3) - { + res = context.AddIntrinsic(Instruction.X86Punpcklwd, res, res); res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(0)); } + else if (op.Size == 2) + { + int mask = op.DstIndex * 0b01010101; + + res = context.AddIntrinsic(Instruction.X86Shufps, res, res, Const(mask)); + } else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64) { res = context.AddIntrinsic(Instruction.X86Movlhps, res, res); diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs new file mode 100644 index 0000000000..7483c7cc95 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -0,0 +1,1076 @@ +// 
https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { +#region "Masks" + private static readonly long[] _masks_RshrnShrn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; +#endregion + + public static void Rshrn_V(EmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand dLow = context.AddIntrinsic(Instruction.X86Movlhps, d, context.VectorZero()); + + Operand mask = null; + + switch (op.Size + 1) + { + case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break; + case 2: mask = X86GetAllElements(context, (int)roundConst); break; + case 3: mask = X86GetAllElements(context, roundConst); break; + } + + Instruction addInst = X86PaddInstruction[op.Size + 1]; + + Operand res = context.AddIntrinsic(addInst, n, mask); + + Instruction srlInst = X86PsrlInstruction[op.Size + 1]; + + res = context.AddIntrinsic(srlInst, res, Const(shift)); + + Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]); + + res = context.AddIntrinsic(Instruction.X86Pshufb, res, mask2); + + Instruction movInst = op.RegisterSize == RegisterSize.Simd128 + ? 
Instruction.X86Movlhps + : Instruction.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } + } + + public static void Shl_S(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + + public static void Shl_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + + Instruction sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + } + + public static void Shll_V(EmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int shift = 8 << op.Size; + + if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + } + + Instruction movsxInst = X86PmovsxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movsxInst, n); + + Instruction sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Shrn_V(EmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + 
Operand dLow = context.AddIntrinsic(Instruction.X86Movlhps, d, context.VectorZero()); + + Instruction srlInst = X86PsrlInstruction[op.Size + 1]; + + Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); + + Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]); + + Operand res = context.AddIntrinsic(Instruction.X86Pshufb, nShifted, mask); + + Instruction movInst = op.RegisterSize == RegisterSize.Simd128 + ? Instruction.X86Movlhps + : Instruction.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: false); + } + } + + public static void Sli_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int shift = GetImmShl(op); + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = context.ShiftLeft(ne, Const(shift)); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sqrshl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlRegSatQ)); + + Operand e = context.Call(info, ne, me, Const(1), Const(op.Size)); + + res = 
EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sqrshrn_S(EmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqrshrn_V(EmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqrshrun_S(EmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqrshrun_V(EmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Sqshl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlRegSatQ)); + + Operand e = context.Call(info, ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sqshrn_S(EmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + + public static void Sqshrn_V(EmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Sqshrun_S(EmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + + public static void Sqshrun_V(EmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Srshl_V(EmitterContext context) + { 
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlReg)); + + Operand e = context.Call(info, ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Srshr_S(EmitterContext context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } + + public static void Srshr_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand n = GetVec(op.Rn); + + Instruction sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Instruction srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Instruction sraInst = X86PsraInstruction[op.Size]; + + Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSra); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round); + } + } + + public static void Srsra_S(EmitterContext context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Srsra_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = 
GetImmShr(op); + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Instruction sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Instruction srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Instruction sraInst = X86PsraInstruction[op.Size]; + + Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSra); + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Sshl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlReg)); + + Operand e = context.Call(info, ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sshll_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + } + + Instruction movsxInst = X86PmovsxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movsxInst, n); + + if (shift != 0) + { + Instruction 
sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Sshr_S(EmitterContext context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx); + } + + public static void Sshr_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + + Operand n = GetVec(op.Rn); + + Instruction sraInst = X86PsraInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sraInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorSx); + } + } + + public static void Ssra_S(EmitterContext context) + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + } + + public static void Ssra_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Instruction sraInst = X86PsraInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sraInst, n, Const(shift)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate); + } + } + + public static void Uqrshl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + 
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShlRegSatQ)); + + Operand e = context.Call(info, ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Uqrshrn_S(EmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqrshrn_V(EmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Uqshl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShlRegSatQ)); + + Operand e = context.Call(info, ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Uqshrn_S(EmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqshrn_V(EmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Urshl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = 
EmitVectorExtractZx(context, op.Rm, index, op.Size); + + // URSHL rounds (round flag = 1 below) but does not saturate, so it must use the plain shift helper rather than the SatQ variant used by UQRSHL. + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShlReg)); + + Operand e = context.Call(info, ne, me, Const(1), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Urshr_S(EmitterContext context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + } + + public static void Urshr_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand n = GetVec(op.Rn); + + Instruction sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Instruction srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSrl); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round); + } + } + + public static void Ursra_S(EmitterContext context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Ursra_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Instruction sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Instruction srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + 
Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSrl); + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Ushl_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShlReg)); + + Operand e = context.Call(info, ne, me, Const(0), Const(op.Size)); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Ushll_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Instruction.X86Psrldq, n, Const(8)); + } + + Instruction movzxInst = X86PmovzxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movzxInst, n); + + if (shift != 0) + { + Instruction sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Ushr_S(EmitterContext context) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx); + } + + public static void Ushr_V(EmitterContext 
context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + + Operand n = GetVec(op.Rn); + + Instruction srlInst = X86PsrlInstruction[op.Size]; + + Operand res = context.AddIntrinsic(srlInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorZx); + } + } + + public static void Usra_S(EmitterContext context) + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); + } + + public static void Usra_V(EmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Instruction srlInst = X86PsrlInstruction[op.Size]; + + Operand res = context.AddIntrinsic(srlInst, n, Const(shift)); + + Instruction addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate); + } + } + + [Flags] + private enum ShrImmFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Round = 1 << 2, + Accumulate = 1 << 3, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + private static void EmitScalarShrImmOpSx(EmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags); + } + + private static void EmitScalarShrImmOpZx(EmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags); + } + + private static void EmitVectorShrImmOpSx(EmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorSx | flags); + } + + private static void 
EmitVectorShrImmOpZx(EmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorZx | flags); + } + + private static void EmitShrImmOp(EmitterContext context, ShrImmFlags flags) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & ShrImmFlags.Scalar) != 0; + bool signed = (flags & ShrImmFlags.Signed) != 0; + bool round = (flags & ShrImmFlags.Round) != 0; + bool accumulate = (flags & ShrImmFlags.Accumulate) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (op.Size <= 2) + { + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = signed + ? context.ShiftRightSI(e, Const(shift)) + : context.ShiftRightUI(e, Const(shift)); + } + else /* if (op.Size == 3) */ + { + e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift); + } + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + e = context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitVectorShrImmNarrowOpZx(EmitterContext context, bool round) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? 
context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = context.ShiftRightUI(e, Const(shift)); + + res = EmitVectorInsert(context, res, e, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(EmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + private static void EmitShrImmSaturatingNarrowOp(EmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + if (op.Size <= 1 || !round) + { + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = signedSrc + ? 
context.ShiftRightSI(e, Const(shift)) + : context.ShiftRightUI(e, Const(shift)); + } + else /* if (op.Size == 2 && round) */ + { + e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32 + } + + e = EmitSatQ(context, e, op.Size, signedSrc, signedDst); + + res = EmitVectorInsert(context, res, e, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + // dst64 = (Int(src64, signed) + roundConst) >> shift; + private static Operand EmitShrImm64( + EmitterContext context, + Operand value, + bool signed, + long roundConst, + int shift) + { + string name = signed + ? nameof(SoftFallback.SignedShrImm64) + : nameof(SoftFallback.UnsignedShrImm64); + + MethodInfo info = typeof(SoftFallback).GetMethod(name); + + return context.Call(info, value, Const(roundConst), Const(shift)); + } + + private static void EmitVectorShImmWidenBinarySx(EmitterContext context, Func2I emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true); + } + + private static void EmitVectorShImmWidenBinaryZx(EmitterContext context, Func2I emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false); + } + + private static void EmitVectorShImmWidenBinaryOp(EmitterContext context, Func2I emit, int imm, bool signed) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs index 9851c7d716..e8b4f8a21b 100644 --- a/ARMeilleure/Instructions/SoftFallback.cs +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -6,11 +6,370 @@ namespace ARMeilleure.Instructions static class SoftFallback { #region "ShlReg" + public static long SignedShlReg(long value, long shift, bool round, int size) + { + int eSize = 8 << size; + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return SignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return 0L; + } + + return value << shiftLsB; + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size) + { + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return UnsignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return 0UL; + } + + return value << shiftLsB; + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static long SignedShlRegSatQ(long value, long shift, bool round, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return SignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return SignedSignSatQ(value, eSize, context); + } + + if (eSize == 64) + { + long shl = value << shiftLsB; + long shr = shl >> shiftLsB; + + if (shr != value) + { + return SignedSignSatQ(value, eSize, context); + } + else /* if (shr == 
value) */ + { + return shl; + } + } + else /* if (eSize != 64) */ + { + return SignedSrcSignedDstSatQ(value << shiftLsB, size); + } + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return UnsignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return UnsignedSignSatQ(value, eSize, context); + } + + if (eSize == 64) + { + ulong shl = value << shiftLsB; + ulong shr = shl >> shiftLsB; + + if (shr != value) + { + return UnsignedSignSatQ(value, eSize, context); + } + else /* if (shr == value) */ + { + return shl; + } + } + else /* if (eSize != 64) */ + { + return UnsignedSrcUnsignedDstSatQ(value << shiftLsB, size); + } + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}. + { + if (round) + { + if (shift >= eSize) + { + return 0L; + } + + long roundConst = 1L << (shift - 1); + + long add = value + roundConst; + + if (eSize == 64) + { + if ((~value & (value ^ add)) < 0L) + { + return (long)((ulong)add >> shift); + } + else + { + return add >> shift; + } + } + else /* if (eSize != 64) */ + { + return add >> shift; + } + } + else /* if (!round) */ + { + if (shift >= eSize) + { + if (value < 0L) + { + return -1L; + } + else /* if (value >= 0L) */ + { + return 0L; + } + } + + return value >> shift; + } + } + + private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}. 
+ { + if (round) + { + if (shift > 64) + { + return 0UL; + } + + ulong roundConst = 1UL << (shift - 1); + + ulong add = value + roundConst; + + if (eSize == 64) + { + if ((add < value) && (add < roundConst)) + { + if (shift == 64) + { + return 1UL; + } + + return (add >> shift) | (0x8000000000000000UL >> (shift - 1)); + } + else + { + if (shift == 64) + { + return 0UL; + } + + return add >> shift; + } + } + else /* if (eSize != 64) */ + { + if (shift == 64) + { + return 0UL; + } + + return add >> shift; + } + } + else /* if (!round) */ + { + if (shift >= eSize) + { + return 0UL; + } + + return value >> shift; + } + } + + private static long SignedSignSatQ(long op, int eSize, ExecutionContext context) // eSize := {8, 16, 32, 64}. + { + long tMaxValue = (1L << (eSize - 1)) - 1L; + long tMinValue = -(1L << (eSize - 1)); + + if (op > 0L) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else if (op < 0L) + { + context.Fpsr |= FPSR.Qc; + + return tMinValue; + } + else + { + return 0L; + } + } + + private static ulong UnsignedSignSatQ(ulong op, int eSize, ExecutionContext context) // eSize := {8, 16, 32, 64}. 
+ { + ulong tMaxValue = ulong.MaxValue >> (64 - eSize); + + if (op > 0UL) + { + context.Fpsr |= FPSR.Qc; + + return tMaxValue; + } + else + { + return 0UL; + } + } #endregion #region "ShrImm64" + public static long SignedShrImm64(long value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (shift == 64) */ + { + if (value < 0L) + { + return -1L; + } + else /* if (value >= 0L) */ + { + return 0L; + } + } + } + else /* if (roundConst == 1L << (shift - 1)) */ + { + if (shift <= 63) + { + long add = value + roundConst; + if ((~value & (value ^ add)) < 0L) + { + return (long)((ulong)add >> shift); + } + else + { + return add >> shift; + } + } + else /* if (shift == 64) */ + { + return 0L; + } + } + } + + public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (shift == 64) */ + { + return 0UL; + } + } + else /* if (roundConst == 1L << (shift - 1)) */ + { + ulong add = value + (ulong)roundConst; + + if ((add < value) && (add < (ulong)roundConst)) + { + if (shift <= 63) + { + return (add >> shift) | (0x8000000000000000UL >> (shift - 1)); + } + else /* if (shift == 64) */ + { + return 1UL; + } + } + else + { + if (shift <= 63) + { + return add >> shift; + } + else /* if (shift == 64) */ + { + return 0UL; + } + } + } + } #endregion #region "Rounding" diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs index 14f79b75a6..5602ef800b 100644 --- a/ARMeilleure/IntermediateRepresentation/Instruction.cs +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -147,6 +147,7 @@ namespace ARMeilleure.IntermediateRepresentation X86Pshufb, X86Pslld, X86Pslldq, + X86Psllq, X86Psllw, X86Psrad, X86Psraw,