From ca3a73dd4e1ba1f8bb7d2accb8d80dd8f0c3c700 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 15 Jun 2019 23:50:05 -0300 Subject: [PATCH] Implement FP & SIMD comparison instructions, and some fixes --- ARMeilleure/CodeGen/X86/Assembler.cs | 32 +- ARMeilleure/CodeGen/X86/CodeGenerator.cs | 62 ++ ARMeilleure/CodeGen/X86/PreAllocator.cs | 45 +- ARMeilleure/CodeGen/X86/X86Instruction.cs | 4 + ARMeilleure/Decoders/OpCodeTable.cs | 84 +-- ARMeilleure/Instructions/InstEmitAluHelper.cs | 23 +- ARMeilleure/Instructions/InstEmitSimdCmp.cs | 705 ++++++++++++++++++ ARMeilleure/Instructions/SoftFloat.cs | 8 +- .../IntermediateRepresentation/Instruction.cs | 8 + .../OperandHelper.cs | 5 + ARMeilleure/State/V128.cs | 13 +- Ryujinx.Tests.Unicorn/SimdValue.cs | 49 +- 12 files changed, 914 insertions(+), 124 deletions(-) create mode 100644 ARMeilleure/Instructions/InstEmitSimdCmp.cs diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index d54a61af81..31fd3a335b 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -77,6 +77,10 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Cmp, new InstInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstFlags.None)); Add(X86Instruction.Cmppd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstFlags.Vex | InstFlags.Prefix66)); Add(X86Instruction.Cmpps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstFlags.Vex)); + Add(X86Instruction.Cmpsd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstFlags.Vex | InstFlags.PrefixF2)); + Add(X86Instruction.Cmpss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstFlags.Vex | InstFlags.PrefixF3)); + Add(X86Instruction.Comisd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstFlags.Vex | InstFlags.Prefix66)); + Add(X86Instruction.Comiss, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstFlags.Vex)); Add(X86Instruction.Cvtdq2pd, new InstInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000fe6, InstFlags.Vex | InstFlags.PrefixF3)); Add(X86Instruction.Cvtdq2ps, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstFlags.Vex)); Add(X86Instruction.Cvtpd2dq, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstFlags.Vex | InstFlags.PrefixF2)); @@ -313,9 +317,9 @@ namespace ARMeilleure.CodeGen.X86 WriteRRMOpCode(dest, source, info.Flags, info.OpRRM | (int)condition); } - public void Cmp(Operand src1, Operand src2) + public void Cmp(Operand source1, Operand source2) { - WriteInstruction(src1, src2, X86Instruction.Cmp); + WriteInstruction(source1, source2, X86Instruction.Cmp); } public void Cqo() @@ -338,6 +342,30 @@ namespace ARMeilleure.CodeGen.X86 WriteByte(imm); } + public void Cmpsd(Operand dest, Operand source, Operand source1, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Cmpsd, source1); + + WriteByte(imm); + } + + public void Cmpss(Operand dest, Operand source, Operand source1, byte imm) + { + WriteInstruction(dest, source, X86Instruction.Cmpss, source1); + + WriteByte(imm); + } + + public void Comisd(Operand source1, Operand source2) + { + WriteInstruction(source1, source2, X86Instruction.Comisd); + } + + public void Comiss(Operand source1, Operand source2) + { + WriteInstruction(source1, source2, X86Instruction.Comiss); + } + public void Cvtdq2pd(Operand dest, Operand source) { WriteInstruction(dest, source, X86Instruction.Cvtdq2pd); diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 47a80fee64..4428e94a20 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -88,6 +88,14 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.X86Andnps, GenerateX86Andnps); Add(Instruction.X86Cmppd, GenerateX86Cmppd); Add(Instruction.X86Cmpps, GenerateX86Cmpps); + Add(Instruction.X86Cmpsd, GenerateX86Cmpsd); + Add(Instruction.X86Cmpss, GenerateX86Cmpss); + Add(Instruction.X86Comisdeq, GenerateX86Comisdeq); + Add(Instruction.X86Comisdge, 
GenerateX86Comisdge); + Add(Instruction.X86Comisdlt, GenerateX86Comisdlt); + Add(Instruction.X86Comisseq, GenerateX86Comisseq); + Add(Instruction.X86Comissge, GenerateX86Comissge); + Add(Instruction.X86Comisslt, GenerateX86Comisslt); Add(Instruction.X86Cvtdq2pd, GenerateX86Cvtdq2pd); Add(Instruction.X86Cvtdq2ps, GenerateX86Cvtdq2ps); Add(Instruction.X86Cvtpd2dq, GenerateX86Cvtpd2dq); @@ -1026,6 +1034,60 @@ namespace ARMeilleure.CodeGen.X86 operation.GetSource(2).AsByte()); } + private static void GenerateX86Cmpsd(CodeGenContext context, Operation operation) + { + context.Assembler.Cmpsd( + operation.Dest, + operation.GetSource(1), + operation.GetSource(0), + operation.GetSource(2).AsByte()); + } + + private static void GenerateX86Cmpss(CodeGenContext context, Operation operation) + { + context.Assembler.Cmpss( + operation.Dest, + operation.GetSource(1), + operation.GetSource(0), + operation.GetSource(2).AsByte()); + } + + private static void GenerateX86Comisdeq(CodeGenContext context, Operation operation) + { + context.Assembler.Comisd(operation.GetSource(0), operation.GetSource(1)); + context.Assembler.Setcc(operation.Dest, X86Condition.Equal); + } + + private static void GenerateX86Comisdge(CodeGenContext context, Operation operation) + { + context.Assembler.Comisd(operation.GetSource(0), operation.GetSource(1)); + context.Assembler.Setcc(operation.Dest, X86Condition.AboveOrEqual); + } + + private static void GenerateX86Comisdlt(CodeGenContext context, Operation operation) + { + context.Assembler.Comisd(operation.GetSource(0), operation.GetSource(1)); + context.Assembler.Setcc(operation.Dest, X86Condition.Below); + } + + private static void GenerateX86Comisseq(CodeGenContext context, Operation operation) + { + context.Assembler.Comiss(operation.GetSource(0), operation.GetSource(1)); + context.Assembler.Setcc(operation.Dest, X86Condition.Equal); + } + + private static void GenerateX86Comissge(CodeGenContext context, Operation operation) + { + 
context.Assembler.Comiss(operation.GetSource(0), operation.GetSource(1)); + context.Assembler.Setcc(operation.Dest, X86Condition.AboveOrEqual); + } + + private static void GenerateX86Comisslt(CodeGenContext context, Operation operation) + { + context.Assembler.Comiss(operation.GetSource(0), operation.GetSource(1)); + context.Assembler.Setcc(operation.Dest, X86Condition.Below); + } + private static void GenerateX86Cvtdq2pd(CodeGenContext context, Operation operation) { context.Assembler.Cvtdq2pd(operation.Dest, operation.GetSource(0)); diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index c32cc4cd1b..0192f1ddc2 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -106,7 +106,7 @@ namespace ARMeilleure.CodeGen.X86 //Comparison instructions uses CMOVcc, which does not zero the //upper bits of the register (since it's R8), we need to ensure it //is zero by zeroing it beforehand. - if (inst.IsComparison()) + if (inst.IsComparison() || IsComparisonIntrinsic(inst)) { Operation copyOp = new Operation(Instruction.Copy, operation.Dest, Const(0)); @@ -121,12 +121,12 @@ namespace ARMeilleure.CodeGen.X86 ReplaceConvertToFPUIWithSI(node, operation); } - //There's no SSE FP negate instruction, so we need to transform that into: - //r = 0 - n or - //r = n ^ (1 << (OperandSize - 1)) + //There's no SSE FP negate instruction, so we need to transform that into + //a XOR of the value to be negated with a mask with the highest bit set. + //This also produces -0 for a negation of the value 0. 
if (inst == Instruction.Negate && !operation.GetSource(0).Type.IsInteger()) { - ReplaceNegateWithSubtract(node, operation); + ReplaceNegateWithXor(node, operation); } AddFixedRegisterCopy(node, operation); @@ -272,19 +272,32 @@ namespace ARMeilleure.CodeGen.X86 Delete(node, operation); } - private static void ReplaceNegateWithSubtract(LinkedListNode node, Operation operation) + private static void ReplaceNegateWithXor(LinkedListNode node, Operation operation) { Operand dest = operation.Dest; Operand source = operation.GetSource(0); + Debug.Assert(dest.Type == OperandType.FP32 || + dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\"."); + LinkedList nodes = node.List; LinkedListNode temp = node; Operand res = Local(dest.Type); - temp = nodes.AddAfter(temp, new Operation(Instruction.VectorZero, res)); - temp = nodes.AddAfter(temp, new Operation(Instruction.Subtract, res, res, source)); + temp = nodes.AddAfter(temp, new Operation(Instruction.X86Pcmpeqw, res, res, res)); + + if (dest.Type == OperandType.FP32) + { + temp = nodes.AddAfter(temp, new Operation(Instruction.X86Pslld, res, res, Const(31))); + } + else /* if (dest.Type == OperandType.FP64) */ + { + temp = nodes.AddAfter(temp, new Operation(Instruction.X86Psllq, res, res, Const(63))); + } + + temp = nodes.AddAfter(temp, new Operation(Instruction.X86Xorps, res, res, source)); temp = nodes.AddAfter(temp, new Operation(Instruction.Copy, dest, res)); Delete(node, operation); @@ -700,5 +713,21 @@ namespace ARMeilleure.CodeGen.X86 return inst > Instruction.X86Intrinsic_Start && inst < Instruction.X86Intrinsic_End; } + + private static bool IsComparisonIntrinsic(Instruction inst) + { + switch (inst) + { + case Instruction.X86Comisdeq: + case Instruction.X86Comisdge: + case Instruction.X86Comisdlt: + case Instruction.X86Comisseq: + case Instruction.X86Comissge: + case Instruction.X86Comisslt: + return true; + } + + return false; + } } } \ No newline at end of file diff --git 
a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index d0559aa924..6538d07dab 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -17,6 +17,10 @@ namespace ARMeilleure.CodeGen.X86 Cmp, Cmppd, Cmpps, + Cmpsd, + Cmpss, + Comisd, + Comiss, Cvtdq2pd, Cvtdq2ps, Cvtpd2dq, diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index c4220691ed..5db983ce1b 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -211,28 +211,28 @@ namespace ARMeilleure.Decoders SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V, InstEmit.Bsl_V, typeof(OpCodeSimdReg)); SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V, InstEmit.Cls_V, typeof(OpCodeSimd)); SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V, InstEmit.Clz_V, typeof(OpCodeSimd)); - SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, null, typeof(OpCodeSimdReg)); - SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, null, typeof(OpCodeSimd)); - SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, null, typeof(OpCodeSimd)); - SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, null, typeof(OpCodeSimdReg)); - SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, null, typeof(OpCodeSimdReg)); - SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, null, typeof(OpCodeSimd)); - SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, null, typeof(OpCodeSimdReg)); - SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, null, typeof(OpCodeSimd)); - 
SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, null, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, null, typeof(OpCodeSimdReg)); - SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, null, typeof(OpCodeSimdReg)); - SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, null, typeof(OpCodeSimdReg)); - SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, null, typeof(OpCodeSimd)); - SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, null, typeof(OpCodeSimd)); - SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, null, typeof(OpCodeSimd)); - SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, null, typeof(OpCodeSimd)); - SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, null, typeof(OpCodeSimdReg)); - SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, null, typeof(OpCodeSimdReg)); + SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, typeof(OpCodeSimdReg)); + SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, typeof(OpCodeSimd)); + SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, typeof(OpCodeSimdReg)); + SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, typeof(OpCodeSimd)); + SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, typeof(OpCodeSimdReg)); + SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, typeof(OpCodeSimdReg)); + SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, typeof(OpCodeSimd)); + SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, 
InstEmit.Cmgt_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, typeof(OpCodeSimd)); + SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, InstEmit.Cmhi_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, InstEmit.Cmhi_V, typeof(OpCodeSimdReg)); + SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, InstEmit.Cmhs_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, InstEmit.Cmhs_V, typeof(OpCodeSimdReg)); + SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, InstEmit.Cmle_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, InstEmit.Cmle_V, typeof(OpCodeSimd)); + SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, InstEmit.Cmlt_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, InstEmit.Cmlt_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, InstEmit.Cmtst_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, InstEmit.Cmtst_V, typeof(OpCodeSimdReg)); SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, typeof(OpCodeSimd)); SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, typeof(OpCodeSimdIns)); SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, typeof(OpCodeSimdIns)); @@ -247,26 +247,26 @@ namespace ARMeilleure.Decoders SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, typeof(OpCodeSimdReg)); SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, typeof(OpCodeSimd)); SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, null, typeof(OpCodeSimdFcond)); - SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, null, 
typeof(OpCodeSimdFcond)); - SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, null, typeof(OpCodeSimdReg)); - SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, null, typeof(OpCodeSimd)); - SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, null, typeof(OpCodeSimdReg)); - SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, null, typeof(OpCodeSimd)); - SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, null, typeof(OpCodeSimdReg)); - SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, null, typeof(OpCodeSimd)); - SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, null, typeof(OpCodeSimdReg)); - SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, null, typeof(OpCodeSimd)); - SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, null, typeof(OpCodeSimdReg)); - SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, null, typeof(OpCodeSimd)); - SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, null, typeof(OpCodeSimdReg)); - SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, null, typeof(OpCodeSimd)); - SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, null, typeof(OpCodeSimd)); - SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, null, typeof(OpCodeSimd)); - SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, null, typeof(OpCodeSimd)); - SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, null, typeof(OpCodeSimd)); - SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, null, typeof(OpCodeSimdReg)); - SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, null, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, InstEmit.Fccmp_S, typeof(OpCodeSimdFcond)); + SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, InstEmit.Fccmpe_S, typeof(OpCodeSimdFcond)); + SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, typeof(OpCodeSimdReg)); + 
SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, typeof(OpCodeSimd)); + SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, typeof(OpCodeSimdReg)); + SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, typeof(OpCodeSimd)); + SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, typeof(OpCodeSimdReg)); + SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, typeof(OpCodeSimd)); + SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, typeof(OpCodeSimdReg)); + SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, typeof(OpCodeSimd)); + SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, typeof(OpCodeSimdReg)); + SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, typeof(OpCodeSimd)); + SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, typeof(OpCodeSimdReg)); + SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, typeof(OpCodeSimd)); + SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, InstEmit.Fcmle_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, InstEmit.Fcmle_V, typeof(OpCodeSimd)); + SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, InstEmit.Fcmlt_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, InstEmit.Fcmlt_V, typeof(OpCodeSimd)); + SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, InstEmit.Fcmp_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, InstEmit.Fcmpe_S, typeof(OpCodeSimdReg)); SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, typeof(OpCodeSimdFcond)); SetA64("000111100x10001xx10000xxxxxxxxxx", InstName.Fcvt_S, InstEmit.Fcvt_S, typeof(OpCodeSimd)); SetA64("x00111100x100100000000xxxxxxxxxx", 
InstName.Fcvtas_Gp, InstEmit.Fcvtas_Gp, typeof(OpCodeSimdCvt)); diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs index 723e1a92ec..8e60013263 100644 --- a/ARMeilleure/Instructions/InstEmitAluHelper.cs +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -59,11 +59,7 @@ namespace ARMeilleure.Instructions context.Copy(GetFlag(PState.CFlag), context.ICompareGreaterOrEqualUI(n, m)); } - public static void EmitSubsVCheck( - EmitterContext context, - Operand n, - Operand m, - Operand d) + public static void EmitSubsVCheck(EmitterContext context, Operand n, Operand m, Operand d) { //V = (Rd ^ Rn) & (Rn ^ Rm) < 0 Operand vOut = context.BitwiseExclusiveOr(d, n); @@ -158,23 +154,6 @@ namespace ARMeilleure.Instructions } } - public static void EmitSetNzcv(EmitterContext context, Operand nzcv) - { - Operand Extract(Operand value, int bit) - { - value = context.ShiftRightUI(value, Const(bit)); - - value = context.BitwiseAnd(value, Const(1)); - - return value; - } - - context.Copy(GetFlag(PState.VFlag), Extract(nzcv, 0)); - context.Copy(GetFlag(PState.CFlag), Extract(nzcv, 1)); - context.Copy(GetFlag(PState.ZFlag), Extract(nzcv, 2)); - context.Copy(GetFlag(PState.NFlag), Extract(nzcv, 3)); - } - private static Exception InvalidOpCodeType(OpCode opCode) { return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? 
"null"}\"."); diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs new file mode 100644 index 0000000000..3b2e1c797b --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -0,0 +1,705 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { + public static void Cmeq_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true); + } + + public static void Cmeq_V(EmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Instruction cmpInst = X86PcmpeqInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false); + } + } + + public static void Cmge_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true); + } + + public static void Cmge_V(EmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Instruction cmpInst = X86PcmpgtInstruction[op.Size]; + + 
Operand res = context.AddIntrinsic(cmpInst, m, n); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Instruction.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmgt_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true); + } + + public static void Cmgt_V(EmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Instruction cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false); + } + } + + public static void Cmhi_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true); + } + + public static void Cmhi_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, m, n); + + Instruction cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, m); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Instruction.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = 
context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false); + } + } + + public static void Cmhs_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true); + } + + public static void Cmhs_V(EmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Instruction maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + Instruction cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false); + } + } + + public static void Cmle_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true); + } + + public static void Cmle_V(EmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Instruction cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero()); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Instruction.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmlt_S(EmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true); + } + + public 
static void Cmlt_V(EmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Instruction cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false); + } + } + + public static void Cmtst_S(EmitterContext context) + { + EmitCmtstOp(context, scalar: true); + } + + public static void Cmtst_V(EmitterContext context) + { + EmitCmtstOp(context, scalar: false); + } + + public static void Fccmp_S(EmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: false); + } + + public static void Fccmpe_S(EmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: true); + } + + public static void Fcmeq_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true); + } + } + + public static void Fcmeq_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false); + } + } + + public static void Fcmge_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true); + } + } + + public static void Fcmge_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false); + } + 
else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false); + } + } + + public static void Fcmgt_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true); + } + } + + public static void Fcmgt_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false); + } + } + + public static void Fcmle_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true); + } + } + + public static void Fcmle_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false); + } + } + + public static void Fcmlt_S(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true); + } + } + + public static void Fcmlt_V(EmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false); + } + } + + public static void Fcmp_S(EmitterContext context) + { + EmitFcmpOrFcmpe(context, signalNaNs: false); + } + + public static void 
Fcmpe_S(EmitterContext context) + { + EmitFcmpOrFcmpe(context, signalNaNs: true); + } + + public static void EmitFccmpOrFccmpe(EmitterContext context, bool signalNaNs) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond)); + + EmitSetNzcv(context, Const(op.Nzcv)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + EmitFcmpOrFcmpe(context, signalNaNs); + + context.MarkLabel(lblEnd); + } + + private static void EmitFcmpOrFcmpe(EmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + const int cmpOrdered = 7; + + bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + if (Optimizations.FastFP && Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm); + + Operand lblNaN = Label(); + Operand lblEnd = Label(); + + if (op.Size == 0) + { + Operand ordMask = context.AddIntrinsic(Instruction.X86Cmpss, n, m, Const(cmpOrdered)); + + Operand isOrdered = context.VectorExtract16(ordMask, Local(OperandType.I32), 0); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Instruction.X86Comissge, n, m); + Operand zf = context.AddIntrinsicInt(Instruction.X86Comisseq, n, m); + Operand nf = context.AddIntrinsicInt(Instruction.X86Comisslt, n, m); + + context.Copy(GetFlag(PState.VFlag), Const(0)); + context.Copy(GetFlag(PState.CFlag), cf); + context.Copy(GetFlag(PState.ZFlag), zf); + context.Copy(GetFlag(PState.NFlag), nf); + } + else /* if (op.Size == 1) */ + { + Operand ordMask = context.AddIntrinsic(Instruction.X86Cmpsd, n, m, Const(cmpOrdered)); + + Operand isOrdered = context.VectorExtract16(ordMask, Local(OperandType.I32), 0); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Instruction.X86Comisdge, n, m); + Operand zf = 
context.AddIntrinsicInt(Instruction.X86Comisdeq, n, m); + Operand nf = context.AddIntrinsicInt(Instruction.X86Comisdlt, n, m); + + context.Copy(GetFlag(PState.VFlag), Const(0)); + context.Copy(GetFlag(PState.CFlag), cf); + context.Copy(GetFlag(PState.ZFlag), zf); + context.Copy(GetFlag(PState.NFlag), nf); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblNaN); + + context.Copy(GetFlag(PState.VFlag), Const(1)); + context.Copy(GetFlag(PState.CFlag), Const(1)); + context.Copy(GetFlag(PState.ZFlag), Const(0)); + context.Copy(GetFlag(PState.NFlag), Const(0)); + + context.MarkLabel(lblEnd); + } + else + { + OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), 0); + Operand me; + + if (cmpWithZero) + { + me = op.Size == 0 ? ConstF(0f) : ConstF(0d); + } + else + { + me = context.VectorExtract(GetVec(op.Rm), Local(type), 0); + } + + Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs)); + + EmitSetNzcv(context, nzcv); + } + } + + private static void EmitSetNzcv(EmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + context.Copy(GetFlag(PState.VFlag), Extract(nzcv, 0)); + context.Copy(GetFlag(PState.CFlag), Extract(nzcv, 1)); + context.Copy(GetFlag(PState.ZFlag), Extract(nzcv, 2)); + context.Copy(GetFlag(PState.NFlag), Extract(nzcv, 3)); + } + + private static void EmitCmpOp(EmitterContext context, Func2I emitCmp, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size); + } + else + { + me = Const(0L); + } + + Operand isTrue = emitCmp(ne, me); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmtstOp(EmitterContext context, bool scalar) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand test = context.BitwiseAnd(ne, me); + + Operand isTrue = context.ICompareNotEqual(test, Const(0L)); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmpOpF(EmitterContext context, string name, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(GetVec(op.Rn), Local(type), index); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = context.VectorExtract(GetVec(binOp.Rm), Local(type), index); + } + else + { + me = sizeF == 0 ? 
ConstF(0f) : ConstF(0d); + } + + Operand e = EmitSoftFloatCall(context, name, ne, me); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private enum CmpCondition + { + Equal = 0, + GreaterThanOrEqual = 5, + GreaterThan = 6 + } + + private static void EmitCmpSseOrSse2OpF( + EmitterContext context, + CmpCondition cond, + bool scalar, + bool isLeOrLt = false) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero(); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Instruction inst = scalar ? Instruction.X86Cmpss : Instruction.X86Cmpps; + + Operand res = isLeOrLt + ? context.AddIntrinsic(inst, m, n, Const((int)cond)) + : context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Instruction inst = scalar ? Instruction.X86Cmpsd : Instruction.X86Cmppd; + + Operand res = isLeOrLt + ? 
context.AddIntrinsic(inst, m, n, Const((int)cond)) + : context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +} diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs index 13b9a18089..244fe37e7c 100644 --- a/ARMeilleure/Instructions/SoftFloat.cs +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -669,8 +669,10 @@ namespace ARMeilleure.Instructions return result; } - public static int FPCompare(float value1, float value2, bool signalNaNs, ExecutionContext context) + public static int FPCompare(float value1, float value2, bool signalNaNs) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); @@ -1730,8 +1732,10 @@ namespace ARMeilleure.Instructions return result; } - public static int FPCompare(double value1, double value2, bool signalNaNs, ExecutionContext context) + public static int FPCompare(double value1, double value2, bool signalNaNs) { + ExecutionContext context = NativeInterface.GetContext(); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs index 5602ef800b..e828580805 100644 --- a/ARMeilleure/IntermediateRepresentation/Instruction.cs +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -76,6 +76,14 @@ namespace ARMeilleure.IntermediateRepresentation X86Andnps, X86Cmppd, X86Cmpps, + X86Cmpsd, + X86Cmpss, + X86Comisdeq, + X86Comisdge, + X86Comisdlt, + X86Comisseq, + X86Comissge, + X86Comisslt, X86Cvtdq2pd, X86Cvtdq2ps, X86Cvtpd2dq, diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs 
b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs index f15ec07e19..4a930e03f4 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs @@ -10,6 +10,11 @@ namespace ARMeilleure.IntermediateRepresentation return type == OperandType.I32 ? new Operand((int)value) : new Operand(value); } + public static Operand Const(bool value) + { + return new Operand(value ? 1 : 0); + } + public static Operand Const(int value) { return new Operand(value); diff --git a/ARMeilleure/State/V128.cs b/ARMeilleure/State/V128.cs index 00b20fa1d2..8060f96422 100644 --- a/ARMeilleure/State/V128.cs +++ b/ARMeilleure/State/V128.cs @@ -7,9 +7,9 @@ namespace ARMeilleure.State private ulong _e0; private ulong _e1; - public V128(float value) : this(value, value, value, value) { } + public V128(float value) : this(value, 0, 0, 0) { } - public V128(double value) : this(value, value) { } + public V128(double value) : this(value, 0) { } public V128(float e0, float e1, float e2, float e3) { @@ -84,12 +84,15 @@ namespace ARMeilleure.State public uint GetUInt32(int index) { - if ((uint)index > 3) + switch (index) { - throw new ArgumentOutOfRangeException(nameof(index)); + case 0: return (uint)(_e0 >> 0); + case 1: return (uint)(_e0 >> 32); + case 2: return (uint)(_e1 >> 0); + case 3: return (uint)(_e1 >> 32); } - return (uint)(GetUInt64(index >> 1) >> (index & 1)); + throw new ArgumentOutOfRangeException(nameof(index)); } public ulong GetUInt64(int index) diff --git a/Ryujinx.Tests.Unicorn/SimdValue.cs b/Ryujinx.Tests.Unicorn/SimdValue.cs index 7d85df7df1..2b52843058 100644 --- a/Ryujinx.Tests.Unicorn/SimdValue.cs +++ b/Ryujinx.Tests.Unicorn/SimdValue.cs @@ -7,46 +7,6 @@ namespace Ryujinx.Tests.Unicorn private ulong _e0; private ulong _e1; - public SimdValue(float value) : this(value, value, value, value) { } - - public SimdValue(double value) : this(value, value) { } - - public SimdValue(float e0, float e1, float e2, float 
e3) - { - _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0; - _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32; - _e1 = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0; - _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32; - } - - public SimdValue(double e0, double e1) - { - _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0); - _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1); - } - - public SimdValue(int e0, int e1, int e2, int e3) - { - _e0 = (ulong)(uint)e0 << 0; - _e0 |= (ulong)(uint)e1 << 32; - _e1 = (ulong)(uint)e2 << 0; - _e1 |= (ulong)(uint)e3 << 32; - } - - public SimdValue(uint e0, uint e1, uint e2, uint e3) - { - _e0 = (ulong)e0 << 0; - _e0 |= (ulong)e1 << 32; - _e1 = (ulong)e2 << 0; - _e1 |= (ulong)e3 << 32; - } - - public SimdValue(long e0, long e1) - { - _e0 = (ulong)e0; - _e1 = (ulong)e1; - } - public SimdValue(ulong e0, ulong e1) { _e0 = e0; @@ -84,12 +44,15 @@ namespace Ryujinx.Tests.Unicorn public uint GetUInt32(int index) { - if ((uint)index > 3) + switch (index) { - throw new ArgumentOutOfRangeException(nameof(index)); + case 0: return (uint)(_e0 >> 0); + case 1: return (uint)(_e0 >> 32); + case 2: return (uint)(_e1 >> 0); + case 3: return (uint)(_e1 >> 32); } - return (uint)(GetUInt64(index >> 1) >> (index & 1)); + throw new ArgumentOutOfRangeException(nameof(index)); } public ulong GetUInt64(int index)