diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 6bb74c1d8e..745d40e181 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -50,6 +50,7 @@ namespace ARMeilleure.CodeGen.X86 // Name RM/R RM/I8 RM/I32 R/I64 R/RM Opers Add(X86Instruction.Add, new InstInfo(0x000001, 0x000083, 0x000081, BadOp, 0x000003, 2)); Add(X86Instruction.And, new InstInfo(0x000021, 0x040083, 0x040081, BadOp, 0x000023, 2)); + Add(X86Instruction.Bsr, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x000fbd, 2)); Add(X86Instruction.Cmp, new InstInfo(0x000039, 0x070083, 0x070081, BadOp, 0x00003b, 2)); Add(X86Instruction.Idiv, new InstInfo(BadOp, BadOp, BadOp, BadOp, 0x0700f7, 1)); Add(X86Instruction.Imul, new InstInfo(BadOp, 0x00006b, 0x000069, BadOp, 0x000faf, 2)); @@ -95,6 +96,11 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, source, X86Instruction.And); } + public void Bsr(Operand dest, Operand source) + { + WriteInstruction(dest, source, X86Instruction.Bsr); + } + public void Cdq() { WriteByte(0x99); diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs index 673a5584f9..9c4ecf35f0 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenContext.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -1,6 +1,7 @@ using ARMeilleure.CodeGen.RegisterAllocators; using ARMeilleure.IntermediateRepresentation; using System.Collections.Generic; +using System.Diagnostics; using System.IO; namespace ARMeilleure.CodeGen.X86 @@ -60,6 +61,10 @@ namespace ARMeilleure.CodeGen.X86 private List _jumps; + private X86Condition _jNearCondition; + private long _jNearPosition; + private int _jNearLength; + public CodeGenContext(Stream stream, RAReport raReport, int blocksCount) { _stream = stream; @@ -94,6 +99,30 @@ namespace ARMeilleure.CodeGen.X86 WritePadding(ReservedBytesForJump); } + public void JumpToNear(X86Condition condition) + { + _jNearCondition = condition; + _jNearPosition = 
_stream.Position; + _jNearLength = Assembler.GetJccLength(0); + + _stream.Seek(_jNearLength, SeekOrigin.Current); + } + + public void JumpHere() + { + long currentPosition = _stream.Position; + + _stream.Seek(_jNearPosition, SeekOrigin.Begin); + + long offset = currentPosition - (_jNearPosition + _jNearLength); + + Debug.Assert(_jNearLength == Assembler.GetJccLength(offset), "Relative offset doesn't fit on near jump."); + + Assembler.Jcc(_jNearCondition, offset); + + _stream.Seek(currentPosition, SeekOrigin.Begin); + } + private void WritePadding(int size) { while (size-- > 0) diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index d4a0b6d715..ae4be5a659 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -36,6 +36,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.CompareLessUI, GenerateCompareLessUI); Add(Instruction.CompareNotEqual, GenerateCompareNotEqual); Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); Add(Instruction.Copy, GenerateCopy); Add(Instruction.Divide, GenerateDivide); Add(Instruction.Fill, GenerateFill); @@ -228,6 +229,34 @@ namespace ARMeilleure.CodeGen.X86 context.Assembler.Cmovcc(operation.Dest, operation.GetSource(1), X86Condition.NotEqual); } + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Dest; + + Operand dest32 = Get32BitsRegister(dest.GetRegister()); + + context.Assembler.Bsr(dest, operation.GetSource(0)); + + int operandSize = dest.Type == OperandType.I32 ? 32 : 64; + int operandMask = operandSize - 1; + + //When the input operand is 0, the BSR result is undefined, however the + //ZF flag is set. We are supposed to return the operand size in that + //case. So, jump over a fix-up move unless ZF is set; the move loads + //(operand size | mask), which the final XOR turns into the operand size. 
+ context.JumpToNear(X86Condition.NotEqual); + + context.Assembler.Mov(dest32, new Operand(operandSize | operandMask)); + + context.JumpHere(); + + //BSR returns the zero-based index of the most significant set bit of the + //operand, counting from the least significant bit. However, we are supposed + //to return the number of 0 bits at the high end. So, we invert the result + //of the BSR by XORing it with (operand size - 1) to get the correct value. + context.Assembler.Xor(dest32, new Operand(operandMask)); + } + private static void GenerateCopy(CodeGenContext context, Operation operation) { Operand dest = operation.Dest; diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index ea0ad7e8a8..af0b4a2bd8 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -4,6 +4,7 @@ namespace ARMeilleure.CodeGen.X86 { Add, And, + Bsr, Cmp, Idiv, Imul, diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs index 01d68078d1..21076b9719 100644 --- a/ARMeilleure/Instructions/InstEmitAlu.cs +++ b/ARMeilleure/Instructions/InstEmitAlu.cs @@ -103,14 +103,19 @@ namespace ARMeilleure.Instructions Operand n = GetIntOrZR(op, op.Rn); - ulong mask = ulong.MaxValue >> ((64 - op.GetBitsCount()) + 1); + Operand nHigh = context.ShiftRightUI(n, Const(1)); - n = context.BitwiseExclusiveOr(context.BitwiseAnd(n, Const(mask << 1)), - context.BitwiseAnd(n, Const(mask))); + bool is32Bits = op.RegisterSize == RegisterSize.Int32; - Operand d = context.CountLeadingZeros(n); + Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue); - SetAluDOrZR(context, d); + Operand nLow = context.BitwiseAnd(n, mask); + + Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow)); + + res = context.ISubtract(res, Const(res.Type, 1)); + + SetAluDOrZR(context, res); } public static void Clz(EmitterContext context)