From cfb7c165be7b62506da6547bb1715054857e9cec Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Tue, 18 Dec 2018 00:54:19 +0100
Subject: [PATCH] Update SoftFloat.cs

---
 ChocolArm64/Instructions/SoftFloat.cs | 515 ++++++++++++++++++--------
 1 file changed, 357 insertions(+), 158 deletions(-)

diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs
index ad580972e7..2afa77fe23 100644
--- a/ChocolArm64/Instructions/SoftFloat.cs
+++ b/ChocolArm64/Instructions/SoftFloat.cs
@@ -13,187 +13,68 @@ namespace ChocolArm64.Instructions
             InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
         }
 
-        private static readonly byte[] RecipEstimateTable;
-        private static readonly byte[] InvSqrtEstimateTable;
+        internal static readonly byte[] RecipEstimateTable;
+        internal static readonly byte[] InvSqrtEstimateTable;
 
         private static byte[] BuildRecipEstimateTable()
         {
-            byte[] table = new byte[256];
-            for (ulong index = 0; index < 256; index++)
+            byte[] tbl = new byte[256]; // one entry per input in [256, 512), stored at index (input - 256)
+
+            for (uint idx = 0u; idx < 256u; idx++)
             {
-                ulong a = index | 0x100;
+                uint src = idx + 256u; // map the table index back to the input range [256, 512)
 
-                a = (a << 1) + 1;
-                ulong b = 0x80000 / a;
-                b = (b + 1) >> 1;
+                Debug.Assert(256u <= src && src < 512u);
 
-                table[index] = (byte)(b & 0xFF);
+                src = (src << 1) + 1u; // src * 2 + 1
+
+                uint aux = 0x00080000u / src; // 2^19 / src (integer division)
+
+                uint dst = (aux + 1u) >> 1; // (aux + 1) / 2
+
+                Debug.Assert(256u <= dst && dst < 512u);
+
+                tbl[idx] = (byte)(dst - 256u); // store only the offset from 256 so the value fits in a byte
             }
-            return table;
+
+            return tbl;
         }
 
         private static byte[] BuildInvSqrtEstimateTable()
         {
-            byte[] table = new byte[512];
-            for (ulong index = 128; index < 512; index++)
+            byte[] tbl = new byte[384]; // one entry per input in [128, 512), stored at index (input - 128)
+
+            for (uint idx = 0u; idx < 384u; idx++)
             {
-                ulong a = index;
-                if (a < 256)
+                uint src = idx + 128u; // map the table index back to the input range [128, 512)
+
+                Debug.Assert(128u <= src && src < 512u);
+
+                if (src < 256u)
                 {
-                    a = (a << 1) + 1;
+                    src = (src << 1) + 1u; // src * 2 + 1
                 }
                 else
                 {
-                    a = (a | 1) << 1;
+                    src = (src >> 1) << 1; // clear bit 0
+                    src = (src + 1u) << 1; // (src + 1) * 2
                 }
 
-                ulong b = 256;
-                while (a * (b + 1) * (b + 1) < (1ul << 28))
+                uint aux = 512u; // the search below starts from 512
+
+                while (src * (aux + 1u) * (aux + 1u) < 0x10000000u) // keep growing aux while src * (aux + 1)^2 < 2^28
                 {
-                    b++;
-                }
-                b = (b + 1) >> 1;
-
-                table[index] = (byte)(b & 0xFF);
-            }
-            return table;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static float RecipEstimate(float x)
-        {
-            return (float)RecipEstimate((double)x);
-        }
-
-        public static double RecipEstimate(double x)
-        {
-            ulong xBits  = (ulong)BitConverter.DoubleToInt64Bits(x);
-            ulong xSign  = xBits & 0x8000000000000000;
-            ulong xExp   = (xBits >> 52) & 0x7FF;
-            ulong scaled = xBits & ((1ul << 52) - 1);
-
-            if (xExp >= 2045)
-            {
-                if (xExp == 0x7ff && scaled != 0)
-                {
-                    // NaN
-                    return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
+                    aux = aux + 1u;
                 }
 
-                // Infinity, or Out of range -> Zero
-                return BitConverter.Int64BitsToDouble((long)xSign);
+                uint dst = (aux + 1u) >> 1; // (aux + 1) / 2
+
+                Debug.Assert(256u <= dst && dst < 512u);
+
+                tbl[idx] = (byte)(dst - 256u); // store only the offset from 256 so the value fits in a byte
             }
 
-            if (xExp == 0)
-            {
-                if (scaled == 0)
-                {
-                    // Zero -> Infinity
-                    return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
-                }
-
-                // Denormal
-                if ((scaled & (1ul << 51)) == 0)
-                {
-                    xExp = ~0ul;
-                    scaled <<= 2;
-                }
-                else
-                {
-                    scaled <<= 1;
-                }
-            }
-
-            scaled >>= 44;
-            scaled &= 0xFF;
-
-            ulong resultExp = (2045 - xExp) & 0x7FF;
-            ulong estimate  = (ulong)RecipEstimateTable[scaled];
-            ulong fraction  = estimate << 44;
-
-            if (resultExp == 0)
-            {
-                fraction >>= 1;
-                fraction |= 1ul << 51;
-            }
-            else if (resultExp == 0x7FF)
-            {
-                resultExp = 0;
-                fraction >>= 2;
-                fraction |= 1ul << 50;
-            }
-
-            ulong result = xSign | (resultExp << 52) | fraction;
-            return BitConverter.Int64BitsToDouble((long)result);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static float InvSqrtEstimate(float x)
-        {
-            return (float)InvSqrtEstimate((double)x);
-        }
-
-        public static double InvSqrtEstimate(double x)
-        {
-            ulong xBits  = (ulong)BitConverter.DoubleToInt64Bits(x);
-            ulong xSign  = xBits & 0x8000000000000000;
-            long  xExp   = (long)((xBits >> 52) & 0x7FF);
-            ulong scaled = xBits & ((1ul << 52) - 1);
-
-            if (xExp == 0x7FF && scaled != 0)
-            {
-                // NaN
-                return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
-            }
-
-            if (xExp == 0)
-            {
-                if (scaled == 0)
-                {
-                    // Zero -> Infinity
-                    return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
-                }
-
-                // Denormal
-                while ((scaled & (1 << 51)) == 0)
-                {
-                    scaled <<= 1;
-                    xExp--;
-                }
-                scaled <<= 1;
-            }
-
-            if (xSign != 0)
-            {
-                // Negative -> NaN
-                return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000);
-            }
-
-            if (xExp == 0x7ff && scaled == 0)
-            {
-                // Infinity -> Zero
-                return BitConverter.Int64BitsToDouble((long)xSign);
-            }
-
-            if (((ulong)xExp & 1) == 1)
-            {
-                scaled >>= 45;
-                scaled &= 0xFF;
-                scaled |= 0x80;
-            }
-            else
-            {
-                scaled >>= 44;
-                scaled &= 0xFF;
-                scaled |= 0x100;
-            }
-
-            ulong resultExp = ((ulong)(3068 - xExp) / 2) & 0x7FF;
-            ulong estimate  = (ulong)InvSqrtEstimateTable[scaled];
-            ulong fraction  = estimate << 44;
-
-            ulong result = xSign | (resultExp << 52) | fraction;
-            return BitConverter.Int64BitsToDouble((long)result);
+            return tbl;
         }
     }
 
@@ -1276,6 +1157,95 @@ namespace ChocolArm64.Instructions
             return result;
         }
 
+        public static float FPRecipEstimate(float value, CpuThreadState state) // Single-precision reciprocal estimate; exceptions and flags are reported through state.
+        {
+            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+            value.FPUnpack(out FpType type, out bool sign, out uint op, state); // presumably yields the class, sign and raw bit pattern of value (FPUnpack is defined elsewhere) - confirm
+
+            float result;
+
+            if (type == FpType.SNaN || type == FpType.QNaN)
+            {
+                result = FPProcessNaN(type, op, state); // NaN inputs are delegated to FPProcessNaN
+            }
+            else if (type == FpType.Infinity)
+            {
+                result = FPZero(sign); // infinity maps to a zero of the same sign
+            }
+            else if (type == FpType.Zero)
+            {
+                result = FPInfinity(sign); // zero maps to an infinity of the same sign
+
+                FPProcessException(FpExc.DivideByZero, state);
+            }
+            else if (MathF.Abs(value) < MathF.Pow(2f, -128)) // magnitude below 2^-128 is handled as an overflowing result
+            {
+                bool overflowToInf;
+
+                switch (state.FPRoundingMode())
+                {
+                    default: // any other mode is treated like ToNearest
+                    case RoundMode.ToNearest:            overflowToInf = true;  break; // always infinity
+                    case RoundMode.TowardsPlusInfinity:  overflowToInf = !sign; break; // infinity only for positive inputs
+                    case RoundMode.TowardsMinusInfinity: overflowToInf = sign;  break; // infinity only for negative inputs
+                    case RoundMode.TowardsZero:          overflowToInf = false; break; // never infinity
+                }
+
+                result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); // otherwise the largest-magnitude finite value of that sign
+
+                FPProcessException(FpExc.Overflow, state);
+                FPProcessException(FpExc.Inexact,  state);
+            }
+            else if (state.GetFpcrFlag(Fpcr.Fz) && (MathF.Abs(value) >= MathF.Pow(2f, 126))) // Fz flag set and magnitude >= 2^126: return a signed zero and set Ufc
+            {
+                result = FPZero(sign);
+
+                state.SetFpsrFlag(Fpsr.Ufc);
+            }
+            else
+            {
+                ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; // low 23 bits of op moved up to bits 51..29
+                uint exp = (op & 0x7F800000u) >> 23; // 8-bit field taken from bits 30..23 of op
+
+                if (exp == 0u)
+                {
+                    if ((fraction & 0x0008000000000000ul) == 0ul) // bit 51 clear: shift left by two
+                    {
+                        fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+                        exp -= 1u; // unsigned wrap to 0xFFFFFFFF, so 253u - exp below evaluates to 254
+                    }
+                    else
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; // bit 51 set: shift left by one
+                    }
+                }
+
+                uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); // bits 51..44 with bit 52 forced on: a value in [256, 512)
+
+                uint resultExp = 253u - exp; // unsigned subtraction; wraps to 0xFFFFFFFF when exp is 254
+
+                uint estimate = SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; // table is indexed by (scaled - 256) and stores (estimate - 256)
+
+                fraction = (ulong)(estimate & 0xFFu) << 44; // low 8 bits of the estimate become bits 51..44
+
+                if (resultExp == 0u)
+                {
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; // set bit 52, then shift right by one
+                }
+                else if (resultExp + 1u == 0u) // true only when resultExp wrapped to 0xFFFFFFFF
+                {
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; // set bit 52, then shift right by two
+                    resultExp = 0u;
+                }
+
+                result = BitConverter.Int32BitsToSingle(
+                    (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu)); // sign at bit 31, resultExp at bits 30..23, fraction bits 51..29 as the low 23 bits
+            }
+
+            return result;
+        }
+
         public static float FPRecipStepFused(float value1, float value2, CpuThreadState state)
         {
             Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -1343,6 +1313,71 @@ namespace ChocolArm64.Instructions
             return result;
         }
 
+        public static float FPRSqrtEstimate(float value, CpuThreadState state) // Single-precision reciprocal square-root estimate; exceptions are reported through state.
+        {
+            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+            value.FPUnpack(out FpType type, out bool sign, out uint op, state); // presumably yields the class, sign and raw bit pattern of value (FPUnpack is defined elsewhere) - confirm
+
+            float result;
+
+            if (type == FpType.SNaN || type == FpType.QNaN)
+            {
+                result = FPProcessNaN(type, op, state); // NaN inputs are delegated to FPProcessNaN
+            }
+            else if (type == FpType.Zero)
+            {
+                result = FPInfinity(sign); // zero maps to an infinity of the same sign
+
+                FPProcessException(FpExc.DivideByZero, state);
+            }
+            else if (sign) // any remaining negative input (including negative infinity)
+            {
+                result = FPDefaultNaN();
+
+                FPProcessException(FpExc.InvalidOp, state);
+            }
+            else if (type == FpType.Infinity)
+            {
+                result = FPZero(false); // only a positive infinity reaches this branch
+            }
+            else
+            {
+                ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; // low 23 bits of op moved up to bits 51..29
+                uint exp = (op & 0x7F800000u) >> 23; // 8-bit field taken from bits 30..23 of op
+
+                if (exp == 0u)
+                {
+                    while ((fraction & 0x0008000000000000ul) == 0ul) // shift left until bit 51 is set
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                        exp -= 1u; // unsigned decrement; wraps below zero, later arithmetic is modulo 2^32
+                    }
+
+                    fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; // one more shift, dropping the bit that was at position 51
+                }
+
+                uint scaled;
+
+                if ((exp & 1u) == 0u) // low bit of exp selects which slice of the fraction is used
+                {
+                    scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); // bits 51..44 with bit 52 forced on: a value in [256, 512)
+                }
+                else
+                {
+                    scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); // bits 51..45 with bit 52 forced on: a value in [128, 256)
+                }
+
+                uint resultExp = (380u - exp) >> 1; // (380 - exp) / 2; the subtraction is modulo 2^32, so a wrapped exp still works
+
+                uint estimate = SoftFloat.InvSqrtEstimateTable[scaled - 128u] + 256u; // table is indexed by (scaled - 128) and stores (estimate - 256)
+
+                result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15)); // bit 31 stays 0; resultExp at bits 30..23, low 8 estimate bits at bits 22..15
+            }
+
+            return result;
+        }
+
         public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state)
         {
             Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -1490,6 +1525,11 @@ namespace ChocolArm64.Instructions
             return sign ? -0f : +0f;
         }
 
+        private static float FPMaxNormal(bool sign) // Largest-magnitude finite float, negative when sign is true.
+        {
+            return sign ? float.MinValue : float.MaxValue; // float.MinValue is the most negative finite value (-float.MaxValue), not the smallest positive one
+        }
+
         private static float FPTwo(bool sign)
         {
             return sign ? -2f : +2f;
@@ -2201,6 +2241,95 @@ namespace ChocolArm64.Instructions
             return result;
         }
 
+        public static double FPRecipEstimate(double value, CpuThreadState state) // Double-precision reciprocal estimate; exceptions and flags are reported through state.
+        {
+            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+            value.FPUnpack(out FpType type, out bool sign, out ulong op, state); // presumably yields the class, sign and raw bit pattern of value (FPUnpack is defined elsewhere) - confirm
+
+            double result;
+
+            if (type == FpType.SNaN || type == FpType.QNaN)
+            {
+                result = FPProcessNaN(type, op, state); // NaN inputs are delegated to FPProcessNaN
+            }
+            else if (type == FpType.Infinity)
+            {
+                result = FPZero(sign); // infinity maps to a zero of the same sign
+            }
+            else if (type == FpType.Zero)
+            {
+                result = FPInfinity(sign); // zero maps to an infinity of the same sign
+
+                FPProcessException(FpExc.DivideByZero, state);
+            }
+            else if (Math.Abs(value) < Math.Pow(2d, -1024)) // magnitude below 2^-1024 is handled as an overflowing result
+            {
+                bool overflowToInf;
+
+                switch (state.FPRoundingMode())
+                {
+                    default: // any other mode is treated like ToNearest
+                    case RoundMode.ToNearest:            overflowToInf = true;  break; // always infinity
+                    case RoundMode.TowardsPlusInfinity:  overflowToInf = !sign; break; // infinity only for positive inputs
+                    case RoundMode.TowardsMinusInfinity: overflowToInf = sign;  break; // infinity only for negative inputs
+                    case RoundMode.TowardsZero:          overflowToInf = false; break; // never infinity
+                }
+
+                result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); // otherwise the largest-magnitude finite value of that sign
+
+                FPProcessException(FpExc.Overflow, state);
+                FPProcessException(FpExc.Inexact,  state);
+            }
+            else if (state.GetFpcrFlag(Fpcr.Fz) && (Math.Abs(value) >= Math.Pow(2d, 1022))) // Fz flag set and magnitude >= 2^1022: return a signed zero and set Ufc
+            {
+                result = FPZero(sign);
+
+                state.SetFpsrFlag(Fpsr.Ufc);
+            }
+            else
+            {
+                ulong fraction = op & 0x000FFFFFFFFFFFFFul; // low 52 bits of op
+                uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); // 11-bit field taken from bits 62..52 of op
+
+                if (exp == 0u)
+                {
+                    if ((fraction & 0x0008000000000000ul) == 0ul) // bit 51 clear: shift left by two
+                    {
+                        fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+                        exp -= 1u; // unsigned wrap to 0xFFFFFFFF, so 2045u - exp below evaluates to 2046
+                    }
+                    else
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; // bit 51 set: shift left by one
+                    }
+                }
+
+                uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); // bits 51..44 with bit 52 forced on: a value in [256, 512)
+
+                uint resultExp = 2045u - exp; // unsigned subtraction; wraps to 0xFFFFFFFF when exp is 2046
+
+                uint estimate = SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; // table is indexed by (scaled - 256) and stores (estimate - 256)
+
+                fraction = (ulong)(estimate & 0xFFu) << 44; // low 8 bits of the estimate become bits 51..44
+
+                if (resultExp == 0u)
+                {
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; // set bit 52, then shift right by one
+                }
+                else if (resultExp + 1u == 0u) // true only when resultExp wrapped to 0xFFFFFFFF
+                {
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; // set bit 52, then shift right by two
+                    resultExp = 0u;
+                }
+
+                result = BitConverter.Int64BitsToDouble(
+                    (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul))); // sign at bit 63, resultExp at bits 62..52, fraction in the low 52 bits
+            }
+
+            return result;
+        }
+
         public static double FPRecipStepFused(double value1, double value2, CpuThreadState state)
         {
             Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -2268,6 +2397,71 @@ namespace ChocolArm64.Instructions
             return result;
         }
 
+        public static double FPRSqrtEstimate(double value, CpuThreadState state) // Double-precision reciprocal square-root estimate; exceptions are reported through state.
+        {
+            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+            value.FPUnpack(out FpType type, out bool sign, out ulong op, state); // presumably yields the class, sign and raw bit pattern of value (FPUnpack is defined elsewhere) - confirm
+
+            double result;
+
+            if (type == FpType.SNaN || type == FpType.QNaN)
+            {
+                result = FPProcessNaN(type, op, state); // NaN inputs are delegated to FPProcessNaN
+            }
+            else if (type == FpType.Zero)
+            {
+                result = FPInfinity(sign); // zero maps to an infinity of the same sign
+
+                FPProcessException(FpExc.DivideByZero, state);
+            }
+            else if (sign) // any remaining negative input (including negative infinity)
+            {
+                result = FPDefaultNaN();
+
+                FPProcessException(FpExc.InvalidOp, state);
+            }
+            else if (type == FpType.Infinity)
+            {
+                result = FPZero(false); // only a positive infinity reaches this branch
+            }
+            else
+            {
+                ulong fraction = op & 0x000FFFFFFFFFFFFFul; // low 52 bits of op
+                uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); // 11-bit field taken from bits 62..52 of op
+
+                if (exp == 0u)
+                {
+                    while ((fraction & 0x0008000000000000ul) == 0ul) // shift left until bit 51 is set
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                        exp -= 1u; // unsigned decrement; wraps below zero, later arithmetic is modulo 2^32
+                    }
+
+                    fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; // one more shift, dropping the bit that was at position 51
+                }
+
+                uint scaled;
+
+                if ((exp & 1u) == 0u) // low bit of exp selects which slice of the fraction is used
+                {
+                    scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); // bits 51..44 with bit 52 forced on: a value in [256, 512)
+                }
+                else
+                {
+                    scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); // bits 51..45 with bit 52 forced on: a value in [128, 256)
+                }
+
+                uint resultExp = (3068u - exp) >> 1; // (3068 - exp) / 2; the subtraction is modulo 2^32, so a wrapped exp still works
+
+                uint estimate = SoftFloat.InvSqrtEstimateTable[scaled - 128u] + 256u; // table is indexed by (scaled - 128) and stores (estimate - 256)
+
+                result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44)); // bit 63 stays 0; resultExp at bits 62..52, low 8 estimate bits at bits 51..44
+            }
+
+            return result;
+        }
+
         public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state)
         {
             Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -2415,6 +2609,11 @@ namespace ChocolArm64.Instructions
             return sign ? -0d : +0d;
         }
 
+        private static double FPMaxNormal(bool sign) // Largest-magnitude finite double, negative when sign is true.
+        {
+            return sign ? double.MinValue : double.MaxValue; // double.MinValue is the most negative finite value (-double.MaxValue), not the smallest positive one
+        }
+
         private static double FPTwo(bool sign)
         {
             return sign ? -2d : +2d;