From 997c476e9142ca11d1ac1db560cc01fdff7be648 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 10 May 2018 23:08:51 -0300 Subject: [PATCH] Drop SSE4.1 requirement --- ChocolArm64/Instruction/AVectorHelper.cs | 266 ++++++++++++++--------- ChocolArm64/Memory/AMemory.cs | 18 +- 2 files changed, 175 insertions(+), 109 deletions(-) diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs index b9d1e6de70..1a21359230 100644 --- a/ChocolArm64/Instruction/AVectorHelper.cs +++ b/ChocolArm64/Instruction/AVectorHelper.cs @@ -101,26 +101,6 @@ namespace ChocolArm64.Instruction ((Value >> 6) & 1) + (Value >> 7); } - public static float MaxF(float LHS, float RHS) - { - if (LHS == 0.0 && RHS == 0.0) - { - if (BitConverter.SingleToInt32Bits(LHS) < 0 && - BitConverter.SingleToInt32Bits(RHS) < 0) - return -0.0f; - - return 0.0f; - } - - if (LHS > RHS) - return LHS; - - if (float.IsNaN(LHS)) - return LHS; - - return RHS; - } - public static double Max(double LHS, double RHS) { if (LHS == 0.0 && RHS == 0.0) @@ -141,18 +121,18 @@ namespace ChocolArm64.Instruction return RHS; } - public static float MinF(float LHS, float RHS) + public static float MaxF(float LHS, float RHS) { if (LHS == 0.0 && RHS == 0.0) { - if (BitConverter.SingleToInt32Bits(LHS) < 0 || + if (BitConverter.SingleToInt32Bits(LHS) < 0 && BitConverter.SingleToInt32Bits(RHS) < 0) return -0.0f; return 0.0f; } - if (LHS < RHS) + if (LHS > RHS) return LHS; if (float.IsNaN(LHS)) @@ -181,17 +161,24 @@ namespace ChocolArm64.Instruction return RHS; } - public static float RoundF(float Value, int Fpcr) + public static float MinF(float LHS, float RHS) { - switch ((ARoundMode)((Fpcr >> 22) & 3)) + if (LHS == 0.0 && RHS == 0.0) { - case ARoundMode.ToNearest: return MathF.Round (Value); - case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value); - case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value); - case ARoundMode.TowardsZero: return MathF.Truncate(Value); + if (BitConverter.SingleToInt32Bits(LHS) < 0 || + BitConverter.SingleToInt32Bits(RHS) < 0) + return -0.0f; + + return 0.0f; } - throw new InvalidOperationException(); + if (LHS < RHS) + return LHS; + + if (float.IsNaN(LHS)) + return LHS; + + return RHS; } public static double Round(double Value, int Fpcr) @@ -207,6 +194,19 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } + public static float RoundF(float Value, int Fpcr) + { + switch ((ARoundMode)((Fpcr >> 22) & 3)) + { + case ARoundMode.ToNearest: return MathF.Round (Value); + case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value); + case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value); + case ARoundMode.TowardsZero: return MathF.Truncate(Value); + } + + throw new InvalidOperationException(); + } + public static Vector128 Tbl1_V64( Vector128 Vector, Vector128 Tb0) @@ -300,6 +300,57 @@ namespace ChocolArm64.Instruction return Res; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static double VectorExtractDouble(Vector128 Vector, byte Index) + { + return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long VectorExtractIntSx(Vector128 Vector, byte Index, int Size) + { + if (Sse41.IsSupported) + { + switch (Size) + { + case 0: + return (sbyte)Sse41.Extract(Sse.StaticCast(Vector), Index); + + case 1: + return (short)Sse2.Extract(Sse.StaticCast(Vector), Index); + + case 2: + return Sse41.Extract(Sse.StaticCast(Vector), Index); + + case 3: + return Sse41.Extract(Sse.StaticCast(Vector), Index); + } + + throw new ArgumentOutOfRangeException(nameof(Size)); + } + else if (Sse2.IsSupported) + { + switch (Size) + { + case 0: + return (sbyte)VectorExtractIntZx(Vector, Index, Size); + + case 1: + return (short)VectorExtractIntZx(Vector, Index, Size); + + case 2: + return (int)VectorExtractIntZx(Vector, Index, Size); + + case 3: + return (long)VectorExtractIntZx(Vector, Index, Size); + } + + throw new ArgumentOutOfRangeException(nameof(Size)); + } + + throw new PlatformNotSupportedException(); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ulong VectorExtractIntZx(Vector128 Vector, byte Index, int Size) { @@ -322,28 +373,40 @@ namespace ChocolArm64.Instruction throw new ArgumentOutOfRangeException(nameof(Size)); } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static long VectorExtractIntSx(Vector128 Vector, byte Index, int Size) - { - if (Sse41.IsSupported) + else if (Sse2.IsSupported) { + int ShortIdx = Size == 0 + ? Index >> 1 + : Index << (Size - 1); + + ushort Value = Sse2.Extract(Sse.StaticCast(Vector), (byte)ShortIdx); + switch (Size) { case 0: - return ExtractSByte(Vector, Index); + return (byte)(Value >> (Index & 1) * 8); case 1: - return ExtractShort(Vector, Index); + return Value; case 2: - return Sse41.Extract(Sse.StaticCast(Vector), Index); - case 3: - return Sse41.Extract(Sse.StaticCast(Vector), Index); + { + ushort Value1 = Sse2.Extract(Sse.StaticCast(Vector), (byte)(ShortIdx + 1)); + + if (Size == 2) + { + return (uint)(Value | (Value1 << 16)); + } + + ushort Value2 = Sse2.Extract(Sse.StaticCast(Vector), (byte)(ShortIdx + 2)); + ushort Value3 = Sse2.Extract(Sse.StaticCast(Vector), (byte)(ShortIdx + 3)); + + return ((ulong)Value << 0) | + ((ulong)Value1 << 16) | + ((ulong)Value2 << 32) | + ((ulong)Value3 << 48); + } } throw new ArgumentOutOfRangeException(nameof(Size)); @@ -352,22 +415,6 @@ namespace ChocolArm64.Instruction throw new PlatformNotSupportedException(); } - [MethodImpl(MethodImplOptions.NoInlining)] - private static sbyte ExtractSByte(Vector128 Vector, byte Index) - { - //Workaround to JIT bug. - //https://github.com/dotnet/coreclr/issues/17957 - return Sse41.Extract(Sse.StaticCast(Vector), Index); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static short ExtractShort(Vector128 Vector, byte Index) - { - //Workaround to JIT bug. - //https://github.com/dotnet/coreclr/issues/17957 - return Sse2.Extract(Sse.StaticCast(Vector), Index); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float VectorExtractSingle(Vector128 Vector, byte Index) { @@ -379,54 +426,10 @@ namespace ChocolArm64.Instruction throw new PlatformNotSupportedException(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static double VectorExtractDouble(Vector128 Vector, byte Index) - { - if (Sse41.IsSupported) - { - int FIdx = Index << 1; - - int Low = BitConverter.SingleToInt32Bits(Sse41.Extract(Vector, (byte)(FIdx + 0))); - int High = BitConverter.SingleToInt32Bits(Sse41.Extract(Vector, (byte)(FIdx + 1))); - - return BitConverter.Int64BitsToDouble( - ((long)(uint)Low << 0) | - ((long)(uint)High << 32)); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorInsertSingle(float Value, Vector128 Vector, byte Index) - { - if (Sse41.IsSupported) - { - return Sse41.Insert(Vector, Value, (byte)(Index << 4)); - } - - throw new PlatformNotSupportedException(); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 VectorInsertDouble(double Value, Vector128 Vector, byte Index) { - if (Sse41.IsSupported) - { - int FIdx = Index << 5; - - long Raw = BitConverter.DoubleToInt64Bits(Value); - - float Low = BitConverter.Int32BitsToSingle((int)((ulong)Raw >> 0)); - float High = BitConverter.Int32BitsToSingle((int)((ulong)Raw >> 32)); - - Vector = Sse41.Insert(Vector, Low, (byte)(FIdx + 0)); - Vector = Sse41.Insert(Vector, High, (byte)(FIdx + 0x10)); - - return Vector; - } - - throw new PlatformNotSupportedException(); + return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(Value), Vector, Index, 3); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -451,6 +454,61 @@ namespace ChocolArm64.Instruction throw new ArgumentOutOfRangeException(nameof(Size)); } + else if (Sse2.IsSupported) + { + Vector128 ShortVector = Sse.StaticCast(Vector); + + int ShortIdx = Size == 0 + ? Index >> 1 + : Index << (Size - 1); + + switch (Size) + { + case 0: + { + ushort ShortVal = Sse2.Extract(Sse.StaticCast(Vector), (byte)ShortIdx); + + int Shift = (Index & 1) * 8; + + ShortVal &= (ushort)(0xff00 >> Shift); + + ShortVal |= (ushort)((byte)Value << Shift); + + return Sse.StaticCast(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx)); + } + + case 1: + return Sse.StaticCast(Sse2.Insert(Sse.StaticCast(Vector), (ushort)Value, Index)); + + case 2: + case 3: + { + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0)); + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1)); + + if (Size == 3) + { + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2)); + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3)); + } + + return Sse.StaticCast(ShortVector); + } + } + + throw new ArgumentOutOfRangeException(nameof(Size)); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 VectorInsertSingle(float Value, Vector128 Vector, byte Index) + { + if (Sse41.IsSupported) + { + return Sse41.Insert(Vector, Value, (byte)(Index << 4)); + } throw new PlatformNotSupportedException(); } diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs index 77ada1a7b5..b5a240cf34 100644 --- a/ChocolArm64/Memory/AMemory.cs +++ b/ChocolArm64/Memory/AMemory.cs @@ -193,9 +193,9 @@ namespace ChocolArm64.Memory public Vector128 ReadVector8(long Position) { - if (Sse41.IsSupported) + if (Sse2.IsSupported) { - return Sse.StaticCast(Sse41.Insert(new Vector128(), ReadByte(Position), 0)); + return Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByte(Position))); } else { @@ -207,7 +207,7 @@ namespace ChocolArm64.Memory { if (Sse2.IsSupported) { - return Sse.StaticCast(Sse2.Insert(new Vector128(), ReadUInt16(Position), 0)); + return Sse.StaticCast(Sse2.Insert(Sse2.SetZeroVector128(), ReadUInt16(Position), 0)); } else { @@ -302,9 +302,9 @@ namespace ChocolArm64.Memory public Vector128 ReadVector8Unchecked(long Position) { - if (Sse41.IsSupported) + if (Sse2.IsSupported) { - return Sse.StaticCast(Sse41.Insert(Sse2.SetZeroVector128(), ReadByteUnchecked(Position), 0)); + return Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByte(Position))); } else { @@ -417,6 +417,10 @@ namespace ChocolArm64.Memory { WriteByte(Position, Sse41.Extract(Sse.StaticCast(Value), 0)); } + else if (Sse2.IsSupported) + { + WriteByteUnchecked(Position, (byte)Sse2.Extract(Sse.StaticCast(Value), 0)); + } else { throw new PlatformNotSupportedException(); @@ -526,6 +530,10 @@ namespace ChocolArm64.Memory { WriteByteUnchecked(Position, Sse41.Extract(Sse.StaticCast(Value), 0)); } + else if (Sse2.IsSupported) + { + WriteByteUnchecked(Position, (byte)Sse2.Extract(Sse.StaticCast(Value), 0)); + } else { throw new PlatformNotSupportedException();