From 932224f05112180aa5f52162cbbc3a17c339075f Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 18 Feb 2019 20:52:06 -0300 Subject: [PATCH 01/12] ARM exclusive monitor and multicore fixes (#589) * Implement ARM exclusive load/store with compare exchange insts, and enable multicore by default * Fix comment typo * Support Linux and OSX on MemoryAlloc and CompareExchange128, some cleanup * Use intel syntax on assembly code * Adjust indentation * Add CPUID check and fix exclusive reservation granule size * Update schema multicore scheduling default value * Make the cpu id check code lower case as well --- ChocolArm64/ChocolArm64.csproj | 1 + ChocolArm64/CpuThread.cs | 2 - ChocolArm64/Instructions/InstEmitMemoryEx.cs | 281 ++++++++++++++---- ChocolArm64/Memory/CompareExchange128.cs | 151 ++++++++++ ChocolArm64/Memory/MemoryAlloc.cs | 114 +++++++ ChocolArm64/Memory/MemoryAllocUnix.cs | 70 +++++ ChocolArm64/Memory/MemoryAllocWindows.cs | 155 ++++++++++ ChocolArm64/Memory/MemoryManager.cs | 194 ++++++------ ChocolArm64/Memory/MemoryProtection.cs | 16 + .../Memory/MemoryProtectionException.cs | 10 + ChocolArm64/State/CpuThreadState.cs | 31 +- ChocolArm64/Translation/ILEmitterCtx.cs | 34 +++ .../HOS/Kernel/Common/KernelTransfer.cs | 2 +- .../HOS/Kernel/Threading/HleScheduler.cs | 2 - .../HOS/Kernel/Threading/KAddressArbiter.cs | 141 +++------ .../HOS/Kernel/Threading/KCoreContext.cs | 2 - Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs | 5 - Ryujinx/Config.jsonc | 2 +- Ryujinx/_schema.json | 2 +- 19 files changed, 954 insertions(+), 261 deletions(-) create mode 100644 ChocolArm64/Memory/CompareExchange128.cs create mode 100644 ChocolArm64/Memory/MemoryAlloc.cs create mode 100644 ChocolArm64/Memory/MemoryAllocUnix.cs create mode 100644 ChocolArm64/Memory/MemoryAllocWindows.cs create mode 100644 ChocolArm64/Memory/MemoryProtection.cs create mode 100644 ChocolArm64/Memory/MemoryProtectionException.cs diff --git a/ChocolArm64/ChocolArm64.csproj b/ChocolArm64/ChocolArm64.csproj index 
1156e361f5..0b4051b051 100644 --- a/ChocolArm64/ChocolArm64.csproj +++ b/ChocolArm64/ChocolArm64.csproj @@ -14,6 +14,7 @@ + diff --git a/ChocolArm64/CpuThread.cs b/ChocolArm64/CpuThread.cs index 6cd34f8127..ad1fd6f3c1 100644 --- a/ChocolArm64/CpuThread.cs +++ b/ChocolArm64/CpuThread.cs @@ -32,8 +32,6 @@ namespace ChocolArm64 { translator.ExecuteSubroutine(this, entrypoint); - memory.RemoveMonitor(ThreadState.Core); - WorkFinished?.Invoke(this, EventArgs.Empty); }); } diff --git a/ChocolArm64/Instructions/InstEmitMemoryEx.cs b/ChocolArm64/Instructions/InstEmitMemoryEx.cs index 42daca63b7..215fcffdd5 100644 --- a/ChocolArm64/Instructions/InstEmitMemoryEx.cs +++ b/ChocolArm64/Instructions/InstEmitMemoryEx.cs @@ -23,7 +23,9 @@ namespace ChocolArm64.Instructions public static void Clrex(ILEmitterCtx context) { - EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive)); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.ClearExclusiveAddress)); } public static void Dmb(ILEmitterCtx context) => EmitBarrier(context); @@ -37,12 +39,12 @@ namespace ChocolArm64.Instructions private static void EmitLdr(ILEmitterCtx context, AccessType accType) { - EmitLoad(context, accType, false); + EmitLoad(context, accType, pair: false); } private static void EmitLdp(ILEmitterCtx context, AccessType accType) { - EmitLoad(context, accType, true); + EmitLoad(context, accType, pair: true); } private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair) @@ -57,32 +59,128 @@ namespace ChocolArm64.Instructions EmitBarrier(context); } - if (exclusive) - { - EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), op.Rn); - } - context.EmitLdint(op.Rn); context.EmitSttmp(); - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - context.EmitLdtmp(); + if (exclusive) + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdtmp(); - EmitReadZxCall(context, op.Size); + 
context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.SetExclusiveAddress)); + } - context.EmitStintzr(op.Rt); + void WriteExclusiveValue(string propName) + { + if (op.Size < 3) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitSttmp2(); + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdtmp2(); + + context.EmitCallPrivatePropSet(typeof(CpuThreadState), propName); + + context.EmitLdtmp2(); + + if (op.Size < 3) + { + context.Emit(OpCodes.Conv_U4); + } + } if (pair) { + //Exclusive loads should be atomic. For pairwise loads, we need to + //read all the data at once. For a 32-bits pairwise load, we do a + //simple 64-bits load, for a 128-bits load, we need to call a special + //method to read 128-bits atomically. + if (op.Size == 2) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdtmp(); + + EmitReadZxCall(context, 3); + + context.Emit(OpCodes.Dup); + + //Mask low half. + context.Emit(OpCodes.Conv_U4); + + if (exclusive) + { + WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow)); + } + + context.EmitStintzr(op.Rt); + + //Shift high half. + context.EmitLsr(32); + context.Emit(OpCodes.Conv_U4); + + if (exclusive) + { + WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh)); + } + + context.EmitStintzr(op.Rt2); + } + else if (op.Size == 3) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdtmp(); + + context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicReadInt128)); + + context.Emit(OpCodes.Dup); + + //Load low part of the vector. + context.EmitLdc_I4(0); + context.EmitLdc_I4(3); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx)); + + if (exclusive) + { + WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow)); + } + + context.EmitStintzr(op.Rt); + + //Load high part of the vector. 
+ context.EmitLdc_I4(1); + context.EmitLdc_I4(3); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx)); + + if (exclusive) + { + WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh)); + } + + context.EmitStintzr(op.Rt2); + } + else + { + throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes."); + } + } + else + { + //8, 16, 32 or 64-bits (non-pairwise) load. context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); - context.EmitLdc_I8(1 << op.Size); - - context.Emit(OpCodes.Add); EmitReadZxCall(context, op.Size); - context.EmitStintzr(op.Rt2); + if (exclusive) + { + WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow)); + } + + context.EmitStintzr(op.Rt); } } @@ -99,12 +197,12 @@ namespace ChocolArm64.Instructions private static void EmitStr(ILEmitterCtx context, AccessType accType) { - EmitStore(context, accType, false); + EmitStore(context, accType, pair: false); } private static void EmitStp(ILEmitterCtx context, AccessType accType) { - EmitStore(context, accType, true); + EmitStore(context, accType, pair: true); } private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair) @@ -119,66 +217,133 @@ namespace ChocolArm64.Instructions EmitBarrier(context); } - ILLabel lblEx = new ILLabel(); - ILLabel lblEnd = new ILLabel(); - if (exclusive) { - EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), op.Rn); + ILLabel lblEx = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdint(op.Rn); + + context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.CheckExclusiveAddress)); context.Emit(OpCodes.Brtrue_S, lblEx); - context.EmitLdc_I8(1); + //Address check failed, set error right away and do not store anything. 
+ context.EmitLdc_I4(1); context.EmitStintzr(op.Rs); - context.Emit(OpCodes.Br_S, lblEnd); - } + context.Emit(OpCodes.Br, lblEnd); - context.MarkLabel(lblEx); + //Address check passed. + context.MarkLabel(lblEx); - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - context.EmitLdint(op.Rn); - context.EmitLdintzr(op.Rt); - - EmitWriteCall(context, op.Size); - - if (pair) - { context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdint(op.Rn); - context.EmitLdc_I8(1 << op.Size); - context.Emit(OpCodes.Add); + context.EmitLdarg(TranslatedSub.StateArgIdx); - context.EmitLdintzr(op.Rt2); + context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueLow)); - EmitWriteCall(context, op.Size); - } + void EmitCast() + { + //The input should be always int64. + switch (op.Size) + { + case 0: context.Emit(OpCodes.Conv_U1); break; + case 1: context.Emit(OpCodes.Conv_U2); break; + case 2: context.Emit(OpCodes.Conv_U4); break; + } + } + + EmitCast(); + + if (pair) + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueHigh)); + + EmitCast(); + + context.EmitLdintzr(op.Rt); + + EmitCast(); + + context.EmitLdintzr(op.Rt2); + + EmitCast(); + + switch (op.Size) + { + case 2: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchange2xInt32)); break; + case 3: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt128)); break; + + default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes."); + } + } + else + { + context.EmitLdintzr(op.Rt); + + EmitCast(); + + switch (op.Size) + { + case 0: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeByte)); break; + case 1: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt16)); break; + case 2: context.EmitCall(typeof(MemoryManager), 
nameof(MemoryManager.AtomicCompareExchangeInt32)); break; + case 3: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt64)); break; + + default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes."); + } + } + + //The value returned is a bool, true if the values compared + //were equal and the new value was written, false otherwise. + //We need to invert this result, as on ARM 1 indicates failure, + //and 0 success on those instructions. + context.EmitLdc_I4(1); + + context.Emit(OpCodes.Xor); + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Conv_U8); - if (exclusive) - { - context.EmitLdc_I8(0); context.EmitStintzr(op.Rs); - EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore)); + //Only clear the exclusive monitor if the store was successful (Rs = false). + context.Emit(OpCodes.Brtrue_S, lblEnd); + + Clrex(context); + + context.MarkLabel(lblEnd); } - - context.MarkLabel(lblEnd); - } - - private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1) - { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - context.EmitLdarg(TranslatedSub.StateArgIdx); - - context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core)); - - if (rn != -1) + else { - context.EmitLdint(rn); - } + void EmitWrite(int rt, long offset) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(op.Rn); - context.EmitCall(typeof(MemoryManager), name); + if (offset != 0) + { + context.EmitLdc_I8(offset); + + context.Emit(OpCodes.Add); + } + + context.EmitLdintzr(rt); + + EmitWriteCall(context, op.Size); + } + + EmitWrite(op.Rt, 0); + + if (pair) + { + EmitWrite(op.Rt2, 1 << op.Size); + } + } } private static void EmitBarrier(ILEmitterCtx context) diff --git a/ChocolArm64/Memory/CompareExchange128.cs b/ChocolArm64/Memory/CompareExchange128.cs new file mode 100644 index 0000000000..0fbe10f2cf --- /dev/null +++ b/ChocolArm64/Memory/CompareExchange128.cs @@ -0,0 +1,151 @@ 
+using System; +using System.Runtime.InteropServices; + +namespace ChocolArm64.Memory +{ + static class CompareExchange128 + { + private struct Int128 + { + public ulong Low { get; } + public ulong High { get; } + + public Int128(ulong low, ulong high) + { + Low = low; + High = high; + } + } + + private delegate Int128 InterlockedCompareExchange(IntPtr address, Int128 expected, Int128 desired); + + private delegate int GetCpuId(); + + private static InterlockedCompareExchange _interlockedCompareExchange; + + static CompareExchange128() + { + if (RuntimeInformation.OSArchitecture != Architecture.X64 || !IsCmpxchg16bSupported()) + { + throw new PlatformNotSupportedException(); + } + + byte[] interlockedCompareExchange128Code; + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + interlockedCompareExchange128Code = new byte[] + { + 0x53, // push rbx + 0x49, 0x8b, 0x00, // mov rax, [r8] + 0x49, 0x8b, 0x19, // mov rbx, [r9] + 0x49, 0x89, 0xca, // mov r10, rcx + 0x49, 0x89, 0xd3, // mov r11, rdx + 0x49, 0x8b, 0x49, 0x08, // mov rcx, [r9+8] + 0x49, 0x8b, 0x50, 0x08, // mov rdx, [r8+8] + 0xf0, 0x49, 0x0f, 0xc7, 0x0b, // lock cmpxchg16b [r11] + 0x49, 0x89, 0x02, // mov [r10], rax + 0x4c, 0x89, 0xd0, // mov rax, r10 + 0x49, 0x89, 0x52, 0x08, // mov [r10+8], rdx + 0x5b, // pop rbx + 0xc3 // ret + }; + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + interlockedCompareExchange128Code = new byte[] + { + 0x53, // push rbx + 0x49, 0x89, 0xd1, // mov r9, rdx + 0x48, 0x89, 0xcb, // mov rbx, rcx + 0x48, 0x89, 0xf0, // mov rax, rsi + 0x4c, 0x89, 0xca, // mov rdx, r9 + 0x4c, 0x89, 0xc1, // mov rcx, r8 + 0xf0, 0x48, 0x0f, 0xc7, 0x0f, // lock cmpxchg16b [rdi] + 0x5b, // pop rbx + 0xc3 // ret + }; + } + else + { + throw new PlatformNotSupportedException(); + } + + IntPtr funcPtr = MapCodeAsExecutable(interlockedCompareExchange128Code); + + _interlockedCompareExchange = 
Marshal.GetDelegateForFunctionPointer(funcPtr); + } + + private static bool IsCmpxchg16bSupported() + { + byte[] getCpuIdCode = new byte[] + { + 0x53, // push rbx + 0xb8, 0x01, 0x00, 0x00, 0x00, // mov eax, 0x1 + 0x0f, 0xa2, // cpuid + 0x89, 0xc8, // mov eax, ecx + 0x5b, // pop rbx + 0xc3 // ret + }; + + IntPtr funcPtr = MapCodeAsExecutable(getCpuIdCode); + + GetCpuId getCpuId = Marshal.GetDelegateForFunctionPointer(funcPtr); + + int cpuId = getCpuId(); + + MemoryAlloc.Free(funcPtr); + + return (cpuId & (1 << 13)) != 0; + } + + private static IntPtr MapCodeAsExecutable(byte[] code) + { + ulong codeLength = (ulong)code.Length; + + IntPtr funcPtr = MemoryAlloc.Allocate(codeLength); + + unsafe + { + fixed (byte* codePtr = code) + { + byte* dest = (byte*)funcPtr; + + long size = (long)codeLength; + + Buffer.MemoryCopy(codePtr, dest, size, size); + } + } + + MemoryAlloc.Reprotect(funcPtr, codeLength, MemoryProtection.Execute); + + return funcPtr; + } + + public static bool InterlockedCompareExchange128( + IntPtr address, + ulong expectedLow, + ulong expectedHigh, + ulong desiredLow, + ulong desiredHigh) + { + Int128 expected = new Int128(expectedLow, expectedHigh); + Int128 desired = new Int128(desiredLow, desiredHigh); + + Int128 old = _interlockedCompareExchange(address, expected, desired); + + return old.Low == expected.Low && old.High == expected.High; + } + + public static void InterlockedRead128(IntPtr address, out ulong low, out ulong high) + { + Int128 zero = new Int128(0, 0); + + Int128 old = _interlockedCompareExchange(address, zero, zero); + + low = old.Low; + high = old.High; + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Memory/MemoryAlloc.cs b/ChocolArm64/Memory/MemoryAlloc.cs new file mode 100644 index 0000000000..a24299cd70 --- /dev/null +++ b/ChocolArm64/Memory/MemoryAlloc.cs @@ -0,0 +1,114 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace ChocolArm64.Memory +{ + public static 
class MemoryAlloc + { + public static bool HasWriteWatchSupport => RuntimeInformation.IsOSPlatform(OSPlatform.Windows); + + public static IntPtr Allocate(ulong size) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + IntPtr sizeNint = new IntPtr((long)size); + + return MemoryAllocWindows.Allocate(sizeNint); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + return MemoryAllocUnix.Allocate(size); + } + else + { + throw new PlatformNotSupportedException(); + } + } + + public static IntPtr AllocateWriteTracked(ulong size) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + IntPtr sizeNint = new IntPtr((long)size); + + return MemoryAllocWindows.AllocateWriteTracked(sizeNint); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + return MemoryAllocUnix.Allocate(size); + } + else + { + throw new PlatformNotSupportedException(); + } + } + + public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission) + { + bool result; + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + IntPtr sizeNint = new IntPtr((long)size); + + result = MemoryAllocWindows.Reprotect(address, sizeNint, permission); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + result = MemoryAllocUnix.Reprotect(address, size, permission); + } + else + { + throw new PlatformNotSupportedException(); + } + + if (!result) + { + throw new MemoryProtectionException(permission); + } + } + + public static bool Free(IntPtr address) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + return MemoryAllocWindows.Free(address); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + return MemoryAllocUnix.Free(address); + } + else + { + throw new 
PlatformNotSupportedException(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool GetModifiedPages( + IntPtr address, + IntPtr size, + IntPtr[] addresses, + out ulong count) + { + //This is only supported on windows, but returning + //false (failed) is also valid for platforms without + //write tracking support on the OS. + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + return MemoryAllocWindows.GetModifiedPages(address, size, addresses, out count); + } + else + { + count = 0; + + return false; + } + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Memory/MemoryAllocUnix.cs b/ChocolArm64/Memory/MemoryAllocUnix.cs new file mode 100644 index 0000000000..857c1c5042 --- /dev/null +++ b/ChocolArm64/Memory/MemoryAllocUnix.cs @@ -0,0 +1,70 @@ +using Mono.Unix.Native; +using System; + +namespace ChocolArm64.Memory +{ + static class MemoryAllocUnix + { + public static IntPtr Allocate(ulong size) + { + ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE); + + const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE; + + const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS; + + IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0); + + if (ptr == IntPtr.Zero) + { + throw new OutOfMemoryException(); + } + + unsafe + { + ptr = new IntPtr(ptr.ToInt64() + (long)pageSize); + + *((ulong*)ptr - 1) = size; + } + + return ptr; + } + + public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection) + { + MmapProts prot = GetProtection(protection); + + return Syscall.mprotect(address, size, prot) == 0; + } + + private static MmapProts GetProtection(Memory.MemoryProtection protection) + { + switch (protection) + { + case Memory.MemoryProtection.None: return MmapProts.PROT_NONE; + case Memory.MemoryProtection.Read: return MmapProts.PROT_READ; + case Memory.MemoryProtection.ReadAndWrite: return MmapProts.PROT_READ | 
MmapProts.PROT_WRITE; + case Memory.MemoryProtection.ReadAndExecute: return MmapProts.PROT_READ | MmapProts.PROT_EXEC; + case Memory.MemoryProtection.Execute: return MmapProts.PROT_EXEC; + + default: throw new ArgumentException($"Invalid permission \"{protection}\"."); + } + } + + public static bool Free(IntPtr address) + { + ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE); + + ulong size; + + unsafe + { + size = *((ulong*)address - 1); + + address = new IntPtr(address.ToInt64() - (long)pageSize); + } + + return Syscall.munmap(address, size + pageSize) == 0; + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Memory/MemoryAllocWindows.cs b/ChocolArm64/Memory/MemoryAllocWindows.cs new file mode 100644 index 0000000000..82be8b1e4f --- /dev/null +++ b/ChocolArm64/Memory/MemoryAllocWindows.cs @@ -0,0 +1,155 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace ChocolArm64.Memory +{ + static class MemoryAllocWindows + { + [Flags] + private enum AllocationType : uint + { + Commit = 0x1000, + Reserve = 0x2000, + Decommit = 0x4000, + Release = 0x8000, + Reset = 0x80000, + Physical = 0x400000, + TopDown = 0x100000, + WriteWatch = 0x200000, + LargePages = 0x20000000 + } + + [Flags] + private enum MemoryProtection + { + NoAccess = 0x01, + ReadOnly = 0x02, + ReadWrite = 0x04, + WriteCopy = 0x08, + Execute = 0x10, + ExecuteRead = 0x20, + ExecuteReadWrite = 0x40, + ExecuteWriteCopy = 0x80, + GuardModifierflag = 0x100, + NoCacheModifierflag = 0x200, + WriteCombineModifierflag = 0x400 + } + + private enum WriteWatchFlags : uint + { + None = 0, + Reset = 1 + } + + [DllImport("kernel32.dll")] + private static extern IntPtr VirtualAlloc( + IntPtr lpAddress, + IntPtr dwSize, + AllocationType flAllocationType, + MemoryProtection flProtect); + + [DllImport("kernel32.dll")] + private static extern bool VirtualProtect( + IntPtr lpAddress, + IntPtr dwSize, + MemoryProtection flNewProtect, + out 
MemoryProtection lpflOldProtect); + + [DllImport("kernel32.dll")] + private static extern bool VirtualFree( + IntPtr lpAddress, + uint dwSize, + AllocationType dwFreeType); + + [DllImport("kernel32.dll")] + private static extern int GetWriteWatch( + WriteWatchFlags dwFlags, + IntPtr lpBaseAddress, + IntPtr dwRegionSize, + IntPtr[] lpAddresses, + ref ulong lpdwCount, + out uint lpdwGranularity); + + public static IntPtr Allocate(IntPtr size) + { + const AllocationType flags = + AllocationType.Reserve | + AllocationType.Commit; + + IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite); + + if (ptr == IntPtr.Zero) + { + throw new OutOfMemoryException(); + } + + return ptr; + } + + public static IntPtr AllocateWriteTracked(IntPtr size) + { + const AllocationType flags = + AllocationType.Reserve | + AllocationType.Commit | + AllocationType.WriteWatch; + + IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite); + + if (ptr == IntPtr.Zero) + { + throw new OutOfMemoryException(); + } + + return ptr; + } + + public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection) + { + MemoryProtection prot = GetProtection(protection); + + return VirtualProtect(address, size, prot, out _); + } + + private static MemoryProtection GetProtection(Memory.MemoryProtection protection) + { + switch (protection) + { + case Memory.MemoryProtection.None: return MemoryProtection.NoAccess; + case Memory.MemoryProtection.Read: return MemoryProtection.ReadOnly; + case Memory.MemoryProtection.ReadAndWrite: return MemoryProtection.ReadWrite; + case Memory.MemoryProtection.ReadAndExecute: return MemoryProtection.ExecuteRead; + case Memory.MemoryProtection.Execute: return MemoryProtection.Execute; + + default: throw new ArgumentException($"Invalid permission \"{protection}\"."); + } + } + + public static bool Free(IntPtr address) + { + return VirtualFree(address, 0, AllocationType.Release); + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool GetModifiedPages( + IntPtr address, + IntPtr size, + IntPtr[] addresses, + out ulong count) + { + ulong pagesCount = (ulong)addresses.Length; + + int result = GetWriteWatch( + WriteWatchFlags.Reset, + address, + size, + addresses, + ref pagesCount, + out uint granularity); + + count = pagesCount; + + return result == 0; + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs index 1f21256807..afb0f65143 100644 --- a/ChocolArm64/Memory/MemoryManager.cs +++ b/ChocolArm64/Memory/MemoryManager.cs @@ -1,16 +1,16 @@ using ChocolArm64.Events; using ChocolArm64.Exceptions; using ChocolArm64.Instructions; -using ChocolArm64.State; using System; using System.Collections.Concurrent; -using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using System.Threading; +using static ChocolArm64.Memory.CompareExchange128; + namespace ChocolArm64.Memory { public unsafe class MemoryManager : IMemory, IDisposable @@ -30,21 +30,6 @@ namespace ChocolArm64.Memory private const int PtLvl0Bit = PageBits + PtLvl1Bits; private const int PtLvl1Bit = PageBits; - private const long ErgMask = (4 << CpuThreadState.ErgSizeLog2) - 1; - - private class ArmMonitor - { - public long Position; - public bool ExState; - - public bool HasExclusiveAccess(long position) - { - return Position == position && ExState; - } - } - - private Dictionary _monitors; - private ConcurrentDictionary _observedPages; public IntPtr Ram { get; private set; } @@ -59,8 +44,6 @@ namespace ChocolArm64.Memory public MemoryManager(IntPtr ram) { - _monitors = new Dictionary(); - _observedPages = new ConcurrentDictionary(); Ram = ram; @@ -75,104 +58,139 @@ namespace ChocolArm64.Memory } } - public void RemoveMonitor(int core) + internal bool AtomicCompareExchange2xInt32( 
+ long position, + int expectedLow, + int expectedHigh, + int desiredLow, + int desiredHigh) { - lock (_monitors) - { - ClearExclusive(core); + long expected = (uint)expectedLow; + long desired = (uint)desiredLow; - _monitors.Remove(core); - } + expected |= (long)expectedHigh << 32; + desired |= (long)desiredHigh << 32; + + return AtomicCompareExchangeInt64(position, expected, desired); } - public void SetExclusive(int core, long position) + internal bool AtomicCompareExchangeInt128( + long position, + ulong expectedLow, + ulong expectedHigh, + ulong desiredLow, + ulong desiredHigh) { - position &= ~ErgMask; - - lock (_monitors) + if ((position & 0xf) != 0) { - foreach (ArmMonitor mon in _monitors.Values) - { - if (mon.Position == position && mon.ExState) - { - mon.ExState = false; - } - } - - if (!_monitors.TryGetValue(core, out ArmMonitor threadMon)) - { - threadMon = new ArmMonitor(); - - _monitors.Add(core, threadMon); - } - - threadMon.Position = position; - threadMon.ExState = true; + AbortWithAlignmentFault(position); } + + IntPtr ptr = new IntPtr(TranslateWrite(position)); + + return InterlockedCompareExchange128(ptr, expectedLow, expectedHigh, desiredLow, desiredHigh); } - public bool TestExclusive(int core, long position) + internal Vector128 AtomicReadInt128(long position) { - //Note: Any call to this method also should be followed by a - //call to ClearExclusiveForStore if this method returns true. 
- position &= ~ErgMask; - - Monitor.Enter(_monitors); - - if (!_monitors.TryGetValue(core, out ArmMonitor threadMon)) + if ((position & 0xf) != 0) { - Monitor.Exit(_monitors); - - return false; + AbortWithAlignmentFault(position); } - bool exState = threadMon.HasExclusiveAccess(position); + IntPtr ptr = new IntPtr(Translate(position)); - if (!exState) - { - Monitor.Exit(_monitors); - } + InterlockedRead128(ptr, out ulong low, out ulong high); - return exState; + Vector128 vector = default(Vector128); + + vector = VectorHelper.VectorInsertInt(low, vector, 0, 3); + vector = VectorHelper.VectorInsertInt(high, vector, 1, 3); + + return vector; } - public void ClearExclusiveForStore(int core) + public bool AtomicCompareExchangeByte(long position, byte expected, byte desired) { - if (_monitors.TryGetValue(core, out ArmMonitor threadMon)) - { - threadMon.ExState = false; - } + int* ptr = (int*)Translate(position); - Monitor.Exit(_monitors); + int currentValue = *ptr; + + int expected32 = (currentValue & ~byte.MaxValue) | expected; + int desired32 = (currentValue & ~byte.MaxValue) | desired; + + return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32; } - public void ClearExclusive(int core) + public bool AtomicCompareExchangeInt16(long position, short expected, short desired) { - lock (_monitors) + if ((position & 1) != 0) { - if (_monitors.TryGetValue(core, out ArmMonitor threadMon)) - { - threadMon.ExState = false; - } + AbortWithAlignmentFault(position); } + + int* ptr = (int*)Translate(position); + + int currentValue = *ptr; + + int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected; + int desired32 = (currentValue & ~ushort.MaxValue) | (ushort)desired; + + return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32; } - public void WriteInt32ToSharedAddr(long position, int value) + public bool AtomicCompareExchangeInt32(long position, int expected, int desired) { - long maskedPosition = position & 
~ErgMask; - - lock (_monitors) + if ((position & 3) != 0) { - foreach (ArmMonitor mon in _monitors.Values) - { - if (mon.Position == maskedPosition && mon.ExState) - { - mon.ExState = false; - } - } - - WriteInt32(position, value); + AbortWithAlignmentFault(position); } + + int* ptr = (int*)TranslateWrite(position); + + return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected; + } + + public bool AtomicCompareExchangeInt64(long position, long expected, long desired) + { + if ((position & 7) != 0) + { + AbortWithAlignmentFault(position); + } + + long* ptr = (long*)TranslateWrite(position); + + return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected; + } + + public int AtomicIncrementInt32(long position) + { + if ((position & 3) != 0) + { + AbortWithAlignmentFault(position); + } + + int* ptr = (int*)TranslateWrite(position); + + return Interlocked.Increment(ref *ptr); + } + + public int AtomicDecrementInt32(long position) + { + if ((position & 3) != 0) + { + AbortWithAlignmentFault(position); + } + + int* ptr = (int*)TranslateWrite(position); + + return Interlocked.Decrement(ref *ptr); + } + + private void AbortWithAlignmentFault(long position) + { + //TODO: Abort mode and exception support on the CPU. 
+ throw new InvalidOperationException($"Tried to compare exchange a misaligned address 0x{position:X16}."); } public sbyte ReadSByte(long position) diff --git a/ChocolArm64/Memory/MemoryProtection.cs b/ChocolArm64/Memory/MemoryProtection.cs new file mode 100644 index 0000000000..d0874bfc0f --- /dev/null +++ b/ChocolArm64/Memory/MemoryProtection.cs @@ -0,0 +1,16 @@ +using System; + +namespace ChocolArm64.Memory +{ + [Flags] + public enum MemoryProtection + { + None = 0, + Read = 1 << 0, + Write = 1 << 1, + Execute = 1 << 2, + + ReadAndWrite = Read | Write, + ReadAndExecute = Read | Execute + } +} \ No newline at end of file diff --git a/ChocolArm64/Memory/MemoryProtectionException.cs b/ChocolArm64/Memory/MemoryProtectionException.cs new file mode 100644 index 0000000000..3d2cebad33 --- /dev/null +++ b/ChocolArm64/Memory/MemoryProtectionException.cs @@ -0,0 +1,10 @@ +using System; + +namespace ChocolArm64.Memory +{ + class MemoryProtectionException : Exception + { + public MemoryProtectionException(MemoryProtection protection) : + base($"Failed to set memory protection to \"{protection}\".") { } + } +} \ No newline at end of file diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs index abec60bb2e..caf73deb1f 100644 --- a/ChocolArm64/State/CpuThreadState.cs +++ b/ChocolArm64/State/CpuThreadState.cs @@ -37,7 +37,6 @@ namespace ChocolArm64.State public int ElrHyp; public bool Running { get; set; } - public int Core { get; set; } private bool _interrupted; @@ -85,6 +84,16 @@ namespace ChocolArm64.State internal Translator CurrentTranslator; + private ulong _exclusiveAddress; + + internal ulong ExclusiveValueLow { get; set; } + internal ulong ExclusiveValueHigh { get; set; } + + public CpuThreadState() + { + ClearExclusiveAddress(); + } + static CpuThreadState() { _hostTickFreq = 1.0 / Stopwatch.Frequency; @@ -94,6 +103,26 @@ namespace ChocolArm64.State _tickCounter.Start(); } + internal void SetExclusiveAddress(ulong address) + { + 
_exclusiveAddress = GetMaskedExclusiveAddress(address); + } + + internal bool CheckExclusiveAddress(ulong address) + { + return GetMaskedExclusiveAddress(address) == _exclusiveAddress; + } + + internal void ClearExclusiveAddress() + { + _exclusiveAddress = ulong.MaxValue; + } + + private ulong GetMaskedExclusiveAddress(ulong address) + { + return address & ~((4UL << ErgSizeLog2) - 1); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool Synchronize(int bbWeight) { diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs index fa65bbf989..5490123774 100644 --- a/ChocolArm64/Translation/ILEmitterCtx.cs +++ b/ChocolArm64/Translation/ILEmitterCtx.cs @@ -49,6 +49,7 @@ namespace ChocolArm64.Translation private const int CmpOptTmp2Index = -4; private const int VecTmp1Index = -5; private const int VecTmp2Index = -6; + private const int IntTmp2Index = -7; public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph) { @@ -562,6 +563,9 @@ namespace ChocolArm64.Translation public void EmitLdtmp() => EmitLdint(IntTmpIndex); public void EmitSttmp() => EmitStint(IntTmpIndex); + public void EmitLdtmp2() => EmitLdint(IntTmp2Index); + public void EmitSttmp2() => EmitStint(IntTmp2Index); + public void EmitLdvectmp() => EmitLdvec(VecTmp1Index); public void EmitStvectmp() => EmitStvec(VecTmp1Index); @@ -635,6 +639,36 @@ namespace ChocolArm64.Translation EmitCall(objType.GetMethod($"set_{propName}")); } + public void EmitCallPrivatePropGet(Type objType, string propName) + { + if (objType == null) + { + throw new ArgumentNullException(nameof(objType)); + } + + if (propName == null) + { + throw new ArgumentNullException(nameof(propName)); + } + + EmitPrivateCall(objType, $"get_{propName}"); + } + + public void EmitCallPrivatePropSet(Type objType, string propName) + { + if (objType == null) + { + throw new ArgumentNullException(nameof(objType)); + } + + if (propName == null) + { + throw new 
ArgumentNullException(nameof(propName)); + } + + EmitPrivateCall(objType, $"set_{propName}"); + } + public void EmitCall(Type objType, string mthdName) { if (objType == null) diff --git a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs index 2b7591406f..0fcb31483a 100644 --- a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs +++ b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs @@ -66,7 +66,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common if (currentProcess.CpuMemory.IsMapped((long)address) && currentProcess.CpuMemory.IsMapped((long)address + 3)) { - currentProcess.CpuMemory.WriteInt32ToSharedAddr((long)address, value); + currentProcess.CpuMemory.WriteInt32((long)address, value); return true; } diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs index 835c2a2f83..d5dbb4d8e4 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs @@ -92,8 +92,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading if (coreContext.CurrentThread != null) { - coreContext.CurrentThread.ClearExclusive(); - CoreManager.Set(coreContext.CurrentThread.Context.Work); coreContext.CurrentThread.Context.Execute(); diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KAddressArbiter.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KAddressArbiter.cs index faeea5c54b..b11df61ed9 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KAddressArbiter.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KAddressArbiter.cs @@ -228,43 +228,31 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); - currentProcess.CpuMemory.SetExclusive(0, (long)address); + int mutexValue, newMutexValue; - if (!KernelTransfer.UserToKernelInt32(_system, address, out int mutexValue)) + do { - //Invalid address. 
- currentProcess.CpuMemory.ClearExclusive(0); - - requester.SignaledObj = null; - requester.ObjSyncResult = KernelResult.InvalidMemState; - - return null; - } - - while (true) - { - if (currentProcess.CpuMemory.TestExclusive(0, (long)address)) + if (!KernelTransfer.UserToKernelInt32(_system, address, out mutexValue)) { - if (mutexValue != 0) - { - //Update value to indicate there is a mutex waiter now. - currentProcess.CpuMemory.WriteInt32((long)address, mutexValue | HasListenersMask); - } - else - { - //No thread owning the mutex, assign to requesting thread. - currentProcess.CpuMemory.WriteInt32((long)address, requester.ThreadHandleForUserMutex); - } + //Invalid address. + requester.SignaledObj = null; + requester.ObjSyncResult = KernelResult.InvalidMemState; - currentProcess.CpuMemory.ClearExclusiveForStore(0); - - break; + return null; } - currentProcess.CpuMemory.SetExclusive(0, (long)address); - - mutexValue = currentProcess.CpuMemory.ReadInt32((long)address); + if (mutexValue != 0) + { + //Update value to indicate there is a mutex waiter now. + newMutexValue = mutexValue | HasListenersMask; + } + else + { + //No thread owning the mutex, assign to requesting thread. + newMutexValue = requester.ThreadHandleForUserMutex; + } } + while (!currentProcess.CpuMemory.AtomicCompareExchangeInt32((long)address, mutexValue, newMutexValue)); if (mutexValue == 0) { @@ -392,9 +380,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); - //If ShouldDecrement is true, do atomic decrement of the value at Address. 
- currentProcess.CpuMemory.SetExclusive(0, (long)address); - if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue)) { _system.CriticalSection.Leave(); @@ -404,25 +389,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading if (shouldDecrement) { - while (currentValue < value) - { - if (currentProcess.CpuMemory.TestExclusive(0, (long)address)) - { - currentProcess.CpuMemory.WriteInt32((long)address, currentValue - 1); - - currentProcess.CpuMemory.ClearExclusiveForStore(0); - - break; - } - - currentProcess.CpuMemory.SetExclusive(0, (long)address); - - currentValue = currentProcess.CpuMemory.ReadInt32((long)address); - } + currentValue = currentProcess.CpuMemory.AtomicDecrementInt32((long)address) + 1; } - currentProcess.CpuMemory.ClearExclusive(0); - if (currentValue < value) { if (timeout == 0) @@ -511,39 +480,25 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); - currentProcess.CpuMemory.SetExclusive(0, (long)address); + int currentValue; - if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue)) + do { - _system.CriticalSection.Leave(); - - return KernelResult.InvalidMemState; - } - - while (currentValue == value) - { - if (currentProcess.CpuMemory.TestExclusive(0, (long)address)) + if (!KernelTransfer.UserToKernelInt32(_system, address, out currentValue)) { - currentProcess.CpuMemory.WriteInt32((long)address, currentValue + 1); + _system.CriticalSection.Leave(); - currentProcess.CpuMemory.ClearExclusiveForStore(0); - - break; + return KernelResult.InvalidMemState; } - currentProcess.CpuMemory.SetExclusive(0, (long)address); + if (currentValue != value) + { + _system.CriticalSection.Leave(); - currentValue = currentProcess.CpuMemory.ReadInt32((long)address); - } - - currentProcess.CpuMemory.ClearExclusive(0); - - if (currentValue != value) - { - _system.CriticalSection.Leave(); - - return KernelResult.InvalidState; + return KernelResult.InvalidState; + } } + while 
(!currentProcess.CpuMemory.AtomicCompareExchangeInt32((long)address, currentValue, currentValue + 1)); WakeArbiterThreads(address, count); @@ -582,39 +537,25 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); - currentProcess.CpuMemory.SetExclusive(0, (long)address); + int currentValue; - if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue)) + do { - _system.CriticalSection.Leave(); - - return KernelResult.InvalidMemState; - } - - while (currentValue == value) - { - if (currentProcess.CpuMemory.TestExclusive(0, (long)address)) + if (!KernelTransfer.UserToKernelInt32(_system, address, out currentValue)) { - currentProcess.CpuMemory.WriteInt32((long)address, currentValue + offset); + _system.CriticalSection.Leave(); - currentProcess.CpuMemory.ClearExclusiveForStore(0); - - break; + return KernelResult.InvalidMemState; } - currentProcess.CpuMemory.SetExclusive(0, (long)address); + if (currentValue != value) + { + _system.CriticalSection.Leave(); - currentValue = currentProcess.CpuMemory.ReadInt32((long)address); - } - - currentProcess.CpuMemory.ClearExclusive(0); - - if (currentValue != value) - { - _system.CriticalSection.Leave(); - - return KernelResult.InvalidState; + return KernelResult.InvalidState; + } } + while (!currentProcess.CpuMemory.AtomicCompareExchangeInt32((long)address, currentValue, currentValue + offset)); WakeArbiterThreads(address, count); diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs index 81cd88834f..9790717729 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs @@ -70,8 +70,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading CurrentThread.TotalTimeRunning += currentTime - CurrentThread.LastScheduledTime; CurrentThread.LastScheduledTime = currentTime; - CurrentThread.ClearExclusive(); - _coreManager.Set(CurrentThread.Context.Work); 
CurrentThread.Context.Execute(); diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs index 7eb27efc12..17e0f3c3bf 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs @@ -1004,11 +1004,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading Context.ThreadState.X1 = (ulong)threadHandle; } - public void ClearExclusive() - { - Owner.CpuMemory.ClearExclusive(CurrentCore); - } - public void TimeUp() { ReleaseAndResume(); diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc index 1ba601647a..8b5ebe0328 100644 --- a/Ryujinx/Config.jsonc +++ b/Ryujinx/Config.jsonc @@ -36,7 +36,7 @@ "enable_vsync": true, // Enable or Disable Multi-core scheduling of threads - "enable_multicore_scheduling": false, + "enable_multicore_scheduling": true, // Enable integrity checks on Switch content files "enable_fs_integrity_checks": true, diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json index 28f3511181..0e586671d6 100644 --- a/Ryujinx/_schema.json +++ b/Ryujinx/_schema.json @@ -382,7 +382,7 @@ "type": "boolean", "title": "Enable Multicore Scheduling", "description": "Enables or disables multi-core scheduling of threads", - "default": false, + "default": true, "examples": [ true, false From 6335753e382eec1cf9037545851f1de2459b94cc Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 18 Feb 2019 21:12:53 -0300 Subject: [PATCH 02/12] Implement ConvertScalingMode properly (#596) * Implement ConvertScalingMode properly * Fix up the naming * Only values 2 and 4 are allowed * Return a nullable enum from ConvetScalingMode * Fix typo on method name * Use convertedScalingMode --- .../Services/Vi/IApplicationDisplayService.cs | 21 +++++++++++++----- Ryujinx.HLE/HOS/Services/Vi/ScalingMode.cs | 22 ++++++++----------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs 
index b272e0788d..48cf328806 100644 --- a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs +++ b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs @@ -180,22 +180,31 @@ namespace Ryujinx.HLE.HOS.Services.Vi public long ConvertScalingMode(ServiceCtx context) { - SrcScalingMode scalingMode = (SrcScalingMode)context.RequestData.ReadInt32(); - DstScalingMode? destScalingMode = ConvetScalingModeImpl(scalingMode); + SrcScalingMode scalingMode = (SrcScalingMode)context.RequestData.ReadInt32(); - if (!destScalingMode.HasValue) + DstScalingMode? convertedScalingMode = ConvertScalingMode(scalingMode); + + if (!convertedScalingMode.HasValue) { + //Scaling mode out of the range of valid values. return MakeError(ErrorModule.Vi, 1); } - context.ResponseData.Write((ulong)destScalingMode); + if (scalingMode != SrcScalingMode.ScaleToWindow && + scalingMode != SrcScalingMode.PreserveAspectRatio) + { + //Invalid scaling mode specified. + return MakeError(ErrorModule.Vi, 6); + } + + context.ResponseData.Write((ulong)convertedScalingMode); return 0; } - private DstScalingMode? ConvetScalingModeImpl(SrcScalingMode srcScalingMode) + private DstScalingMode? 
ConvertScalingMode(SrcScalingMode source) { - switch (srcScalingMode) + switch (source) { case SrcScalingMode.None: return DstScalingMode.None; case SrcScalingMode.Freeze: return DstScalingMode.Freeze; diff --git a/Ryujinx.HLE/HOS/Services/Vi/ScalingMode.cs b/Ryujinx.HLE/HOS/Services/Vi/ScalingMode.cs index 824a27b70a..7b555b5999 100644 --- a/Ryujinx.HLE/HOS/Services/Vi/ScalingMode.cs +++ b/Ryujinx.HLE/HOS/Services/Vi/ScalingMode.cs @@ -1,24 +1,20 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace Ryujinx.HLE.HOS.Services.Vi +namespace Ryujinx.HLE.HOS.Services.Vi { enum SrcScalingMode { - Freeze = 0, - ScaleToWindow = 1, - ScaleAndCrop = 2, - None = 3, + None = 0, + Freeze = 1, + ScaleToWindow = 2, + ScaleAndCrop = 3, PreserveAspectRatio = 4 } enum DstScalingMode { - None = 0, - Freeze = 1, - ScaleToWindow = 2, - ScaleAndCrop = 3, + Freeze = 0, + ScaleToWindow = 1, + ScaleAndCrop = 2, + None = 3, PreserveAspectRatio = 4 } } From 7ed2b4cc39ca286a03589cd3768a419c5ed9941f Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 22 Feb 2019 02:14:02 -0300 Subject: [PATCH 03/12] Initialize FrontFace register with a default value (#601) --- Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs index 1ca3ca1ce1..6120053dae 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs @@ -66,6 +66,8 @@ namespace Ryujinx.Graphics.Graphics3d WriteRegister(NvGpuEngine3dReg.FrameBufferSrgb, 1); + WriteRegister(NvGpuEngine3dReg.FrontFace, (int)GalFrontFace.CW); + for (int Index = 0; Index < GalPipelineState.RenderTargetsCount; Index++) { WriteRegister(NvGpuEngine3dReg.IBlendNEquationRgb + Index * 8, (int)GalBlendEquation.FuncAdd); From 9679896b9471afdebf860c016d3fd360b9af7f80 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 23 Feb 2019 20:52:48 -0300 Subject: [PATCH 
04/12] Implement fixed-point variant of the UCVTF and SCVTF instructions (#578) * Add fixed-point variant of the UCVTF instruction * Change encoding of some fixed-point instructions to not allow invalid encodings * Fix Fcvtzu_Gp_Fixed encoding * Add SCVTF (fixed-point GP to Scalar) instruction * Simplify *Fixed encodings --- ChocolArm64/Decoders/OpCodeSimdCvt64.cs | 9 -- ChocolArm64/Instructions/InstEmitSimdCvt.cs | 108 +++++++++++++------- ChocolArm64/OpCodeTable.cs | 6 +- 3 files changed, 76 insertions(+), 47 deletions(-) diff --git a/ChocolArm64/Decoders/OpCodeSimdCvt64.cs b/ChocolArm64/Decoders/OpCodeSimdCvt64.cs index eacd594099..3181a85a34 100644 --- a/ChocolArm64/Decoders/OpCodeSimdCvt64.cs +++ b/ChocolArm64/Decoders/OpCodeSimdCvt64.cs @@ -8,18 +8,9 @@ namespace ChocolArm64.Decoders public OpCodeSimdCvt64(Inst inst, long position, int opCode) : base(inst, position, opCode) { - //TODO: - //Und of Fixed Point variants. int scale = (opCode >> 10) & 0x3f; int sf = (opCode >> 31) & 0x1; - /*if (Type != SF && !(Type == 2 && SF == 1)) - { - Emitter = AInstEmit.Und; - - return; - }*/ - FBits = 64 - scale; RegisterSize = sf != 0 diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs index 2eac3194d6..11105d891f 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCvt.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs @@ -244,7 +244,7 @@ namespace ChocolArm64.Instructions public static void Fcvtzs_Gp_Fixed(ILEmitterCtx context) { - EmitFcvtzs_Gp_Fix(context); + EmitFcvtzs_Gp_Fixed(context); } public static void Fcvtzs_S(ILEmitterCtx context) @@ -264,7 +264,7 @@ namespace ChocolArm64.Instructions public static void Fcvtzu_Gp_Fixed(ILEmitterCtx context) { - EmitFcvtzu_Gp_Fix(context); + EmitFcvtzu_Gp_Fixed(context); } public static void Fcvtzu_S(ILEmitterCtx context) @@ -293,6 +293,24 @@ namespace ChocolArm64.Instructions EmitScalarSetF(context, op.Rd, op.Size); } + public static void Scvtf_Gp_Fixed(ILEmitterCtx context) + 
{ + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_I4); + } + + EmitFloatCast(context, op.Size); + + EmitI2fFBitsMul(context, op.Size, op.FBits); + + EmitScalarSetF(context, op.Rd, op.Size); + } + public static void Scvtf_S(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; @@ -349,6 +367,26 @@ namespace ChocolArm64.Instructions EmitScalarSetF(context, op.Rd, op.Size); } + public static void Ucvtf_Gp_Fixed(ILEmitterCtx context) + { + OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; + + context.EmitLdintzr(op.Rn); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U4); + } + + context.Emit(OpCodes.Conv_R_Un); + + EmitFloatCast(context, op.Size); + + EmitI2fFBitsMul(context, op.Size, op.FBits); + + EmitScalarSetF(context, op.Rd, op.Size); + } + public static void Ucvtf_S(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; @@ -367,32 +405,6 @@ namespace ChocolArm64.Instructions EmitVectorCvtf(context, signed: false); } - private static int GetFBits(ILEmitterCtx context) - { - if (context.CurrOp is OpCodeSimdShImm64 op) - { - return GetImmShr(op); - } - - return 0; - } - - private static void EmitFloatCast(ILEmitterCtx context, int size) - { - if (size == 0) - { - context.Emit(OpCodes.Conv_R4); - } - else if (size == 1) - { - context.Emit(OpCodes.Conv_R8); - } - else - { - throw new ArgumentOutOfRangeException(nameof(size)); - } - } - private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; @@ -476,17 +488,17 @@ namespace ChocolArm64.Instructions context.EmitStintzr(op.Rd); } - private static void EmitFcvtzs_Gp_Fix(ILEmitterCtx context) + private static void EmitFcvtzs_Gp_Fixed(ILEmitterCtx context) { - EmitFcvtz__Gp_Fix(context, true); + EmitFcvtz__Gp_Fixed(context, true); } - private 
static void EmitFcvtzu_Gp_Fix(ILEmitterCtx context) + private static void EmitFcvtzu_Gp_Fixed(ILEmitterCtx context) { - EmitFcvtz__Gp_Fix(context, false); + EmitFcvtz__Gp_Fixed(context, false); } - private static void EmitFcvtz__Gp_Fix(ILEmitterCtx context, bool signed) + private static void EmitFcvtz__Gp_Fixed(ILEmitterCtx context, bool signed) { OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp; @@ -530,9 +542,7 @@ namespace ChocolArm64.Instructions context.Emit(OpCodes.Conv_R_Un); } - context.Emit(sizeF == 0 - ? OpCodes.Conv_R4 - : OpCodes.Conv_R8); + EmitFloatCast(context, sizeF); EmitI2fFBitsMul(context, sizeF, fBits); @@ -644,6 +654,32 @@ namespace ChocolArm64.Instructions } } + private static int GetFBits(ILEmitterCtx context) + { + if (context.CurrOp is OpCodeSimdShImm64 op) + { + return GetImmShr(op); + } + + return 0; + } + + private static void EmitFloatCast(ILEmitterCtx context, int size) + { + if (size == 0) + { + context.Emit(OpCodes.Conv_R4); + } + else if (size == 1) + { + context.Emit(OpCodes.Conv_R8); + } + else + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + } + private static void EmitScalarFcvts(ILEmitterCtx context, int size, int fBits) { if (size < 0 || size > 1) diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs index 3a8d3948d8..9fdda87b42 100644 --- a/ChocolArm64/OpCodeTable.cs +++ b/ChocolArm64/OpCodeTable.cs @@ -310,12 +310,12 @@ namespace ChocolArm64 SetA64("x00111100x101000000000xxxxxxxxxx", InstEmit.Fcvtps_Gp, typeof(OpCodeSimdCvt64)); SetA64("x00111100x101001000000xxxxxxxxxx", InstEmit.Fcvtpu_Gp, typeof(OpCodeSimdCvt64)); SetA64("x00111100x111000000000xxxxxxxxxx", InstEmit.Fcvtzs_Gp, typeof(OpCodeSimdCvt64)); - SetA64("x00111100x011000xxxxxxxxxxxxxxxx", InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt64)); + SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt64)); SetA64("010111101x100001101110xxxxxxxxxx", InstEmit.Fcvtzs_S, typeof(OpCodeSimd64)); 
SetA64("0>0011101<100001101110xxxxxxxxxx", InstEmit.Fcvtzs_V, typeof(OpCodeSimd64)); SetA64("0x0011110>>xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzs_V, typeof(OpCodeSimdShImm64)); SetA64("x00111100x111001000000xxxxxxxxxx", InstEmit.Fcvtzu_Gp, typeof(OpCodeSimdCvt64)); - SetA64("x00111100x011001xxxxxxxxxxxxxxxx", InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt64)); + SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt64)); SetA64("011111101x100001101110xxxxxxxxxx", InstEmit.Fcvtzu_S, typeof(OpCodeSimd64)); SetA64("0>1011101<100001101110xxxxxxxxxx", InstEmit.Fcvtzu_V, typeof(OpCodeSimd64)); SetA64("0x1011110>>xxxxx111111xxxxxxxxxx", InstEmit.Fcvtzu_V, typeof(OpCodeSimdShImm64)); @@ -434,6 +434,7 @@ namespace ChocolArm64 SetA64("0x001110<<100000001010xxxxxxxxxx", InstEmit.Saddlp_V, typeof(OpCodeSimd64)); SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstEmit.Saddw_V, typeof(OpCodeSimdReg64)); SetA64("x00111100x100010000000xxxxxxxxxx", InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt64)); + SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstEmit.Scvtf_Gp_Fixed, typeof(OpCodeSimdCvt64)); SetA64("010111100x100001110110xxxxxxxxxx", InstEmit.Scvtf_S, typeof(OpCodeSimd64)); SetA64("0>0011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64)); SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64)); @@ -542,6 +543,7 @@ namespace ChocolArm64 SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64)); SetA64("x00111100x100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64)); + SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstEmit.Ucvtf_Gp_Fixed, typeof(OpCodeSimdCvt64)); SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64)); SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64)); SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, 
typeof(OpCodeSimdReg64)); From a3d46e41335efd049042cc2e38b35c4077e8ed41 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 24 Feb 2019 00:53:27 +0100 Subject: [PATCH 05/12] Add Tests for instructions Fcvtzs_Gp_Fixed & Fcvtzu_Gp_Fixed, Scvtf_Gp_Fixed & Ucvtf_Gp_Fixed. (#603) * Create CpuTestSimdCvt.cs * Update CpuTestMisc.cs * Update CpuTestSimdCvt.cs --- Ryujinx.Tests/Cpu/CpuTestMisc.cs | 4 +- Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs | 360 ++++++++++++++++++++++++++++ 2 files changed, 362 insertions(+), 2 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs diff --git a/Ryujinx.Tests/Cpu/CpuTestMisc.cs b/Ryujinx.Tests/Cpu/CpuTestMisc.cs index 89ea47a920..e976c2c00a 100644 --- a/Ryujinx.Tests/Cpu/CpuTestMisc.cs +++ b/Ryujinx.Tests/Cpu/CpuTestMisc.cs @@ -12,8 +12,8 @@ namespace Ryujinx.Tests.Cpu public sealed class CpuTestMisc : CpuTest { #if Misc - private const int RndCnt = 2; - private const int RndCntImm = 2; + private const int RndCnt = 2; + private const int RndCntImm = 2; #region "AluImm & Csel" [Test, Pairwise] diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs new file mode 100644 index 0000000000..ecf90b0aa7 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs @@ -0,0 +1,360 @@ +#define SimdCvt + +using NUnit.Framework; + +using System.Collections.Generic; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdCvt")] + public sealed class CpuTestSimdCvt : CpuTest + { +#if SimdCvt + +#region "ValueSource (Types)" + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 
0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 
0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalD(); + ulong rnd2 = GenSubnormalD(); + + yield return rnd1; + yield return rnd2; + } + } + + private static uint[] _W_() + { + return new uint[] { 0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu }; + } + + private static ulong[] _X_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _F_Cvt_Z_SU_S_SW_() + { + return new uint[] + { + 0x1E188000u, // FCVTZS W0, S0, #32 + 0x1E198000u // FCVTZU W0, S0, #32 + }; + } + + private static uint[] _F_Cvt_Z_SU_S_SX_() + { + return new uint[] + { + 0x9E180000u, // FCVTZS X0, S0, #64 + 0x9E190000u // FCVTZU X0, S0, #64 + }; + } + + private static uint[] _F_Cvt_Z_SU_S_DW_() + { + return new uint[] + { + 0x1E588000u, // FCVTZS W0, D0, #32 + 0x1E598000u // FCVTZU W0, D0, #32 + }; + } + + private static uint[] _F_Cvt_Z_SU_S_DX_() + { + return new uint[] + { + 0x9E580000u, // FCVTZS X0, D0, #64 + 0x9E590000u // FCVTZU X0, D0, #64 + }; + } + + private static uint[] _SU_Cvt_F_S_WS_() + { + return new uint[] + { + 0x1E028000u, // SCVTF S0, W0, #32 + 0x1E038000u // UCVTF S0, W0, #32 + }; + } + + private static uint[] _SU_Cvt_F_S_WD_() + { + return new uint[] + { + 0x1E428000u, // SCVTF D0, W0, #32 + 0x1E438000u // UCVTF D0, W0, #32 + }; + } + + private static uint[] _SU_Cvt_F_S_XS_() + { + return new uint[] + { + 0x9E020000u, // SCVTF S0, X0, #64 + 0x9E030000u // UCVTF S0, X0, #64 + }; + } + + private static uint[] _SU_Cvt_F_S_XD_() + { + return new uint[] + { + 0x9E420000u, // SCVTF D0, X0, #64 + 0x9E430000u // UCVTF D0, X0, #64 + }; + } +#endregion + + private const int RndCnt = 2; + private const int RndCntFbits = 2; + + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + + 
[Test, Pairwise] [Explicit] + public void F_Cvt_Z_SU_S_SW([ValueSource("_F_Cvt_Z_SU_S_SW_")] uint opcodes, + [Values(0u, 31u)] uint rd, + [Values(1u)] uint rn, + [ValueSource("_1S_F_")] ulong a, + [Values(1u, 32u)] [Random(2u, 31u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + Vector128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void F_Cvt_Z_SU_S_SX([ValueSource("_F_Cvt_Z_SU_S_SX_")] uint opcodes, + [Values(0u, 31u)] uint rd, + [Values(1u)] uint rn, + [ValueSource("_1S_F_")] ulong a, + [Values(1u, 64u)] [Random(2u, 63u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + Vector128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, x31: x31, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void F_Cvt_Z_SU_S_DW([ValueSource("_F_Cvt_Z_SU_S_DW_")] uint opcodes, + [Values(0u, 31u)] uint rd, + [Values(1u)] uint rn, + [ValueSource("_1D_F_")] ulong a, + [Values(1u, 32u)] [Random(2u, 31u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + Vector128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void F_Cvt_Z_SU_S_DX([ValueSource("_F_Cvt_Z_SU_S_DX_")] uint opcodes, + [Values(0u, 31u)] uint rd, + [Values(1u)] uint rn, + 
[ValueSource("_1D_F_")] ulong a, + [Values(1u, 64u)] [Random(2u, 63u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + Vector128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, x31: x31, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_S_WS([ValueSource("_SU_Cvt_F_S_WS_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 31u)] uint rn, + [ValueSource("_W_")] [Random(RndCnt)] uint wn, + [Values(1u, 32u)] [Random(2u, 31u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + + SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_S_WD([ValueSource("_SU_Cvt_F_S_WD_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 31u)] uint rn, + [ValueSource("_W_")] [Random(RndCnt)] uint wn, + [Values(1u, 32u)] [Random(2u, 31u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(z); + + SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_S_XS([ValueSource("_SU_Cvt_F_S_XS_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 31u)] uint rn, + [ValueSource("_X_")] [Random(RndCnt)] ulong xn, + [Values(1u, 64u)] [Random(2u, 63u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= 
((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + + SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_S_XD([ValueSource("_SU_Cvt_F_S_XD_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 31u)] uint rn, + [ValueSource("_X_")] [Random(RndCnt)] ulong xn, + [Values(1u, 64u)] [Random(2u, 63u, RndCntFbits)] uint fbits) + { + uint scale = (64u - fbits) & 0x3Fu; + + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= (scale << 10); + + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(z); + + SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); + + CompareAgainstUnicorn(); + } +#endif + } +} From 5001f78b1d07b988709dd5f5d1009ebe9b44c669 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 24 Feb 2019 04:24:35 -0300 Subject: [PATCH 06/12] Optimize address translation and write tracking on the MMU (#571) * Implement faster address translation and write tracking on the MMU * Rename MemoryAlloc to MemoryManagement, and other nits * Support multi-level page tables * Fix typo * Reword comment a bit * Support scalar vector loads/stores on the memory fast path, and minor fixes * Add missing cast * Alignment * Fix VirtualFree function signature * Change MemoryProtection enum to uint aswell for consistency --- ChocolArm64/Events/InvalidAccessEventArgs.cs | 14 - .../Exceptions/VmmPageFaultException.cs | 13 - ChocolArm64/Instructions/InstEmitMemory.cs | 11 - ChocolArm64/Instructions/InstEmitMemory32.cs | 6 - ChocolArm64/Instructions/InstEmitMemoryEx.cs | 22 +- .../Instructions/InstEmitMemoryHelper.cs | 419 ++++++++++- .../Instructions/InstEmitSimdMemory.cs | 3 - ChocolArm64/Instructions/InstEmitSystem.cs | 1 - 
ChocolArm64/Memory/CompareExchange128.cs | 6 +- .../{MemoryAlloc.cs => MemoryManagement.cs} | 20 +- ...ryAllocUnix.cs => MemoryManagementUnix.cs} | 2 +- ...cWindows.cs => MemoryManagementWindows.cs} | 8 +- ChocolArm64/Memory/MemoryManager.cs | 704 ++++++++++-------- ChocolArm64/Translation/ILEmitterCtx.cs | 132 ++-- ChocolArm64/Translation/Translator.cs | 4 +- Ryujinx.Graphics/Memory/NvGpuVmmCache.cs | 34 +- Ryujinx.HLE/DeviceMemory.cs | 8 +- Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs | 46 +- .../HOS/Kernel/SupervisorCall/SvcHandler.cs | 3 - .../HOS/Kernel/SupervisorCall/SvcIpc.cs | 4 +- .../HOS/Kernel/SupervisorCall/SvcMemory.cs | 21 +- .../HOS/Kernel/SupervisorCall/SvcSystem.cs | 2 +- .../HOS/Kernel/SupervisorCall/SvcThread.cs | 141 ++-- .../Kernel/SupervisorCall/SvcThreadSync.cs | 2 +- 24 files changed, 1005 insertions(+), 621 deletions(-) delete mode 100644 ChocolArm64/Events/InvalidAccessEventArgs.cs delete mode 100644 ChocolArm64/Exceptions/VmmPageFaultException.cs rename ChocolArm64/Memory/{MemoryAlloc.cs => MemoryManagement.cs} (81%) rename ChocolArm64/Memory/{MemoryAllocUnix.cs => MemoryManagementUnix.cs} (98%) rename ChocolArm64/Memory/{MemoryAllocWindows.cs => MemoryManagementWindows.cs} (95%) diff --git a/ChocolArm64/Events/InvalidAccessEventArgs.cs b/ChocolArm64/Events/InvalidAccessEventArgs.cs deleted file mode 100644 index 9c349755f0..0000000000 --- a/ChocolArm64/Events/InvalidAccessEventArgs.cs +++ /dev/null @@ -1,14 +0,0 @@ -using System; - -namespace ChocolArm64.Events -{ - public class MemoryAccessEventArgs : EventArgs - { - public long Position { get; private set; } - - public MemoryAccessEventArgs(long position) - { - Position = position; - } - } -} \ No newline at end of file diff --git a/ChocolArm64/Exceptions/VmmPageFaultException.cs b/ChocolArm64/Exceptions/VmmPageFaultException.cs deleted file mode 100644 index f33aafc013..0000000000 --- a/ChocolArm64/Exceptions/VmmPageFaultException.cs +++ /dev/null @@ -1,13 +0,0 @@ -using System; - 
-namespace ChocolArm64.Exceptions -{ - public class VmmPageFaultException : Exception - { - private const string ExMsg = "Tried to access unmapped address 0x{0:x16}!"; - - public VmmPageFaultException() { } - - public VmmPageFaultException(long position) : base(string.Format(ExMsg, position)) { } - } -} \ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitMemory.cs b/ChocolArm64/Instructions/InstEmitMemory.cs index 96f782df64..ea779c8da4 100644 --- a/ChocolArm64/Instructions/InstEmitMemory.cs +++ b/ChocolArm64/Instructions/InstEmitMemory.cs @@ -31,8 +31,6 @@ namespace ChocolArm64.Instructions { OpCodeMem64 op = (OpCodeMem64)context.CurrOp; - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - EmitLoadAddress(context); if (signed && op.Extend64) @@ -69,7 +67,6 @@ namespace ChocolArm64.Instructions return; } - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdc_I8(op.Imm); if (op.Signed) @@ -116,13 +113,10 @@ namespace ChocolArm64.Instructions } } - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - EmitLoadAddress(context); EmitReadAndStore(op.Rt); - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); context.EmitLdc_I8(1 << op.Size); @@ -137,8 +131,6 @@ namespace ChocolArm64.Instructions { OpCodeMem64 op = (OpCodeMem64)context.CurrOp; - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - EmitLoadAddress(context); if (op is IOpCodeSimd64) @@ -159,8 +151,6 @@ namespace ChocolArm64.Instructions { OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp; - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - EmitLoadAddress(context); if (op is IOpCodeSimd64) @@ -174,7 +164,6 @@ namespace ChocolArm64.Instructions EmitWriteCall(context, op.Size); - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); context.EmitLdc_I8(1 << op.Size); diff --git a/ChocolArm64/Instructions/InstEmitMemory32.cs b/ChocolArm64/Instructions/InstEmitMemory32.cs index 4d6a57a472..1e1419e65e 100644 --- a/ChocolArm64/Instructions/InstEmitMemory32.cs 
+++ b/ChocolArm64/Instructions/InstEmitMemory32.cs @@ -64,9 +64,7 @@ namespace ChocolArm64.Instructions { if ((mask & 1) != 0) { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); - context.EmitLdc_I4(offset); context.Emit(OpCodes.Add); @@ -129,9 +127,7 @@ namespace ChocolArm64.Instructions { if ((mask & 1) != 0) { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); - context.EmitLdc_I4(offset); context.Emit(OpCodes.Add); @@ -198,8 +194,6 @@ namespace ChocolArm64.Instructions context.EmitSttmp(); } - context.EmitLdarg(TranslatedSub.MemoryArgIdx); - if (op.Index) { context.EmitLdtmp(); diff --git a/ChocolArm64/Instructions/InstEmitMemoryEx.cs b/ChocolArm64/Instructions/InstEmitMemoryEx.cs index 215fcffdd5..920c695fff 100644 --- a/ChocolArm64/Instructions/InstEmitMemoryEx.cs +++ b/ChocolArm64/Instructions/InstEmitMemoryEx.cs @@ -72,6 +72,8 @@ namespace ChocolArm64.Instructions void WriteExclusiveValue(string propName) { + context.Emit(OpCodes.Dup); + if (op.Size < 3) { context.Emit(OpCodes.Conv_U8); @@ -82,13 +84,6 @@ namespace ChocolArm64.Instructions context.EmitLdtmp2(); context.EmitCallPrivatePropSet(typeof(CpuThreadState), propName); - - context.EmitLdtmp2(); - - if (op.Size < 3) - { - context.Emit(OpCodes.Conv_U4); - } } if (pair) @@ -99,7 +94,6 @@ namespace ChocolArm64.Instructions //method to read 128-bits atomically. if (op.Size == 2) { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); EmitReadZxCall(context, 3); @@ -164,13 +158,12 @@ namespace ChocolArm64.Instructions } else { - throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes."); + throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes."); } } else { //8, 16, 32 or 64-bits (non-pairwise) load. 
- context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdtmp(); EmitReadZxCall(context, op.Size); @@ -320,9 +313,8 @@ namespace ChocolArm64.Instructions } else { - void EmitWrite(int rt, long offset) + void EmitWriteCall(int rt, long offset) { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdint(op.Rn); if (offset != 0) @@ -334,14 +326,14 @@ namespace ChocolArm64.Instructions context.EmitLdintzr(rt); - EmitWriteCall(context, op.Size); + InstEmitMemoryHelper.EmitWriteCall(context, op.Size); } - EmitWrite(op.Rt, 0); + EmitWriteCall(op.Rt, 0); if (pair) { - EmitWrite(op.Rt2, 1 << op.Size); + EmitWriteCall(op.Rt2, 1 << op.Size); } } } diff --git a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs index f953564c46..7645e36316 100644 --- a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs +++ b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs @@ -1,13 +1,20 @@ using ChocolArm64.Decoders; using ChocolArm64.Memory; +using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; namespace ChocolArm64.Instructions { static class InstEmitMemoryHelper { + private static int _tempIntAddress = ILEmitterCtx.GetIntTempIndex(); + private static int _tempIntValue = ILEmitterCtx.GetIntTempIndex(); + private static int _tempIntPtAddr = ILEmitterCtx.GetIntTempIndex(); + private static int _tempVecValue = ILEmitterCtx.GetVecTempIndex(); + private enum Extension { Zx, @@ -32,9 +39,10 @@ namespace ChocolArm64.Instructions private static void EmitReadCall(ILEmitterCtx context, Extension ext, int size) { - bool isSimd = GetIsSimd(context); + //Save the address into a temp. + context.EmitStint(_tempIntAddress); - string name = null; + bool isSimd = IsSimd(context); if (size < 0 || size > (isSimd ? 
4 : 3)) { @@ -43,28 +51,27 @@ namespace ChocolArm64.Instructions if (isSimd) { - switch (size) + if (context.Tier == TranslationTier.Tier0 || !Sse2.IsSupported || size < 2) { - case 0: name = nameof(MemoryManager.ReadVector8); break; - case 1: name = nameof(MemoryManager.ReadVector16); break; - case 2: name = nameof(MemoryManager.ReadVector32); break; - case 3: name = nameof(MemoryManager.ReadVector64); break; - case 4: name = nameof(MemoryManager.ReadVector128); break; + EmitReadVectorFallback(context, size); + } + else + { + EmitReadVector(context, size); } } else { - switch (size) + if (context.Tier == TranslationTier.Tier0) { - case 0: name = nameof(MemoryManager.ReadByte); break; - case 1: name = nameof(MemoryManager.ReadUInt16); break; - case 2: name = nameof(MemoryManager.ReadUInt32); break; - case 3: name = nameof(MemoryManager.ReadUInt64); break; + EmitReadIntFallback(context, size); + } + else + { + EmitReadInt(context, size); } } - context.EmitCall(typeof(MemoryManager), name); - if (!isSimd) { if (ext == Extension.Sx32 || @@ -89,50 +96,390 @@ namespace ChocolArm64.Instructions public static void EmitWriteCall(ILEmitterCtx context, int size) { - bool isSimd = GetIsSimd(context); + bool isSimd = IsSimd(context); - string name = null; + //Save the value into a temp. + if (isSimd) + { + context.EmitStvec(_tempVecValue); + } + else + { + context.EmitStint(_tempIntValue); + } + + //Save the address into a temp. + context.EmitStint(_tempIntAddress); if (size < 0 || size > (isSimd ? 
4 : 3)) { throw new ArgumentOutOfRangeException(nameof(size)); } - if (size < 3 && !isSimd) - { - context.Emit(OpCodes.Conv_I4); - } - if (isSimd) { - switch (size) + if (context.Tier == TranslationTier.Tier0 || !Sse2.IsSupported || size < 2) { - case 0: name = nameof(MemoryManager.WriteVector8); break; - case 1: name = nameof(MemoryManager.WriteVector16); break; - case 2: name = nameof(MemoryManager.WriteVector32); break; - case 3: name = nameof(MemoryManager.WriteVector64); break; - case 4: name = nameof(MemoryManager.WriteVector128); break; + EmitWriteVectorFallback(context, size); + } + else + { + EmitWriteVector(context, size); } } else { - switch (size) + if (context.Tier == TranslationTier.Tier0) { - case 0: name = nameof(MemoryManager.WriteByte); break; - case 1: name = nameof(MemoryManager.WriteUInt16); break; - case 2: name = nameof(MemoryManager.WriteUInt32); break; - case 3: name = nameof(MemoryManager.WriteUInt64); break; + EmitWriteIntFallback(context, size); + } + else + { + EmitWriteInt(context, size); } } - - context.EmitCall(typeof(MemoryManager), name); } - private static bool GetIsSimd(ILEmitterCtx context) + private static bool IsSimd(ILEmitterCtx context) { return context.CurrOp is IOpCodeSimd64 && !(context.CurrOp is OpCodeSimdMemMs64 || context.CurrOp is OpCodeSimdMemSs64); } + + private static void EmitReadInt(ILEmitterCtx context, int size) + { + EmitAddressCheck(context, size); + + ILLabel lblFastPath = new ILLabel(); + ILLabel lblSlowPath = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(OpCodes.Brfalse_S, lblFastPath); + + context.MarkLabel(lblSlowPath); + + EmitReadIntFallback(context, size); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblFastPath); + + EmitPtPointerLoad(context, lblSlowPath); + + switch (size) + { + case 0: context.Emit(OpCodes.Ldind_U1); break; + case 1: context.Emit(OpCodes.Ldind_U2); break; + case 2: context.Emit(OpCodes.Ldind_U4); break; + case 3: context.Emit(OpCodes.Ldind_I8); 
break; + } + + context.MarkLabel(lblEnd); + } + + private static void EmitReadVector(ILEmitterCtx context, int size) + { + EmitAddressCheck(context, size); + + ILLabel lblFastPath = new ILLabel(); + ILLabel lblSlowPath = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(OpCodes.Brfalse_S, lblFastPath); + + context.MarkLabel(lblSlowPath); + + EmitReadVectorFallback(context, size); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblFastPath); + + EmitPtPointerLoad(context, lblSlowPath); + + switch (size) + { + case 2: context.EmitCall(typeof(Sse), nameof(Sse.LoadScalarVector128)); break; + + case 3: + { + Type[] types = new Type[] { typeof(double*) }; + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.LoadScalarVector128), types)); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle)); + + break; + } + + case 4: context.EmitCall(typeof(Sse), nameof(Sse.LoadAlignedVector128)); break; + + throw new InvalidOperationException($"Invalid vector load size of {1 << size} bytes."); + } + + context.MarkLabel(lblEnd); + } + + private static void EmitWriteInt(ILEmitterCtx context, int size) + { + EmitAddressCheck(context, size); + + ILLabel lblFastPath = new ILLabel(); + ILLabel lblSlowPath = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(OpCodes.Brfalse_S, lblFastPath); + + context.MarkLabel(lblSlowPath); + + EmitWriteIntFallback(context, size); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblFastPath); + + EmitPtPointerLoad(context, lblSlowPath); + + context.EmitLdint(_tempIntValue); + + if (size < 3) + { + context.Emit(OpCodes.Conv_U4); + } + + switch (size) + { + case 0: context.Emit(OpCodes.Stind_I1); break; + case 1: context.Emit(OpCodes.Stind_I2); break; + case 2: context.Emit(OpCodes.Stind_I4); break; + case 3: context.Emit(OpCodes.Stind_I8); break; + } + + context.MarkLabel(lblEnd); + } + + private static void EmitWriteVector(ILEmitterCtx context, int size) + { + 
EmitAddressCheck(context, size); + + ILLabel lblFastPath = new ILLabel(); + ILLabel lblSlowPath = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.Emit(OpCodes.Brfalse_S, lblFastPath); + + context.MarkLabel(lblSlowPath); + + EmitWriteVectorFallback(context, size); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblFastPath); + + EmitPtPointerLoad(context, lblSlowPath); + + context.EmitLdvec(_tempVecValue); + + switch (size) + { + case 2: context.EmitCall(typeof(Sse), nameof(Sse.StoreScalar)); break; + + case 3: + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble)); + + context.EmitCall(typeof(Sse2), nameof(Sse2.StoreScalar)); + + break; + } + + case 4: context.EmitCall(typeof(Sse), nameof(Sse.StoreAligned)); break; + + default: throw new InvalidOperationException($"Invalid vector store size of {1 << size} bytes."); + } + + context.MarkLabel(lblEnd); + } + + private static void EmitAddressCheck(ILEmitterCtx context, int size) + { + long addressCheckMask = ~(context.Memory.AddressSpaceSize - 1); + + addressCheckMask |= (1u << size) - 1; + + context.EmitLdint(_tempIntAddress); + + context.EmitLdc_I(addressCheckMask); + + context.Emit(OpCodes.And); + } + + private static void EmitPtPointerLoad(ILEmitterCtx context, ILLabel lblFallbackPath) + { + context.EmitLdc_I8(context.Memory.PageTable.ToInt64()); + + context.Emit(OpCodes.Conv_I); + + int bit = MemoryManager.PageBits; + + do + { + context.EmitLdint(_tempIntAddress); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitLsr(bit); + + bit += context.Memory.PtLevelBits; + + if (bit < context.Memory.AddressSpaceBits) + { + context.EmitLdc_I8(context.Memory.PtLevelMask); + + context.Emit(OpCodes.And); + } + + context.EmitLdc_I8(IntPtr.Size); + + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Conv_I); + context.Emit(OpCodes.Add); + context.Emit(OpCodes.Ldind_I); + } + while (bit < 
context.Memory.AddressSpaceBits); + + if (!context.Memory.HasWriteWatchSupport) + { + context.Emit(OpCodes.Conv_U8); + + context.EmitStint(_tempIntPtAddr); + context.EmitLdint(_tempIntPtAddr); + + context.EmitLdc_I8(MemoryManager.PteFlagsMask); + + context.Emit(OpCodes.And); + + context.Emit(OpCodes.Brtrue, lblFallbackPath); + + context.EmitLdint(_tempIntPtAddr); + + context.Emit(OpCodes.Conv_I); + } + + context.EmitLdint(_tempIntAddress); + + context.EmitLdc_I(MemoryManager.PageMask); + + context.Emit(OpCodes.And); + context.Emit(OpCodes.Conv_I); + context.Emit(OpCodes.Add); + } + + private static void EmitReadIntFallback(ILEmitterCtx context, int size) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(_tempIntAddress); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + string fallbackMethodName = null; + + switch (size) + { + case 0: fallbackMethodName = nameof(MemoryManager.ReadByte); break; + case 1: fallbackMethodName = nameof(MemoryManager.ReadUInt16); break; + case 2: fallbackMethodName = nameof(MemoryManager.ReadUInt32); break; + case 3: fallbackMethodName = nameof(MemoryManager.ReadUInt64); break; + } + + context.EmitCall(typeof(MemoryManager), fallbackMethodName); + } + + private static void EmitReadVectorFallback(ILEmitterCtx context, int size) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(_tempIntAddress); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + string fallbackMethodName = null; + + switch (size) + { + case 0: fallbackMethodName = nameof(MemoryManager.ReadVector8); break; + case 1: fallbackMethodName = nameof(MemoryManager.ReadVector16); break; + case 2: fallbackMethodName = nameof(MemoryManager.ReadVector32); break; + case 3: fallbackMethodName = nameof(MemoryManager.ReadVector64); break; + case 4: fallbackMethodName = nameof(MemoryManager.ReadVector128); break; + } + + 
context.EmitCall(typeof(MemoryManager), fallbackMethodName); + } + + private static void EmitWriteIntFallback(ILEmitterCtx context, int size) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(_tempIntAddress); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitLdint(_tempIntValue); + + if (size < 3) + { + context.Emit(OpCodes.Conv_U4); + } + + string fallbackMethodName = null; + + switch (size) + { + case 0: fallbackMethodName = nameof(MemoryManager.WriteByte); break; + case 1: fallbackMethodName = nameof(MemoryManager.WriteUInt16); break; + case 2: fallbackMethodName = nameof(MemoryManager.WriteUInt32); break; + case 3: fallbackMethodName = nameof(MemoryManager.WriteUInt64); break; + } + + context.EmitCall(typeof(MemoryManager), fallbackMethodName); + } + + private static void EmitWriteVectorFallback(ILEmitterCtx context, int size) + { + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + context.EmitLdint(_tempIntAddress); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + context.Emit(OpCodes.Conv_U8); + } + + context.EmitLdvec(_tempVecValue); + + string fallbackMethodName = null; + + switch (size) + { + case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8); break; + case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16); break; + case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32); break; + case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64); break; + case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128); break; + } + + context.EmitCall(typeof(MemoryManager), fallbackMethodName); + } } } \ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitSimdMemory.cs b/ChocolArm64/Instructions/InstEmitSimdMemory.cs index 9b84eb8681..18ec1d33ea 100644 --- a/ChocolArm64/Instructions/InstEmitSimdMemory.cs +++ b/ChocolArm64/Instructions/InstEmitSimdMemory.cs @@ -45,7 +45,6 @@ namespace 
ChocolArm64.Instructions if (isLoad) { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdint(op.Rn); context.EmitLdc_I8(offset); @@ -62,7 +61,6 @@ namespace ChocolArm64.Instructions } else { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdint(op.Rn); context.EmitLdc_I8(offset); @@ -90,7 +88,6 @@ namespace ChocolArm64.Instructions void EmitMemAddress() { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdint(op.Rn); context.EmitLdc_I8(offset); diff --git a/ChocolArm64/Instructions/InstEmitSystem.cs b/ChocolArm64/Instructions/InstEmitSystem.cs index 0e61d5bded..5687768a88 100644 --- a/ChocolArm64/Instructions/InstEmitSystem.cs +++ b/ChocolArm64/Instructions/InstEmitSystem.cs @@ -102,7 +102,6 @@ namespace ChocolArm64.Instructions //DC ZVA for (int offs = 0; offs < (4 << CpuThreadState.DczSizeLog2); offs += 8) { - context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdintzr(op.Rt); context.EmitLdc_I(offs); diff --git a/ChocolArm64/Memory/CompareExchange128.cs b/ChocolArm64/Memory/CompareExchange128.cs index 0fbe10f2cf..1618ff0fbc 100644 --- a/ChocolArm64/Memory/CompareExchange128.cs +++ b/ChocolArm64/Memory/CompareExchange128.cs @@ -95,7 +95,7 @@ namespace ChocolArm64.Memory int cpuId = getCpuId(); - MemoryAlloc.Free(funcPtr); + MemoryManagement.Free(funcPtr); return (cpuId & (1 << 13)) != 0; } @@ -104,7 +104,7 @@ namespace ChocolArm64.Memory { ulong codeLength = (ulong)code.Length; - IntPtr funcPtr = MemoryAlloc.Allocate(codeLength); + IntPtr funcPtr = MemoryManagement.Allocate(codeLength); unsafe { @@ -118,7 +118,7 @@ namespace ChocolArm64.Memory } } - MemoryAlloc.Reprotect(funcPtr, codeLength, MemoryProtection.Execute); + MemoryManagement.Reprotect(funcPtr, codeLength, MemoryProtection.Execute); return funcPtr; } diff --git a/ChocolArm64/Memory/MemoryAlloc.cs b/ChocolArm64/Memory/MemoryManagement.cs similarity index 81% rename from ChocolArm64/Memory/MemoryAlloc.cs rename to ChocolArm64/Memory/MemoryManagement.cs index 
a24299cd70..fa4bc4fac2 100644 --- a/ChocolArm64/Memory/MemoryAlloc.cs +++ b/ChocolArm64/Memory/MemoryManagement.cs @@ -4,7 +4,7 @@ using System.Runtime.InteropServices; namespace ChocolArm64.Memory { - public static class MemoryAlloc + public static class MemoryManagement { public static bool HasWriteWatchSupport => RuntimeInformation.IsOSPlatform(OSPlatform.Windows); @@ -14,12 +14,12 @@ namespace ChocolArm64.Memory { IntPtr sizeNint = new IntPtr((long)size); - return MemoryAllocWindows.Allocate(sizeNint); + return MemoryManagementWindows.Allocate(sizeNint); } else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - return MemoryAllocUnix.Allocate(size); + return MemoryManagementUnix.Allocate(size); } else { @@ -33,12 +33,12 @@ namespace ChocolArm64.Memory { IntPtr sizeNint = new IntPtr((long)size); - return MemoryAllocWindows.AllocateWriteTracked(sizeNint); + return MemoryManagementWindows.AllocateWriteTracked(sizeNint); } else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - return MemoryAllocUnix.Allocate(size); + return MemoryManagementUnix.Allocate(size); } else { @@ -54,12 +54,12 @@ namespace ChocolArm64.Memory { IntPtr sizeNint = new IntPtr((long)size); - result = MemoryAllocWindows.Reprotect(address, sizeNint, permission); + result = MemoryManagementWindows.Reprotect(address, sizeNint, permission); } else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - result = MemoryAllocUnix.Reprotect(address, size, permission); + result = MemoryManagementUnix.Reprotect(address, size, permission); } else { @@ -76,12 +76,12 @@ namespace ChocolArm64.Memory { if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return MemoryAllocWindows.Free(address); + return MemoryManagementWindows.Free(address); } else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || 
RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - return MemoryAllocUnix.Free(address); + return MemoryManagementUnix.Free(address); } else { @@ -101,7 +101,7 @@ namespace ChocolArm64.Memory //write tracking support on the OS. if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return MemoryAllocWindows.GetModifiedPages(address, size, addresses, out count); + return MemoryManagementWindows.GetModifiedPages(address, size, addresses, out count); } else { diff --git a/ChocolArm64/Memory/MemoryAllocUnix.cs b/ChocolArm64/Memory/MemoryManagementUnix.cs similarity index 98% rename from ChocolArm64/Memory/MemoryAllocUnix.cs rename to ChocolArm64/Memory/MemoryManagementUnix.cs index 857c1c5042..9fe1aef094 100644 --- a/ChocolArm64/Memory/MemoryAllocUnix.cs +++ b/ChocolArm64/Memory/MemoryManagementUnix.cs @@ -3,7 +3,7 @@ using System; namespace ChocolArm64.Memory { - static class MemoryAllocUnix + static class MemoryManagementUnix { public static IntPtr Allocate(ulong size) { diff --git a/ChocolArm64/Memory/MemoryAllocWindows.cs b/ChocolArm64/Memory/MemoryManagementWindows.cs similarity index 95% rename from ChocolArm64/Memory/MemoryAllocWindows.cs rename to ChocolArm64/Memory/MemoryManagementWindows.cs index 82be8b1e4f..6cee134279 100644 --- a/ChocolArm64/Memory/MemoryAllocWindows.cs +++ b/ChocolArm64/Memory/MemoryManagementWindows.cs @@ -4,7 +4,7 @@ using System.Runtime.InteropServices; namespace ChocolArm64.Memory { - static class MemoryAllocWindows + static class MemoryManagementWindows { [Flags] private enum AllocationType : uint @@ -21,7 +21,7 @@ namespace ChocolArm64.Memory } [Flags] - private enum MemoryProtection + private enum MemoryProtection : uint { NoAccess = 0x01, ReadOnly = 0x02, @@ -59,7 +59,7 @@ namespace ChocolArm64.Memory [DllImport("kernel32.dll")] private static extern bool VirtualFree( IntPtr lpAddress, - uint dwSize, + IntPtr dwSize, AllocationType dwFreeType); [DllImport("kernel32.dll")] @@ -127,7 +127,7 @@ namespace ChocolArm64.Memory 
public static bool Free(IntPtr address) { - return VirtualFree(address, 0, AllocationType.Release); + return VirtualFree(address, IntPtr.Zero, AllocationType.Release); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs index afb0f65143..ce102e096c 100644 --- a/ChocolArm64/Memory/MemoryManager.cs +++ b/ChocolArm64/Memory/MemoryManager.cs @@ -1,8 +1,5 @@ -using ChocolArm64.Events; -using ChocolArm64.Exceptions; using ChocolArm64.Instructions; using System; -using System.Collections.Concurrent; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; @@ -10,52 +7,399 @@ using System.Runtime.Intrinsics.X86; using System.Threading; using static ChocolArm64.Memory.CompareExchange128; +using static ChocolArm64.Memory.MemoryManagement; namespace ChocolArm64.Memory { public unsafe class MemoryManager : IMemory, IDisposable { - private const int PtLvl0Bits = 13; - private const int PtLvl1Bits = 14; - public const int PageBits = 12; + public const int PageBits = 12; + public const int PageSize = 1 << PageBits; + public const int PageMask = PageSize - 1; - private const int PtLvl0Size = 1 << PtLvl0Bits; - private const int PtLvl1Size = 1 << PtLvl1Bits; - public const int PageSize = 1 << PageBits; + private const long PteFlagNotModified = 1; - private const int PtLvl0Mask = PtLvl0Size - 1; - private const int PtLvl1Mask = PtLvl1Size - 1; - public const int PageMask = PageSize - 1; - - private const int PtLvl0Bit = PageBits + PtLvl1Bits; - private const int PtLvl1Bit = PageBits; - - private ConcurrentDictionary _observedPages; + internal const long PteFlagsMask = 7; public IntPtr Ram { get; private set; } private byte* _ramPtr; - private byte*** _pageTable; + private IntPtr _pageTable; - public event EventHandler InvalidAccess; + internal IntPtr PageTable => _pageTable; - public event EventHandler ObservedAccess; + internal int PtLevelBits 
{ get; } + internal int PtLevelSize { get; } + internal int PtLevelMask { get; } - public MemoryManager(IntPtr ram) + public bool HasWriteWatchSupport => MemoryManagement.HasWriteWatchSupport; + + public int AddressSpaceBits { get; } + public long AddressSpaceSize { get; } + + public MemoryManager( + IntPtr ram, + int addressSpaceBits = 48, + bool useFlatPageTable = false) { - _observedPages = new ConcurrentDictionary(); - Ram = ram; _ramPtr = (byte*)ram; - _pageTable = (byte***)Marshal.AllocHGlobal(PtLvl0Size * IntPtr.Size); + AddressSpaceBits = addressSpaceBits; + AddressSpaceSize = 1L << addressSpaceBits; - for (int l0 = 0; l0 < PtLvl0Size; l0++) + //When flat page table is requested, we use a single + //array for the mappings of the entire address space. + //This has better performance, but also high memory usage. + //The multi level page table uses 9 bits per level, so + //the memory usage is lower, but the performance is also + //lower, since each address translation requires multiple reads. 
+ if (useFlatPageTable) { - _pageTable[l0] = null; + PtLevelBits = addressSpaceBits - PageBits; } + else + { + PtLevelBits = 9; + } + + PtLevelSize = 1 << PtLevelBits; + PtLevelMask = PtLevelSize - 1; + + _pageTable = Allocate((ulong)(PtLevelSize * IntPtr.Size)); + } + + public void Map(long va, long pa, long size) + { + SetPtEntries(va, _ramPtr + pa, size); + } + + public void Unmap(long position, long size) + { + SetPtEntries(position, null, size); + } + + public bool IsMapped(long position) + { + return Translate(position) != IntPtr.Zero; + } + + public long GetPhysicalAddress(long virtualAddress) + { + byte* ptr = (byte*)Translate(virtualAddress); + + return (long)(ptr - _ramPtr); + } + + private IntPtr Translate(long position) + { + if (!IsValidPosition(position)) + { + return IntPtr.Zero; + } + + byte* ptr = GetPtEntry(position); + + ulong ptrUlong = (ulong)ptr; + + if ((ptrUlong & PteFlagsMask) != 0) + { + ptrUlong &= ~(ulong)PteFlagsMask; + + ptr = (byte*)ptrUlong; + } + + return new IntPtr(ptr + (position & PageMask)); + } + + private IntPtr TranslateWrite(long position) + { + if (!IsValidPosition(position)) + { + return IntPtr.Zero; + } + + byte* ptr = GetPtEntry(position); + + ulong ptrUlong = (ulong)ptr; + + if ((ptrUlong & PteFlagsMask) != 0) + { + if ((ptrUlong & PteFlagNotModified) != 0) + { + ClearPtEntryFlag(position, PteFlagNotModified); + } + + ptrUlong &= ~(ulong)PteFlagsMask; + + ptr = (byte*)ptrUlong; + } + + return new IntPtr(ptr + (position & PageMask)); + } + + private byte* GetPtEntry(long position) + { + return *(byte**)GetPtPtr(position); + } + + private void SetPtEntries(long va, byte* ptr, long size) + { + long endPosition = (va + size + PageMask) & ~PageMask; + + while ((ulong)va < (ulong)endPosition) + { + SetPtEntry(va, ptr); + + va += PageSize; + + if (ptr != null) + { + ptr += PageSize; + } + } + } + + private void SetPtEntry(long position, byte* ptr) + { + *(byte**)GetPtPtr(position) = ptr; + } + + private void 
SetPtEntryFlag(long position, long flag) + { + ModifyPtEntryFlag(position, flag, setFlag: true); + } + + private void ClearPtEntryFlag(long position, long flag) + { + ModifyPtEntryFlag(position, flag, setFlag: false); + } + + private void ModifyPtEntryFlag(long position, long flag, bool setFlag) + { + IntPtr* pt = (IntPtr*)_pageTable; + + while (true) + { + IntPtr* ptPtr = GetPtPtr(position); + + IntPtr old = *ptPtr; + + long modified = old.ToInt64(); + + if (setFlag) + { + modified |= flag; + } + else + { + modified &= ~flag; + } + + IntPtr origValue = Interlocked.CompareExchange(ref *ptPtr, new IntPtr(modified), old); + + if (origValue == old) + { + break; + } + } + } + + private IntPtr* GetPtPtr(long position) + { + if (!IsValidPosition(position)) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + + IntPtr nextPtr = _pageTable; + + IntPtr* ptePtr = null; + + int bit = PageBits; + + while (true) + { + long index = (position >> bit) & PtLevelMask; + + ptePtr = &((IntPtr*)nextPtr)[index]; + + bit += PtLevelBits; + + if (bit >= AddressSpaceBits) + { + break; + } + + nextPtr = *ptePtr; + + if (nextPtr == IntPtr.Zero) + { + //Entry does not yet exist, allocate a new one. + IntPtr newPtr = Allocate((ulong)(PtLevelSize * IntPtr.Size)); + + //Try to swap the current pointer (should be zero), with the allocated one. + nextPtr = Interlocked.Exchange(ref *ptePtr, newPtr); + + //If the old pointer is not null, then another thread already has set it. 
+ if (nextPtr != IntPtr.Zero) + { + Free(newPtr); + } + else + { + nextPtr = newPtr; + } + } + } + + return ptePtr; + } + + public bool IsRegionModified(long position, long size) + { + if (!HasWriteWatchSupport) + { + return IsRegionModifiedFallback(position, size); + } + + IntPtr address = Translate(position); + + IntPtr baseAddr = address; + IntPtr expectedAddr = address; + + long pendingPages = 0; + + long pages = size / PageSize; + + bool modified = false; + + bool IsAnyPageModified() + { + IntPtr pendingSize = new IntPtr(pendingPages * PageSize); + + IntPtr[] addresses = new IntPtr[pendingPages]; + + bool result = GetModifiedPages(baseAddr, pendingSize, addresses, out ulong count); + + if (result) + { + return count != 0; + } + else + { + return true; + } + } + + while (pages-- > 0) + { + if (address != expectedAddr) + { + modified |= IsAnyPageModified(); + + baseAddr = address; + + pendingPages = 0; + } + + expectedAddr = address + PageSize; + + pendingPages++; + + if (pages == 0) + { + break; + } + + position += PageSize; + + address = Translate(position); + } + + if (pendingPages != 0) + { + modified |= IsAnyPageModified(); + } + + return modified; + } + + private unsafe bool IsRegionModifiedFallback(long position, long size) + { + long endAddr = (position + size + PageMask) & ~PageMask; + + bool modified = false; + + while ((ulong)position < (ulong)endAddr) + { + if (IsValidPosition(position)) + { + byte* ptr = ((byte**)_pageTable)[position >> PageBits]; + + ulong ptrUlong = (ulong)ptr; + + if ((ptrUlong & PteFlagNotModified) == 0) + { + modified = true; + + SetPtEntryFlag(position, PteFlagNotModified); + } + } + else + { + modified = true; + } + + position += PageSize; + } + + return modified; + } + + public bool TryGetHostAddress(long position, long size, out IntPtr ptr) + { + if (IsContiguous(position, size)) + { + ptr = (IntPtr)Translate(position); + + return true; + } + + ptr = IntPtr.Zero; + + return false; + } + + private bool IsContiguous(long 
position, long size) + { + long endPos = position + size; + + position &= ~PageMask; + + long expectedPa = GetPhysicalAddress(position); + + while ((ulong)position < (ulong)endPos) + { + long pa = GetPhysicalAddress(position); + + if (pa != expectedPa) + { + return false; + } + + position += PageSize; + expectedPa += PageSize; + } + + return true; + } + + public bool IsValidPosition(long position) + { + return (ulong)position < (ulong)AddressSpaceSize; } internal bool AtomicCompareExchange2xInt32( @@ -86,7 +430,7 @@ namespace ChocolArm64.Memory AbortWithAlignmentFault(position); } - IntPtr ptr = new IntPtr(TranslateWrite(position)); + IntPtr ptr = TranslateWrite(position); return InterlockedCompareExchange128(ptr, expectedLow, expectedHigh, desiredLow, desiredHigh); } @@ -98,7 +442,7 @@ namespace ChocolArm64.Memory AbortWithAlignmentFault(position); } - IntPtr ptr = new IntPtr(Translate(position)); + IntPtr ptr = Translate(position); InterlockedRead128(ptr, out ulong low, out ulong high); @@ -371,7 +715,7 @@ namespace ChocolArm64.Memory int copySize = (int)(pageLimit - position); - Marshal.Copy((IntPtr)Translate(position), data, offset, copySize); + Marshal.Copy(Translate(position), data, offset, copySize); position += copySize; offset += copySize; @@ -408,7 +752,7 @@ namespace ChocolArm64.Memory int copySize = (int)(pageLimit - position); - Marshal.Copy((IntPtr)Translate(position), data, offset, copySize); + Marshal.Copy(Translate(position), data, offset, copySize); position += copySize; offset += copySize; @@ -571,7 +915,7 @@ namespace ChocolArm64.Memory int copySize = (int)(pageLimit - position); - Marshal.Copy(data, offset, (IntPtr)TranslateWrite(position), copySize); + Marshal.Copy(data, offset, TranslateWrite(position), copySize); position += copySize; offset += copySize; @@ -601,7 +945,7 @@ namespace ChocolArm64.Memory int copySize = (int)(pageLimit - position); - Marshal.Copy(data, offset, (IntPtr)TranslateWrite(position), copySize); + Marshal.Copy(data, 
offset, Translate(position), copySize); position += copySize; offset += copySize; @@ -614,8 +958,8 @@ namespace ChocolArm64.Memory if (IsContiguous(src, size) && IsContiguous(dst, size)) { - byte* srcPtr = Translate(src); - byte* dstPtr = TranslateWrite(dst); + byte* srcPtr = (byte*)Translate(src); + byte* dstPtr = (byte*)Translate(dst); Buffer.MemoryCopy(srcPtr, dstPtr, size, size); } @@ -625,266 +969,6 @@ namespace ChocolArm64.Memory } } - public void Map(long va, long pa, long size) - { - SetPtEntries(va, _ramPtr + pa, size); - } - - public void Unmap(long position, long size) - { - SetPtEntries(position, null, size); - - StopObservingRegion(position, size); - } - - public bool IsMapped(long position) - { - if (!(IsValidPosition(position))) - { - return false; - } - - long l0 = (position >> PtLvl0Bit) & PtLvl0Mask; - long l1 = (position >> PtLvl1Bit) & PtLvl1Mask; - - if (_pageTable[l0] == null) - { - return false; - } - - return _pageTable[l0][l1] != null || _observedPages.ContainsKey(position >> PageBits); - } - - public long GetPhysicalAddress(long virtualAddress) - { - byte* ptr = Translate(virtualAddress); - - return (long)(ptr - _ramPtr); - } - - internal byte* Translate(long position) - { - long l0 = (position >> PtLvl0Bit) & PtLvl0Mask; - long l1 = (position >> PtLvl1Bit) & PtLvl1Mask; - - long old = position; - - byte** lvl1 = _pageTable[l0]; - - if ((position >> (PtLvl0Bit + PtLvl0Bits)) != 0) - { - goto Unmapped; - } - - if (lvl1 == null) - { - goto Unmapped; - } - - position &= PageMask; - - byte* ptr = lvl1[l1]; - - if (ptr == null) - { - goto Unmapped; - } - - return ptr + position; - -Unmapped: - return HandleNullPte(old); - } - - private byte* HandleNullPte(long position) - { - long key = position >> PageBits; - - if (_observedPages.TryGetValue(key, out IntPtr ptr)) - { - return (byte*)ptr + (position & PageMask); - } - - InvalidAccess?.Invoke(this, new MemoryAccessEventArgs(position)); - - throw new VmmPageFaultException(position); - } - - 
internal byte* TranslateWrite(long position) - { - long l0 = (position >> PtLvl0Bit) & PtLvl0Mask; - long l1 = (position >> PtLvl1Bit) & PtLvl1Mask; - - long old = position; - - byte** lvl1 = _pageTable[l0]; - - if ((position >> (PtLvl0Bit + PtLvl0Bits)) != 0) - { - goto Unmapped; - } - - if (lvl1 == null) - { - goto Unmapped; - } - - position &= PageMask; - - byte* ptr = lvl1[l1]; - - if (ptr == null) - { - goto Unmapped; - } - - return ptr + position; - -Unmapped: - return HandleNullPteWrite(old); - } - - private byte* HandleNullPteWrite(long position) - { - long key = position >> PageBits; - - MemoryAccessEventArgs e = new MemoryAccessEventArgs(position); - - if (_observedPages.TryGetValue(key, out IntPtr ptr)) - { - SetPtEntry(position, (byte*)ptr); - - ObservedAccess?.Invoke(this, e); - - return (byte*)ptr + (position & PageMask); - } - - InvalidAccess?.Invoke(this, e); - - throw new VmmPageFaultException(position); - } - - private void SetPtEntries(long va, byte* ptr, long size) - { - long endPosition = (va + size + PageMask) & ~PageMask; - - while ((ulong)va < (ulong)endPosition) - { - SetPtEntry(va, ptr); - - va += PageSize; - - if (ptr != null) - { - ptr += PageSize; - } - } - } - - private void SetPtEntry(long position, byte* ptr) - { - if (!IsValidPosition(position)) - { - throw new ArgumentOutOfRangeException(nameof(position)); - } - - long l0 = (position >> PtLvl0Bit) & PtLvl0Mask; - long l1 = (position >> PtLvl1Bit) & PtLvl1Mask; - - if (_pageTable[l0] == null) - { - byte** lvl1 = (byte**)Marshal.AllocHGlobal(PtLvl1Size * IntPtr.Size); - - for (int zl1 = 0; zl1 < PtLvl1Size; zl1++) - { - lvl1[zl1] = null; - } - - Thread.MemoryBarrier(); - - _pageTable[l0] = lvl1; - } - - _pageTable[l0][l1] = ptr; - } - - public void StartObservingRegion(long position, long size) - { - long endPosition = (position + size + PageMask) & ~PageMask; - - position &= ~PageMask; - - while ((ulong)position < (ulong)endPosition) - { - _observedPages[position >> PageBits] = 
(IntPtr)Translate(position); - - SetPtEntry(position, null); - - position += PageSize; - } - } - - public void StopObservingRegion(long position, long size) - { - long endPosition = (position + size + PageMask) & ~PageMask; - - while (position < endPosition) - { - lock (_observedPages) - { - if (_observedPages.TryRemove(position >> PageBits, out IntPtr ptr)) - { - SetPtEntry(position, (byte*)ptr); - } - } - - position += PageSize; - } - } - - public bool TryGetHostAddress(long position, long size, out IntPtr ptr) - { - if (IsContiguous(position, size)) - { - ptr = (IntPtr)Translate(position); - - return true; - } - - ptr = IntPtr.Zero; - - return false; - } - - private bool IsContiguous(long position, long size) - { - long endPos = position + size; - - position &= ~PageMask; - - long expectedPa = GetPhysicalAddress(position); - - while ((ulong)position < (ulong)endPos) - { - long pa = GetPhysicalAddress(position); - - if (pa != expectedPa) - { - return false; - } - - position += PageSize; - expectedPa += PageSize; - } - - return true; - } - - public bool IsValidPosition(long position) - { - return position >> (PtLvl0Bits + PtLvl1Bits + PageBits) == 0; - } - public void Dispose() { Dispose(true); @@ -892,24 +976,36 @@ Unmapped: protected virtual void Dispose(bool disposing) { - if (_pageTable == null) + IntPtr ptr = Interlocked.Exchange(ref _pageTable, IntPtr.Zero); + + if (ptr != IntPtr.Zero) { + FreePageTableEntry(ptr, PageBits); + } + } + + private void FreePageTableEntry(IntPtr ptr, int levelBitEnd) + { + levelBitEnd += PtLevelBits; + + if (levelBitEnd >= AddressSpaceBits) + { + Free(ptr); + return; } - for (int l0 = 0; l0 < PtLvl0Size; l0++) + for (int index = 0; index < PtLevelSize; index++) { - if (_pageTable[l0] != null) - { - Marshal.FreeHGlobal((IntPtr)_pageTable[l0]); - } + IntPtr ptePtr = ((IntPtr*)ptr)[index]; - _pageTable[l0] = null; + if (ptePtr != IntPtr.Zero) + { + FreePageTableEntry(ptePtr, levelBitEnd); + } } - 
Marshal.FreeHGlobal((IntPtr)_pageTable); - - _pageTable = null; + Free(ptr); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs index 5490123774..f7e61bc999 100644 --- a/ChocolArm64/Translation/ILEmitterCtx.cs +++ b/ChocolArm64/Translation/ILEmitterCtx.cs @@ -1,5 +1,6 @@ using ChocolArm64.Decoders; using ChocolArm64.Instructions; +using ChocolArm64.Memory; using ChocolArm64.State; using System; using System.Collections.Generic; @@ -10,6 +11,8 @@ namespace ChocolArm64.Translation { class ILEmitterCtx { + public MemoryManager Memory { get; } + private TranslatorCache _cache; private TranslatorQueue _queue; @@ -43,19 +46,34 @@ namespace ChocolArm64.Translation //values needed by some functions, since IL doesn't have a swap instruction. //You can use any value here as long it doesn't conflict with the indices //for the other registers. Any value >= 64 or < 0 will do. - private const int IntTmpIndex = -1; - private const int RorTmpIndex = -2; - private const int CmpOptTmp1Index = -3; - private const int CmpOptTmp2Index = -4; - private const int VecTmp1Index = -5; - private const int VecTmp2Index = -6; - private const int IntTmp2Index = -7; + private const int ReservedLocalsCount = 64; - public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph) + private const int RorTmpIndex = ReservedLocalsCount + 0; + private const int CmpOptTmp1Index = ReservedLocalsCount + 1; + private const int CmpOptTmp2Index = ReservedLocalsCount + 2; + private const int IntGpTmp1Index = ReservedLocalsCount + 3; + private const int IntGpTmp2Index = ReservedLocalsCount + 4; + private const int UserIntTempStart = ReservedLocalsCount + 5; + + //Vectors are part of another "set" of locals. 
+ private const int VecGpTmp1Index = ReservedLocalsCount + 0; + private const int VecGpTmp2Index = ReservedLocalsCount + 1; + private const int UserVecTempStart = ReservedLocalsCount + 2; + + private static int _userIntTempCount; + private static int _userVecTempCount; + + public ILEmitterCtx( + MemoryManager memory, + TranslatorCache cache, + TranslatorQueue queue, + TranslationTier tier, + Block graph) { - _cache = cache ?? throw new ArgumentNullException(nameof(cache)); - _queue = queue ?? throw new ArgumentNullException(nameof(queue)); - _currBlock = graph ?? throw new ArgumentNullException(nameof(graph)); + Memory = memory ?? throw new ArgumentNullException(nameof(memory)); + _cache = cache ?? throw new ArgumentNullException(nameof(cache)); + _queue = queue ?? throw new ArgumentNullException(nameof(queue)); + _currBlock = graph ?? throw new ArgumentNullException(nameof(graph)); Tier = tier; @@ -76,6 +94,16 @@ namespace ChocolArm64.Translation AdvanceOpCode(); } + public static int GetIntTempIndex() + { + return UserIntTempStart + _userIntTempCount++; + } + + public static int GetVecTempIndex() + { + return UserVecTempStart + _userVecTempCount++; + } + public ILBlock[] GetILBlocks() { EmitAllOpCodes(); @@ -145,7 +173,7 @@ namespace ChocolArm64.Translation _ilBlock.Add(new ILBarrier()); } - private Condition GetInverseCond(Condition cond) + private static Condition GetInverseCond(Condition cond) { //Bit 0 of all conditions is basically a negation bit, so //inverting this bit has the effect of inverting the condition. 
@@ -560,17 +588,17 @@ namespace ChocolArm64.Translation _ilBlock.Add(new ILOpCodeStoreState(_ilBlock)); } - public void EmitLdtmp() => EmitLdint(IntTmpIndex); - public void EmitSttmp() => EmitStint(IntTmpIndex); + public void EmitLdtmp() => EmitLdint(IntGpTmp1Index); + public void EmitSttmp() => EmitStint(IntGpTmp1Index); - public void EmitLdtmp2() => EmitLdint(IntTmp2Index); - public void EmitSttmp2() => EmitStint(IntTmp2Index); + public void EmitLdtmp2() => EmitLdint(IntGpTmp2Index); + public void EmitSttmp2() => EmitStint(IntGpTmp2Index); - public void EmitLdvectmp() => EmitLdvec(VecTmp1Index); - public void EmitStvectmp() => EmitStvec(VecTmp1Index); + public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index); + public void EmitStvectmp() => EmitStvec(VecGpTmp1Index); - public void EmitLdvectmp2() => EmitLdvec(VecTmp2Index); - public void EmitStvectmp2() => EmitStvec(VecTmp2Index); + public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); + public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); public void EmitLdint(int index) => Ldloc(index, IoType.Int); public void EmitStint(int index) => Stloc(index, IoType.Int); @@ -611,62 +639,12 @@ namespace ChocolArm64.Translation public void EmitCallPropGet(Type objType, string propName) { - if (objType == null) - { - throw new ArgumentNullException(nameof(objType)); - } - - if (propName == null) - { - throw new ArgumentNullException(nameof(propName)); - } - - EmitCall(objType.GetMethod($"get_{propName}")); + EmitCall(objType, $"get_{propName}"); } public void EmitCallPropSet(Type objType, string propName) { - if (objType == null) - { - throw new ArgumentNullException(nameof(objType)); - } - - if (propName == null) - { - throw new ArgumentNullException(nameof(propName)); - } - - EmitCall(objType.GetMethod($"set_{propName}")); - } - - public void EmitCallPrivatePropGet(Type objType, string propName) - { - if (objType == null) - { - throw new ArgumentNullException(nameof(objType)); - } - - if (propName == null) - { - 
throw new ArgumentNullException(nameof(propName)); - } - - EmitPrivateCall(objType, $"get_{propName}"); - } - - public void EmitCallPrivatePropSet(Type objType, string propName) - { - if (objType == null) - { - throw new ArgumentNullException(nameof(objType)); - } - - if (propName == null) - { - throw new ArgumentNullException(nameof(propName)); - } - - EmitPrivateCall(objType, $"set_{propName}"); + EmitCall(objType, $"set_{propName}"); } public void EmitCall(Type objType, string mthdName) @@ -684,6 +662,16 @@ namespace ChocolArm64.Translation EmitCall(objType.GetMethod(mthdName)); } + public void EmitCallPrivatePropGet(Type objType, string propName) + { + EmitPrivateCall(objType, $"get_{propName}"); + } + + public void EmitCallPrivatePropSet(Type objType, string propName) + { + EmitPrivateCall(objType, $"set_{propName}"); + } + public void EmitPrivateCall(Type objType, string mthdName) { if (objType == null) diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs index 7f7df6e5b2..dd1215f50c 100644 --- a/ChocolArm64/Translation/Translator.cs +++ b/ChocolArm64/Translation/Translator.cs @@ -138,7 +138,7 @@ namespace ChocolArm64.Translation { Block block = Decoder.DecodeBasicBlock(_memory, position, mode); - ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier0, block); + ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier0, block); string subName = GetSubroutineName(position); @@ -153,7 +153,7 @@ namespace ChocolArm64.Translation { Block graph = Decoder.DecodeSubroutine(_memory, position, mode); - ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier1, graph); + ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier1, graph); ILBlock[] ilBlocks = context.GetILBlocks(); diff --git a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs index 2f50463ded..053c216137 100644 --- 
a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs +++ b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs @@ -1,4 +1,3 @@ -using ChocolArm64.Events; using ChocolArm64.Memory; using System.Collections.Concurrent; @@ -19,35 +18,28 @@ namespace Ryujinx.Graphics.Memory { _memory = memory; - _memory.ObservedAccess += MemoryAccessHandler; - CachedPages = new ConcurrentDictionary[1 << 20]; } - private void MemoryAccessHandler(object sender, MemoryAccessEventArgs e) - { - long pa = _memory.GetPhysicalAddress(e.Position); - - CachedPages[pa >> PageBits]?.Clear(); - } - public bool IsRegionModified(long position, long size, NvGpuBufferType bufferType) { - long pa = _memory.GetPhysicalAddress(position); + long va = position; - long addr = pa; + long pa = _memory.GetPhysicalAddress(va); - long endAddr = (addr + size + PageMask) & ~PageMask; + long endAddr = (va + size + PageMask) & ~PageMask; + + long addrTruncated = va & ~PageMask; + + bool modified = _memory.IsRegionModified(addrTruncated, endAddr - addrTruncated); int newBuffMask = 1 << (int)bufferType; - _memory.StartObservingRegion(position, size); - long cachedPagesCount = 0; - while (addr < endAddr) + while (va < endAddr) { - long page = addr >> PageBits; + long page = _memory.GetPhysicalAddress(va) >> PageBits; ConcurrentDictionary dictionary = CachedPages[page]; @@ -57,6 +49,10 @@ namespace Ryujinx.Graphics.Memory CachedPages[page] = dictionary; } + else if (modified) + { + CachedPages[page].Clear(); + } if (dictionary.TryGetValue(pa, out int currBuffMask)) { @@ -74,10 +70,10 @@ namespace Ryujinx.Graphics.Memory dictionary[pa] = newBuffMask; } - addr += PageSize; + va += PageSize; } - return cachedPagesCount != (endAddr - pa + PageMask) >> PageBits; + return cachedPagesCount != (endAddr - addrTruncated) >> PageBits; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/DeviceMemory.cs b/Ryujinx.HLE/DeviceMemory.cs index 310942b872..524adb8466 100644 --- a/Ryujinx.HLE/DeviceMemory.cs +++ b/Ryujinx.HLE/DeviceMemory.cs @@ -1,5 +1,5 
@@ +using ChocolArm64.Memory; using System; -using System.Runtime.InteropServices; namespace Ryujinx.HLE { @@ -7,13 +7,13 @@ namespace Ryujinx.HLE { public const long RamSize = 4L * 1024 * 1024 * 1024; - public IntPtr RamPointer { get; private set; } + public IntPtr RamPointer { get; } private unsafe byte* _ramPtr; public unsafe DeviceMemory() { - RamPointer = Marshal.AllocHGlobal(new IntPtr(RamSize)); + RamPointer = MemoryManagement.AllocateWriteTracked(RamSize); _ramPtr = (byte*)RamPointer; } @@ -177,7 +177,7 @@ namespace Ryujinx.HLE protected virtual void Dispose(bool disposing) { - Marshal.FreeHGlobal(RamPointer); + MemoryManagement.Free(RamPointer); } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs index 338e5543d7..909f6333a9 100644 --- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs +++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs @@ -80,12 +80,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Process public bool IsPaused { get; private set; } - public Translator Translator { get; private set; } - public MemoryManager CpuMemory { get; private set; } + public Translator Translator { get; private set; } + private SvcHandler _svcHandler; + private Horizon _system; + public HleProcessDebugger Debugger { get; private set; } public KProcess(Horizon system) : base(system) @@ -93,14 +95,10 @@ namespace Ryujinx.HLE.HOS.Kernel.Process _processLock = new object(); _threadingLock = new object(); - CpuMemory = new MemoryManager(system.Device.Memory.RamPointer); - - CpuMemory.InvalidAccess += InvalidAccessHandler; + _system = system; AddressArbiter = new KAddressArbiter(system); - MemoryManager = new KMemoryManager(system, CpuMemory); - _fullTlsPages = new SortedDictionary(); _freeTlsPages = new SortedDictionary(); @@ -110,10 +108,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Process _threads = new LinkedList(); - Translator = new Translator(CpuMemory); - - Translator.CpuTrace += CpuTraceHandler; - _svcHandler 
= new SvcHandler(system.Device, this); Debugger = new HleProcessDebugger(this); @@ -131,6 +125,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Process AddressSpaceType addrSpaceType = (AddressSpaceType)((creationInfo.MmuFlags >> 1) & 7); + InitializeMemoryManager(addrSpaceType, memRegion); + bool aslrEnabled = ((creationInfo.MmuFlags >> 5) & 1) != 0; ulong codeAddress = creationInfo.CodeAddress; @@ -238,6 +234,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Process AddressSpaceType addrSpaceType = (AddressSpaceType)((creationInfo.MmuFlags >> 1) & 7); + InitializeMemoryManager(addrSpaceType, memRegion); + bool aslrEnabled = ((creationInfo.MmuFlags >> 5) & 1) != 0; ulong codeAddress = creationInfo.CodeAddress; @@ -405,7 +403,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process case AddressSpaceType.Addr36Bits: case AddressSpaceType.Addr39Bits: _memoryUsageCapacity = MemoryManager.HeapRegionEnd - - MemoryManager.HeapRegionStart; + MemoryManager.HeapRegionStart; break; case AddressSpaceType.Addr32BitsNoMap: @@ -1010,9 +1008,29 @@ namespace Ryujinx.HLE.HOS.Kernel.Process } } - private void InvalidAccessHandler(object sender, MemoryAccessEventArgs e) + private void InitializeMemoryManager(AddressSpaceType addrSpaceType, MemoryRegion memRegion) { - PrintCurrentThreadStackTrace(); + int addrSpaceBits; + + switch (addrSpaceType) + { + case AddressSpaceType.Addr32Bits: addrSpaceBits = 32; break; + case AddressSpaceType.Addr36Bits: addrSpaceBits = 36; break; + case AddressSpaceType.Addr32BitsNoMap: addrSpaceBits = 32; break; + case AddressSpaceType.Addr39Bits: addrSpaceBits = 39; break; + + default: throw new ArgumentException(nameof(addrSpaceType)); + } + + bool useFlatPageTable = memRegion == MemoryRegion.Application; + + CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable); + + MemoryManager = new KMemoryManager(_system, CpuMemory); + + Translator = new Translator(CpuMemory); + + Translator.CpuTrace += CpuTraceHandler; } public void 
PrintCurrentThreadStackTrace() diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs index 071b3c2019..cf881a7932 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs @@ -1,5 +1,4 @@ using ChocolArm64.Events; -using ChocolArm64.Memory; using ChocolArm64.State; using Ryujinx.HLE.HOS.Kernel.Process; using System; @@ -11,14 +10,12 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall private Switch _device; private KProcess _process; private Horizon _system; - private MemoryManager _memory; public SvcHandler(Switch device, KProcess process) { _device = device; _process = process; _system = device.System; - _memory = process.CpuMemory; } public void SvcCall(object sender, InstExceptionEventArgs e) diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs index 5493941894..e19d9d2687 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs @@ -93,7 +93,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall private KernelResult SendSyncRequest(ulong messagePtr, ulong size, int handle) { - byte[] messageData = _memory.ReadBytes((long)messagePtr, (long)size); + byte[] messageData = _process.CpuMemory.ReadBytes((long)messagePtr, (long)size); KClientSession clientSession = _process.HandleTable.GetObject(handle); @@ -142,7 +142,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall ipcMessage.Thread.ObjSyncResult = IpcHandler.IpcCall( _device, _process, - _memory, + _process.CpuMemory, ipcMessage.Session, ipcMessage.Message, ipcMessage.MessagePtr); diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcMemory.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcMemory.cs index 6f8180c507..f794d13073 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcMemory.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcMemory.cs @@ -62,11 +62,6 @@ namespace 
Ryujinx.HLE.HOS.Kernel.SupervisorCall attributeMask, attributeValue); - if (result == KernelResult.Success) - { - _memory.StopObservingRegion((long)position, (long)size); - } - return result; } @@ -157,14 +152,14 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall { KMemoryInfo blkInfo = _process.MemoryManager.QueryMemory(position); - _memory.WriteUInt64((long)infoPtr + 0x00, blkInfo.Address); - _memory.WriteUInt64((long)infoPtr + 0x08, blkInfo.Size); - _memory.WriteInt32 ((long)infoPtr + 0x10, (int)blkInfo.State & 0xff); - _memory.WriteInt32 ((long)infoPtr + 0x14, (int)blkInfo.Attribute); - _memory.WriteInt32 ((long)infoPtr + 0x18, (int)blkInfo.Permission); - _memory.WriteInt32 ((long)infoPtr + 0x1c, blkInfo.IpcRefCount); - _memory.WriteInt32 ((long)infoPtr + 0x20, blkInfo.DeviceRefCount); - _memory.WriteInt32 ((long)infoPtr + 0x24, 0); + _process.CpuMemory.WriteUInt64((long)infoPtr + 0x00, blkInfo.Address); + _process.CpuMemory.WriteUInt64((long)infoPtr + 0x08, blkInfo.Size); + _process.CpuMemory.WriteInt32 ((long)infoPtr + 0x10, (int)blkInfo.State & 0xff); + _process.CpuMemory.WriteInt32 ((long)infoPtr + 0x14, (int)blkInfo.Attribute); + _process.CpuMemory.WriteInt32 ((long)infoPtr + 0x18, (int)blkInfo.Permission); + _process.CpuMemory.WriteInt32 ((long)infoPtr + 0x1c, blkInfo.IpcRefCount); + _process.CpuMemory.WriteInt32 ((long)infoPtr + 0x20, blkInfo.DeviceRefCount); + _process.CpuMemory.WriteInt32 ((long)infoPtr + 0x24, 0); return KernelResult.Success; } diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs index be136ff0a5..efc10512ab 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs @@ -201,7 +201,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall private void OutputDebugString(ulong strPtr, ulong size) { - string str = MemoryHelper.ReadAsciiString(_memory, (long)strPtr, (long)size); + string str = 
MemoryHelper.ReadAsciiString(_process.CpuMemory, (long)strPtr, (long)size); Logger.PrintWarning(LogClass.KernelSvc, str); } diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs index 64268ff23d..fa0b3a6c88 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs @@ -1,3 +1,4 @@ +using ChocolArm64.Memory; using Ryujinx.HLE.HOS.Kernel.Common; using Ryujinx.HLE.HOS.Kernel.Process; using Ryujinx.HLE.HOS.Kernel.Threading; @@ -346,79 +347,81 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall return KernelResult.InvalidThread; } - _memory.WriteUInt64((long)address + 0x0, thread.Context.ThreadState.X0); - _memory.WriteUInt64((long)address + 0x8, thread.Context.ThreadState.X1); - _memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2); - _memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3); - _memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4); - _memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5); - _memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6); - _memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7); - _memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8); - _memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9); - _memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10); - _memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11); - _memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12); - _memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13); - _memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14); - _memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15); - _memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16); - _memory.WriteUInt64((long)address + 0x88, 
thread.Context.ThreadState.X17); - _memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18); - _memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19); - _memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20); - _memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21); - _memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22); - _memory.WriteUInt64((long)address + 0xb8, thread.Context.ThreadState.X23); - _memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24); - _memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25); - _memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26); - _memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27); - _memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28); - _memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29); - _memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30); - _memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31); + MemoryManager memory = currentProcess.CpuMemory; - _memory.WriteInt64((long)address + 0x100, thread.LastPc); + memory.WriteUInt64((long)address + 0x0, thread.Context.ThreadState.X0); + memory.WriteUInt64((long)address + 0x8, thread.Context.ThreadState.X1); + memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2); + memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3); + memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4); + memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5); + memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6); + memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7); + memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8); + memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9); + 
memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10); + memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11); + memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12); + memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13); + memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14); + memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15); + memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16); + memory.WriteUInt64((long)address + 0x88, thread.Context.ThreadState.X17); + memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18); + memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19); + memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20); + memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21); + memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22); + memory.WriteUInt64((long)address + 0xb8, thread.Context.ThreadState.X23); + memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24); + memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25); + memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26); + memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27); + memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28); + memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29); + memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30); + memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31); - _memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr); + memory.WriteInt64((long)address + 0x100, thread.LastPc); - _memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0); - _memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1); - 
_memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2); - _memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3); - _memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4); - _memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5); - _memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6); - _memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7); - _memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8); - _memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9); - _memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10); - _memory.WriteVector128((long)address + 0x1c0, thread.Context.ThreadState.V11); - _memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12); - _memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13); - _memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14); - _memory.WriteVector128((long)address + 0x200, thread.Context.ThreadState.V15); - _memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16); - _memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17); - _memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18); - _memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19); - _memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20); - _memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21); - _memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22); - _memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23); - _memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24); - _memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25); - _memory.WriteVector128((long)address + 0x2b0, 
thread.Context.ThreadState.V26); - _memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27); - _memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28); - _memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29); - _memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30); - _memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31); + memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr); - _memory.WriteInt32((long)address + 0x310, thread.Context.ThreadState.Fpcr); - _memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr); - _memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr); + memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0); + memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1); + memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2); + memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3); + memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4); + memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5); + memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6); + memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7); + memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8); + memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9); + memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10); + memory.WriteVector128((long)address + 0x1c0, thread.Context.ThreadState.V11); + memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12); + memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13); + memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14); + memory.WriteVector128((long)address + 0x200, 
thread.Context.ThreadState.V15); + memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16); + memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17); + memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18); + memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19); + memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20); + memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21); + memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22); + memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23); + memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24); + memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25); + memory.WriteVector128((long)address + 0x2b0, thread.Context.ThreadState.V26); + memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27); + memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28); + memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29); + memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30); + memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31); + + memory.WriteInt32((long)address + 0x310, thread.Context.ThreadState.Fpcr); + memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr); + memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr); return KernelResult.Success; } diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThreadSync.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThreadSync.cs index ecda9e2d00..6e5b478251 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThreadSync.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThreadSync.cs @@ -25,7 +25,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall for (int index = 0; index < handlesCount; index++) { - int handle = 
_memory.ReadInt32((long)handlesPtr + index * 4); + int handle = _process.CpuMemory.ReadInt32((long)handlesPtr + index * 4); KSynchronizationObject syncObj = _process.HandleTable.GetObject(handle); From 504f4f4abfd34696699fbf484264404f3011ec17 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 25 Feb 2019 20:46:34 -0300 Subject: [PATCH 07/12] Remove all the calls to StaticCast methods (#605) --- .../Instructions/InstEmitMemoryHelper.cs | 21 +- .../Instructions/InstEmitSimdArithmetic.cs | 342 ++++++++---------- ChocolArm64/Instructions/InstEmitSimdCmp.cs | 12 +- ChocolArm64/Instructions/InstEmitSimdCvt.cs | 10 +- .../Instructions/InstEmitSimdHelper.cs | 113 +----- .../Instructions/InstEmitSimdLogical.cs | 46 +-- ChocolArm64/Instructions/InstEmitSimdMove.cs | 36 +- ChocolArm64/Instructions/InstEmitSimdShift.cs | 56 +-- ChocolArm64/Instructions/VectorHelper.cs | 198 ---------- 9 files changed, 245 insertions(+), 589 deletions(-) diff --git a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs index 7645e36316..c225cdd8cc 100644 --- a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs +++ b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs @@ -200,7 +200,7 @@ namespace ChocolArm64.Instructions switch (size) { - case 2: context.EmitCall(typeof(Sse), nameof(Sse.LoadScalarVector128)); break; + case 2: context.EmitCall(typeof(Sse), nameof(Sse.LoadScalarVector128)); break; case 3: { @@ -208,12 +208,10 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.LoadScalarVector128), types)); - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle)); - break; } - case 4: context.EmitCall(typeof(Sse), nameof(Sse.LoadAlignedVector128)); break; + case 4: context.EmitCall(typeof(Sse), nameof(Sse.LoadAlignedVector128)); break; throw new InvalidOperationException($"Invalid vector load size of {1 << size} bytes."); } @@ -283,18 +281,9 @@ namespace ChocolArm64.Instructions switch (size) { - 
case 2: context.EmitCall(typeof(Sse), nameof(Sse.StoreScalar)); break; - - case 3: - { - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble)); - - context.EmitCall(typeof(Sse2), nameof(Sse2.StoreScalar)); - - break; - } - - case 4: context.EmitCall(typeof(Sse), nameof(Sse.StoreAligned)); break; + case 2: context.EmitCall(typeof(Sse), nameof(Sse.StoreScalar)); break; + case 3: context.EmitCall(typeof(Sse2), nameof(Sse2.StoreScalar)); break; + case 4: context.EmitCall(typeof(Sse), nameof(Sse.StoreAligned)); break; default: throw new InvalidOperationException($"Invalid vector store size of {1 << size} bytes."); } diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index acb9f7f093..f7236e9a4a 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -194,8 +194,7 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot)); - - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); context.EmitStvec(op.Rd); @@ -209,14 +208,13 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); - - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -252,8 +250,7 @@ namespace ChocolArm64.Instructions 
context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot)); - - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); context.EmitStvec(op.Rd); @@ -270,14 +267,13 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); - - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -321,11 +317,11 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -374,11 +370,11 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -445,12 +441,12 @@ namespace ChocolArm64.Instructions { Type[] typesAddH = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); 
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -536,14 +532,14 @@ namespace ChocolArm64.Instructions { Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Ra); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Ra); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), typesMulAdd)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -718,14 +714,14 @@ namespace ChocolArm64.Instructions { Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rd); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -751,18 +747,14 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rd); - context.EmitLdvec(op.Rn); - context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); - + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); - - 
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); context.EmitStvec(op.Rd); @@ -776,21 +768,17 @@ namespace ChocolArm64.Instructions Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rd); - - EmitLdvecWithCastToDouble(context, op.Rn); - - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); - + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); - - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -841,14 +829,14 @@ namespace ChocolArm64.Instructions { Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rd); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -874,17 +862,13 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rd); - context.EmitLdvec(op.Rn); - context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); 
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); - + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); context.EmitStvec(op.Rd); @@ -899,21 +883,17 @@ namespace ChocolArm64.Instructions Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rd); - - EmitLdvecWithCastToDouble(context, op.Rn); - - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); - + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -950,14 +930,14 @@ namespace ChocolArm64.Instructions { Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Ra); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Ra); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -1020,13 +1000,11 @@ namespace 
ChocolArm64.Instructions Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; context.EmitLdvec(op.Rn); - context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); - + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul)); context.EmitStvec(op.Rd); @@ -1041,17 +1019,15 @@ namespace ChocolArm64.Instructions Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rn); - - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); - + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -1125,11 +1101,11 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -1175,11 +1151,11 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); - 
EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -1242,8 +1218,7 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse - && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) { EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalScalar)); } @@ -1262,8 +1237,7 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse - && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) { EmitVectorSseOrSse2OpF(context, nameof(Sse.Reciprocal)); } @@ -1310,13 +1284,13 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(2d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -1367,13 +1341,13 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(2d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -1579,8 +1553,7 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse - && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse && sizeF 
== 0) { EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrtScalar)); } @@ -1599,8 +1572,7 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse - && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) { EmitVectorSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrt)); } @@ -1654,14 +1626,14 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(3d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); EmitVectorZeroUpper(context, op.Rd); } @@ -1719,14 +1691,14 @@ namespace ChocolArm64.Instructions context.EmitLdc_R8(3d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitLdvecWithCastToDouble(context, op.Rn); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } else @@ -1864,11 +1836,11 @@ namespace ChocolArm64.Instructions VectorHelper.EmitCall(context, namesSzv[op.Size]); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + 
context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -1953,14 +1925,14 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1969,7 +1941,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -1999,9 +1971,8 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); - - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2010,7 +1981,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2027,12 +1998,12 @@ namespace ChocolArm64.Instructions Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitStvectmp2(); @@ -2046,10 +2017,9 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); - - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2083,23 +2053,21 @@ namespace ChocolArm64.Instructions context.EmitStvectmp(); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub)); context.Emit(OpCodes.Dup); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdvectmp(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub)); - - 
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); - + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAddSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAddSub)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2128,12 +2096,12 @@ namespace ChocolArm64.Instructions Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2169,12 +2137,12 @@ namespace ChocolArm64.Instructions Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2219,16 +2187,15 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1); - - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2239,7 +2206,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2279,16 +2246,15 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithSignedCast(context, op.Rd, op.Size + 1); - - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2299,7 +2265,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2426,20 +2392,19 @@ namespace ChocolArm64.Instructions context.Emit(OpCodes.Dup); context.EmitStvectmp(); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd)); - 
EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesSubAdd)); - - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2478,14 +2443,14 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2494,7 +2459,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2519,9 +2484,8 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); - - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2530,7 +2494,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2632,14 +2596,14 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2648,7 +2612,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2697,9 +2661,8 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); - - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2708,7 +2671,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2725,12 +2688,12 @@ namespace ChocolArm64.Instructions Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitStvectmp2(); @@ -2744,10 +2707,9 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); - - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2774,16 +2736,15 @@ namespace ChocolArm64.Instructions { Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); + context.EmitLdvec(op.Rm); + 
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesAvgSub)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2812,12 +2773,12 @@ namespace ChocolArm64.Instructions Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2853,12 +2814,12 @@ namespace ChocolArm64.Instructions Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -2903,16 +2864,15 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1); - - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2923,7 +2883,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -2963,16 +2923,15 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size + 1); - - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -2983,7 +2942,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -3052,12 +3011,12 @@ namespace ChocolArm64.Instructions { Type[] typesAvg = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + 
context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvg)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -3106,14 +3065,14 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -3122,7 +3081,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -3147,9 +3106,8 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); - - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -3158,7 +3116,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs index fdf3951e64..c29dcd9dc5 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCmp.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs @@ -382,7 +382,7 @@ namespace ChocolArm64.Instructions ILLabel lblNaN = new ILLabel(); ILLabel lblEnd = new ILLabel(); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); @@ -393,7 +393,7 @@ namespace ChocolArm64.Instructions } else { - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rm); } context.Emit(OpCodes.Dup); @@ -656,12 +656,12 @@ namespace ChocolArm64.Instructions if (!isLeOrLt) { - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); } if (op is OpCodeSimdReg64 binOp) { - EmitLdvecWithCastToDouble(context, binOp.Rm); + context.EmitLdvec(binOp.Rm); } else { @@ -670,12 +670,12 @@ namespace ChocolArm64.Instructions if (isLeOrLt) { - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); } context.EmitCall(typeof(Sse2).GetMethod(name, types)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); if (scalar) { diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs index 11105d891f..78a86a33eb 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCvt.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs @@ -23,7 +23,7 @@ 
namespace ChocolArm64.Instructions //Double -> Single. VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) }; @@ -42,7 +42,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } else { @@ -91,7 +91,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } else { @@ -154,7 +154,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); context.Emit(OpCodes.Dup); @@ -332,7 +332,7 @@ namespace ChocolArm64.Instructions { Type[] typesCvt = new Type[] { typeof(Vector128) }; - EmitLdvecWithSignedCast(context, op.Rn, 2); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs index 5a44e1a148..b7dd09b4bb 100644 --- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -86,13 +86,13 @@ namespace ChocolArm64.Instructions { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); Type baseType = VectorIntTypesPerSizeLog2[op.Size]; if (op is OpCodeSimdReg64 binOp) { - EmitLdvecWithSignedCast(context, binOp.Rm, op.Size); + context.EmitLdvec(binOp.Rm); context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType })); } 
@@ -101,7 +101,7 @@ namespace ChocolArm64.Instructions context.EmitCall(type.GetMethod(name, new Type[] { baseType })); } - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -109,80 +109,6 @@ namespace ChocolArm64.Instructions } } - public static void EmitLdvecWithSignedCast(ILEmitterCtx context, int reg, int size) - { - context.EmitLdvec(reg); - - switch (size) - { - case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToSByte)); break; - case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt16)); break; - case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt32)); break; - case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToInt64)); break; - - default: throw new ArgumentOutOfRangeException(nameof(size)); - } - } - - public static void EmitLdvecWithCastToDouble(ILEmitterCtx context, int reg) - { - context.EmitLdvec(reg); - - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble)); - } - - public static void EmitStvecWithCastFromDouble(ILEmitterCtx context, int reg) - { - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle)); - - context.EmitStvec(reg); - } - - public static void EmitLdvecWithUnsignedCast(ILEmitterCtx context, int reg, int size) - { - context.EmitLdvec(reg); - - switch (size) - { - case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToByte)); break; - case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt16)); break; - case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt32)); break; - case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToUInt64)); break; - - default: throw new ArgumentOutOfRangeException(nameof(size)); - } - } - - public static void EmitStvecWithSignedCast(ILEmitterCtx context, int reg, int size) - { - switch (size) - { - case 0: 
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSByteToSingle)); break; - case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16ToSingle)); break; - case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32ToSingle)); break; - case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64ToSingle)); break; - - default: throw new ArgumentOutOfRangeException(nameof(size)); - } - - context.EmitStvec(reg); - } - - public static void EmitStvecWithUnsignedCast(ILEmitterCtx context, int reg, int size) - { - switch (size) - { - case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorByteToSingle)); break; - case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt16ToSingle)); break; - case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt32ToSingle)); break; - case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorUInt64ToSingle)); break; - - default: throw new ArgumentOutOfRangeException(nameof(size)); - } - - context.EmitStvec(reg); - } - public static void EmitScalarSseOrSse2OpF(ILEmitterCtx context, string name) { EmitSseOrSse2OpF(context, name, true); @@ -199,17 +125,7 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - void Ldvec(int reg) - { - context.EmitLdvec(reg); - - if (sizeF == 1) - { - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleToDouble)); - } - } - - Ldvec(op.Rn); + context.EmitLdvec(op.Rn); Type type; Type baseType; @@ -227,7 +143,7 @@ namespace ChocolArm64.Instructions if (op is OpCodeSimdReg64 binOp) { - Ldvec(binOp.Rm); + context.EmitLdvec(binOp.Rm); context.EmitCall(type.GetMethod(name, new Type[] { baseType, baseType })); } @@ -236,11 +152,6 @@ namespace ChocolArm64.Instructions context.EmitCall(type.GetMethod(name, new Type[] { baseType })); } - if (sizeF == 1) - { - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleToSingle)); - } - context.EmitStvec(op.Rd); if (scalar) @@ -1014,12 +925,12 @@ namespace 
ChocolArm64.Instructions { Type[] types = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); - EmitLdvecWithCastToDouble(context, op.Rm); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); context.EmitStvectmp2(); @@ -1033,7 +944,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(name, types)); - EmitStvecWithCastFromDouble(context, op.Rd); + context.EmitStvec(op.Rd); } } @@ -1277,13 +1188,9 @@ namespace ChocolArm64.Instructions } // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). - public static void EmitSatQ( - ILEmitterCtx context, - int sizeDst, - bool signedSrc, - bool signedDst) + public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst) { - if (sizeDst > 2) + if ((uint)sizeDst > 2) { throw new ArgumentOutOfRangeException(nameof(sizeDst)); } diff --git a/ChocolArm64/Instructions/InstEmitSimdLogical.cs b/ChocolArm64/Instructions/InstEmitSimdLogical.cs index 3473fc5d98..6c718182db 100644 --- a/ChocolArm64/Instructions/InstEmitSimdLogical.cs +++ b/ChocolArm64/Instructions/InstEmitSimdLogical.cs @@ -32,12 +32,12 @@ namespace ChocolArm64.Instructions Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rm, 0); - EmitLdvecWithUnsignedCast(context, op.Rn, 0); + context.EmitLdvec(op.Rm); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); - EmitStvecWithUnsignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -83,16 +83,16 @@ namespace ChocolArm64.Instructions string nameAndNot = notRm ? 
nameof(Sse2.AndNot) : nameof(Sse2.And); - EmitLdvecWithUnsignedCast(context, op.Rd, 0); - EmitLdvecWithUnsignedCast(context, op.Rm, 0); - EmitLdvecWithUnsignedCast(context, op.Rn, 0); - EmitLdvecWithUnsignedCast(context, op.Rd, 0); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rm); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rd); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot)); context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot)); - EmitStvecWithUnsignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -141,20 +141,20 @@ namespace ChocolArm64.Instructions Type[] typesXorAnd = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rm, 0); + context.EmitLdvec(op.Rm); context.Emit(OpCodes.Dup); - EmitLdvecWithUnsignedCast(context, op.Rn, 0); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); - EmitLdvecWithUnsignedCast(context, op.Rd, 0); + context.EmitLdvec(op.Rd); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd)); - EmitStvecWithUnsignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -199,14 +199,14 @@ namespace ChocolArm64.Instructions Type[] typesSav = new Type[] { typeof(byte) }; Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rn, 0); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(byte.MaxValue); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); - EmitStvecWithUnsignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -228,8 
+228,8 @@ namespace ChocolArm64.Instructions Type[] typesSav = new Type[] { typeof(byte) }; Type[] typesAndNotOr = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rn, 0); - EmitLdvecWithUnsignedCast(context, op.Rm, 0); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(byte.MaxValue); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); @@ -237,7 +237,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndNotOr)); - EmitStvecWithUnsignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -305,7 +305,7 @@ namespace ChocolArm64.Instructions Type[] typesSve = new Type[] { typeof(long), typeof(long) }; Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithSignedCast(context, op.Rn, 0); // value + context.EmitLdvec(op.Rn); // value context.EmitLdc_I8(14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0); // maskE1 context.EmitLdc_I8(06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0); // maskE0 @@ -314,7 +314,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); - EmitStvecWithSignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -336,7 +336,7 @@ namespace ChocolArm64.Instructions Type[] typesSve = new Type[] { typeof(long), typeof(long) }; Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + context.EmitLdvec(op.Rn); // value if (op.Size == 0) { @@ -353,7 +353,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); - 
EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -375,7 +375,7 @@ namespace ChocolArm64.Instructions Type[] typesSve = new Type[] { typeof(long), typeof(long) }; Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + context.EmitLdvec(op.Rn); // value if (op.Size == 0) { @@ -397,7 +397,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs index 2844dfdf4c..7145263d38 100644 --- a/ChocolArm64/Instructions/InstEmitSimdMove.cs +++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs @@ -59,7 +59,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); } else { @@ -108,7 +108,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); } else { @@ -138,7 +138,7 @@ namespace ChocolArm64.Instructions Type[] typesShs = new Type[] { typeof(Vector128), typeof(byte) }; Type[] typesOr = new Type[] { typeof(Vector128), typeof(Vector128) }; - EmitLdvecWithUnsignedCast(context, op.Rn, 0); + context.EmitLdvec(op.Rn); if (op.RegisterSize == RegisterSize.Simd64) { @@ -150,7 +150,7 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(op.Imm4); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs)); - EmitLdvecWithUnsignedCast(context, op.Rm, 0); + context.EmitLdvec(op.Rm); context.EmitLdc_I4((op.RegisterSize == 
RegisterSize.Simd64 ? 8 : 16) - op.Imm4); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs)); @@ -164,7 +164,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); - EmitStvecWithUnsignedCast(context, op.Rd, 0); + context.EmitStvec(op.Rd); } else { @@ -418,7 +418,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); - EmitLdvecWithSignedCast(context, op.Rn, 0); // value + context.EmitLdvec(op.Rn); // value context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // mask context.Emit(OpCodes.Dup); // mask @@ -492,7 +492,7 @@ namespace ChocolArm64.Instructions ? nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + context.EmitLdvec(op.Rn); // value if (op.Size < 3) { @@ -504,7 +504,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); } - EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value + context.EmitLdvec(op.Rm); // value if (op.Size < 3) { @@ -518,7 +518,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size))); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); } else { @@ -560,7 +560,7 @@ namespace ChocolArm64.Instructions if (op.RegisterSize == RegisterSize.Simd128) { - EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + context.EmitLdvec(op.Rn); // value if (op.Size < 3) { @@ -572,7 +572,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); } - EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value + context.EmitLdvec(op.Rm); // value if (op.Size < 3) { @@ -586,12 +586,12 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); - EmitStvecWithSignedCast(context, op.Rd, 
op.Size); + context.EmitStvec(op.Rd); } else { - EmitLdvecWithSignedCast(context, op.Rn, op.Size); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value @@ -609,7 +609,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); } } else @@ -648,8 +648,8 @@ namespace ChocolArm64.Instructions ? nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); - EmitLdvecWithSignedCast(context, op.Rm, op.Size); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); if (op.RegisterSize == RegisterSize.Simd128) { @@ -663,7 +663,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); } - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); } else { diff --git a/ChocolArm64/Instructions/InstEmitSimdShift.cs b/ChocolArm64/Instructions/InstEmitSimdShift.cs index 843052110f..c0b20d7ea6 100644 --- a/ChocolArm64/Instructions/InstEmitSimdShift.cs +++ b/ChocolArm64/Instructions/InstEmitSimdShift.cs @@ -42,12 +42,12 @@ namespace ChocolArm64.Instructions { Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -82,7 +82,7 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); @@ -92,7 +92,7 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -280,7 +280,7 @@ namespace ChocolArm64.Instructions int shift = GetImmShr(op); int eSize = 8 << op.Size; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); @@ -298,7 +298,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -329,8 +329,8 @@ namespace ChocolArm64.Instructions int shift = GetImmShr(op); int eSize = 8 << op.Size; - EmitLdvecWithSignedCast(context, op.Rd, op.Size); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); @@ -349,7 +349,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -405,7 +405,7 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 
8 : 0; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); @@ -415,7 +415,7 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); - EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -437,12 +437,12 @@ namespace ChocolArm64.Instructions { Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(GetImmShr(op)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -470,15 +470,15 @@ namespace ChocolArm64.Instructions Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - EmitLdvecWithSignedCast(context, op.Rd, op.Size); - EmitLdvecWithSignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(GetImmShr(op)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithSignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -610,7 +610,7 @@ namespace ChocolArm64.Instructions int shift = GetImmShr(op); int eSize = 8 << op.Size; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); @@ -628,7 +628,7 @@ namespace ChocolArm64.Instructions 
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -658,8 +658,8 @@ namespace ChocolArm64.Instructions int shift = GetImmShr(op); int eSize = 8 << op.Size; - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); @@ -678,7 +678,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -734,7 +734,7 @@ namespace ChocolArm64.Instructions int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll)); @@ -744,7 +744,7 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + context.EmitStvec(op.Rd); } else { @@ -765,12 +765,12 @@ namespace ChocolArm64.Instructions { Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(GetImmShr(op)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { @@ -797,15 +797,15 @@ namespace ChocolArm64.Instructions Type[] typesSrl = new Type[] { 
VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitLdvecWithUnsignedCast(context, op.Rd, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); context.EmitLdc_I4(GetImmShr(op)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { diff --git a/ChocolArm64/Instructions/VectorHelper.cs b/ChocolArm64/Instructions/VectorHelper.cs index f02c131e68..edb3428d85 100644 --- a/ChocolArm64/Instructions/VectorHelper.cs +++ b/ChocolArm64/Instructions/VectorHelper.cs @@ -565,203 +565,5 @@ namespace ChocolArm64.Instructions throw new PlatformNotSupportedException(); } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToSByte(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToInt16(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToInt32(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToInt64(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static 
Vector128 VectorSingleToByte(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToUInt16(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToUInt32(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToUInt64(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSingleToDouble(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorSByteToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorInt16ToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorInt32ToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorInt64ToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return 
Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorByteToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorUInt16ToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorUInt32ToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorUInt64ToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorDoubleToSingle(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.StaticCast(vector); - } - - throw new PlatformNotSupportedException(); - } } } From ef3f9a2abe85cfd572ab6bec73481e3526762dcc Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Feb 2019 06:16:50 -0300 Subject: [PATCH 08/12] Optmize BFM instruction (#607) --- ChocolArm64/Instructions/InstEmitBfm.cs | 55 ++++++++++++++++++++----- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/ChocolArm64/Instructions/InstEmitBfm.cs b/ChocolArm64/Instructions/InstEmitBfm.cs index d25af8be8a..4a03959940 100644 --- a/ChocolArm64/Instructions/InstEmitBfm.cs +++ b/ChocolArm64/Instructions/InstEmitBfm.cs @@ -11,21 +11,56 @@ namespace ChocolArm64.Instructions { OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp; - EmitBfmLoadRn(context); + if (op.Pos < op.Shift) + { + //BFI. 
+ context.EmitLdintzr(op.Rn); - context.EmitLdintzr(op.Rd); - context.EmitLdc_I(~op.WMask & op.TMask); + int shift = op.GetBitsCount() - op.Shift; - context.Emit(OpCodes.And); - context.Emit(OpCodes.Or); + int width = op.Pos + 1; - context.EmitLdintzr(op.Rd); - context.EmitLdc_I(~op.TMask); + long mask = (long)(ulong.MaxValue >> (64 - width)); - context.Emit(OpCodes.And); - context.Emit(OpCodes.Or); + context.EmitLdc_I(mask); - context.EmitStintzr(op.Rd); + context.Emit(OpCodes.And); + + context.EmitLsl(shift); + + context.EmitLdintzr(op.Rd); + + context.EmitLdc_I(~(mask << shift)); + + context.Emit(OpCodes.And); + context.Emit(OpCodes.Or); + + context.EmitStintzr(op.Rd); + } + else + { + //BFXIL. + context.EmitLdintzr(op.Rn); + + context.EmitLsr(op.Shift); + + int width = op.Pos - op.Shift + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + context.EmitLdc_I(mask); + + context.Emit(OpCodes.And); + + context.EmitLdintzr(op.Rd); + + context.EmitLdc_I(~mask); + + context.Emit(OpCodes.And); + context.Emit(OpCodes.Or); + + context.EmitStintzr(op.Rd); + } } public static void Sbfm(ILEmitterCtx context) From 81aa50feb0899e73ee62e5113b786efe0ff6b7a9 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Feb 2019 09:50:36 -0300 Subject: [PATCH 09/12] Optimize MOVI/MVNI instructions using intrinsics (#606) --- ChocolArm64/Instructions/InstEmitSimdMove.cs | 50 +++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs index 7145263d38..20647ce09d 100644 --- a/ChocolArm64/Instructions/InstEmitSimdMove.cs +++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs @@ -318,12 +318,26 @@ namespace ChocolArm64.Instructions public static void Movi_V(ILEmitterCtx context) { - EmitVectorImmUnaryOp(context, () => { }); + if (Optimizations.UseSse2) + { + EmitMoviMvni(context, not: false); + } + else + { + EmitVectorImmUnaryOp(context, () => { }); + } } public static 
void Mvni_V(ILEmitterCtx context) { - EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not)); + if (Optimizations.UseSse2) + { + EmitMoviMvni(context, not: true); + } + else + { + EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not)); + } } public static void Smov_S(ILEmitterCtx context) @@ -480,6 +494,38 @@ namespace ChocolArm64.Instructions } } + private static void EmitMoviMvni(ILEmitterCtx context, bool not) + { + OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp; + + Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] }; + + long imm = op.Imm; + + if (not) + { + imm = ~imm; + } + + if (op.Size < 3) + { + context.EmitLdc_I4((int)imm); + } + else + { + context.EmitLdc_I8(imm); + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + private static void EmitVectorTranspose(ILEmitterCtx context, int part) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; From 884b4e5fd3c2a54ebb796b7f995c0eda9c4d0038 Mon Sep 17 00:00:00 2001 From: Thomas Guillemard Date: Thu, 28 Feb 2019 02:12:24 +0100 Subject: [PATCH 10/12] Initial non 2D textures support (#525) * Initial non 2D textures support - Shaders still need to be changed - Some types aren't yet implemented * Start implementing texture instructions suffixes Fix wrong texture type with cube and TEXS Also support array textures in TEX and TEX.B Clean up TEX and TEXS coords managment Fix TEXS.LL with non-2d textures Implement TEX.AOFFI Get the right arguments for TEX, TEXS and TLDS Also, store suffix operands in appropriate values to support multiple suffix combinaisons * Support depth in read/writeTexture Also support WrapR and detect mipmap * Proper cube map textures support + fix TEXS.LZ * Implement depth compare * some code clean up * Implement CubeMap textures in OGLTexture.Create * Implement TLD4 and TLD4S * Add Texture 1D 
support * updates comments * fix some code style issues * Fix some nits + rename some things to be less confusing * Remove GetSuffix local functions * AOFFI => AOffI * TextureType => GalTextureTarget * finish renaming TextureType to TextureTarget * Disable LL, LZ and LB support in the decompiler This needs more work at the GL level (GLSL implementation should be right) * Revert "Disable LL, LZ and LB support in the decompiler" This reverts commit 64536c3d9f673645faff3152838d1413c3203395. * Fix TEXS ARRAY_2D index * ImageFormat depth should be 1 for all image format * Fix shader build issues with sampler1DShadow and texture * Fix DC & AOFFI combination with TEX/TEXS * Support AOFFI with TLD4 and TLD4S * Fix shader compilation error for TLD4.AOFFI with no DC * Fix binding issues on the 2d copy engine TODO: support 2d array copy * Support 2D array copy operation in the 2D engine This makes every copy right in the GPU side. The CPU copy probably needs to be updated * Implement GetGpuSize + fix some issues with 2d engine copies TODO: mipmap level in it * Don't throw an exception in the layer handling * Fix because of rebase * Reject 2d layers of non textures in 2d copy engine * Add 3D textures and mipmap support on BlockLinearSwizzle * Fix naming on new BitUtils methods * gpu cache: Make sure to invalidate textures that don't have the same target * Add the concept of layer count for array instead of using depth Also cleanup GetGpuSize as Swizzle can compute the size with mipmap * Support multi layer with mip map in ReadTexture * Add more check for cache invalidation & remove cubemap and cubemap array code for now Also fix compressed 2d array * Fix texelFetchOffset shader build error * Start looking into cube map again Also add some way to log write in register in engines * fix write register log levels * Remove debug logs in WriteRegister * Disable AOFFI support on non NVIDIA drivers * Fix code align --- Ryujinx.Common/Utilities/BitUtils.cs | 35 +
Ryujinx.Graphics/DepthCompareFunc.cs | 14 + Ryujinx.Graphics/Gal/GalImage.cs | 36 +- Ryujinx.Graphics/Gal/GalTextureSampler.cs | 10 +- Ryujinx.Graphics/Gal/GalTextureTarget.cs | 15 + Ryujinx.Graphics/Gal/IGalRenderTarget.cs | 24 +- Ryujinx.Graphics/Gal/IGalTexture.cs | 2 +- Ryujinx.Graphics/Gal/OpenGL/ImageHandler.cs | 1 + .../Gal/OpenGL/OGLEnumConverter.cs | 25 + Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs | 7 + .../Gal/OpenGL/OGLRenderTarget.cs | 55 +- Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 2 +- Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs | 256 ++++++-- Ryujinx.Graphics/Gal/Shader/GlslDecl.cs | 26 +- Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs | 322 ++++++++- .../Gal/Shader/ShaderDecodeMem.cs | 613 +++++++++++++++++- .../Gal/Shader/ShaderDecodeOpCode.cs | 5 + Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs | 1 + .../Gal/Shader/ShaderIrMetaTex.cs | 18 +- .../Gal/Shader/ShaderOpCodeTable.cs | 2 + Ryujinx.Graphics/Gal/ShaderDeclInfo.cs | 23 +- Ryujinx.Graphics/GpuResourceManager.cs | 33 +- Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs | 96 ++- .../Graphics3d/NvGpuEngine2dReg.cs | 2 + Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs | 14 +- .../Graphics3d/NvGpuEngine3dReg.cs | 217 ++++--- .../Graphics3d/NvGpuEngineM2mf.cs | 21 +- .../Graphics3d/NvGpuEngineP2mf.cs | 8 +- .../Graphics3d/Texture/ASTCDecoder.cs | 1 + .../Graphics3d/Texture/BlockLinearSwizzle.cs | 171 ++++- .../Graphics3d/Texture/ISwizzle.cs | 8 +- .../Graphics3d/Texture/ImageUtils.cs | 239 +++++-- .../Graphics3d/Texture/LinearSwizzle.cs | 36 +- .../Graphics3d/Texture/TextureFactory.cs | 65 +- .../Graphics3d/Texture/TextureHelper.cs | 17 +- .../Texture/TextureInstructionSuffix.cs | 19 + Ryujinx.Graphics/VDec/VideoDecoder.cs | 7 +- Ryujinx.HLE/HOS/Services/Vi/NvFlinger.cs | 6 +- Ryujinx.ShaderTools/Program.cs | 2 +- 39 files changed, 2084 insertions(+), 370 deletions(-) create mode 100644 Ryujinx.Graphics/DepthCompareFunc.cs create mode 100644 Ryujinx.Graphics/Gal/GalTextureTarget.cs create mode 100644 
Ryujinx.Graphics/Texture/TextureInstructionSuffix.cs diff --git a/Ryujinx.Common/Utilities/BitUtils.cs b/Ryujinx.Common/Utilities/BitUtils.cs index 135b397d3d..b6fba4fba1 100644 --- a/Ryujinx.Common/Utilities/BitUtils.cs +++ b/Ryujinx.Common/Utilities/BitUtils.cs @@ -34,6 +34,11 @@ namespace Ryujinx.Common return value & -(long)size; } + public static int DivRoundUp(int value, int dividend) + { + return (value + dividend - 1) / dividend; + } + public static ulong DivRoundUp(ulong value, uint dividend) { return (value + dividend - 1) / dividend; @@ -44,6 +49,24 @@ namespace Ryujinx.Common return (value + dividend - 1) / dividend; } + public static int Pow2RoundUp(int value) + { + value--; + + value |= (value >> 1); + value |= (value >> 2); + value |= (value >> 4); + value |= (value >> 8); + value |= (value >> 16); + + return ++value; + } + + public static int Pow2RoundDown(int value) + { + return IsPowerOfTwo32(value) ? value : Pow2RoundUp(value) >> 1; + } + public static bool IsPowerOfTwo32(int value) { return value != 0 && (value & (value - 1)) == 0; @@ -85,6 +108,18 @@ namespace Ryujinx.Common return (ulong)count; } + public static int CountTrailingZeros32(int value) + { + int count = 0; + + while (((value >> count) & 1) == 0) + { + count++; + } + + return count; + } + public static long ReverseBits64(long value) { return (long)ReverseBits64((ulong)value); diff --git a/Ryujinx.Graphics/DepthCompareFunc.cs b/Ryujinx.Graphics/DepthCompareFunc.cs new file mode 100644 index 0000000000..24c8854a4b --- /dev/null +++ b/Ryujinx.Graphics/DepthCompareFunc.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics +{ + public enum DepthCompareFunc + { + Never = 0, + Less = 1, + Equal = 2, + LEqual = 3, + Greater = 4, + NotEqual = 5, + GEqual = 6, + Always = 7 + } +} diff --git a/Ryujinx.Graphics/Gal/GalImage.cs b/Ryujinx.Graphics/Gal/GalImage.cs index 92f43cc9d4..fb904b0925 100644 --- a/Ryujinx.Graphics/Gal/GalImage.cs +++ b/Ryujinx.Graphics/Gal/GalImage.cs @@ -6,9 +6,15 @@ namespace 
Ryujinx.Graphics.Gal { public int Width; public int Height; + + // FIXME: separate layer and depth + public int Depth; + public int LayerCount; public int TileWidth; public int GobBlockHeight; + public int GobBlockDepth; public int Pitch; + public int MaxMipmapLevel; public GalImageFormat Format; public GalMemoryLayout Layout; @@ -16,34 +22,45 @@ namespace Ryujinx.Graphics.Gal public GalTextureSource YSource; public GalTextureSource ZSource; public GalTextureSource WSource; + public GalTextureTarget TextureTarget; public GalImage( int Width, int Height, + int Depth, + int LayerCount, int TileWidth, int GobBlockHeight, + int GobBlockDepth, GalMemoryLayout Layout, GalImageFormat Format, - GalTextureSource XSource = GalTextureSource.Red, - GalTextureSource YSource = GalTextureSource.Green, - GalTextureSource ZSource = GalTextureSource.Blue, - GalTextureSource WSource = GalTextureSource.Alpha) + GalTextureTarget TextureTarget, + int MaxMipmapLevel = 1, + GalTextureSource XSource = GalTextureSource.Red, + GalTextureSource YSource = GalTextureSource.Green, + GalTextureSource ZSource = GalTextureSource.Blue, + GalTextureSource WSource = GalTextureSource.Alpha) { this.Width = Width; this.Height = Height; + this.LayerCount = LayerCount; + this.Depth = Depth; this.TileWidth = TileWidth; this.GobBlockHeight = GobBlockHeight; + this.GobBlockDepth = GobBlockDepth; this.Layout = Layout; this.Format = Format; + this.MaxMipmapLevel = MaxMipmapLevel; this.XSource = XSource; this.YSource = YSource; this.ZSource = ZSource; this.WSource = WSource; + this.TextureTarget = TextureTarget; Pitch = ImageUtils.GetPitch(Format, Width); } - public bool SizeMatches(GalImage Image) + public bool SizeMatches(GalImage Image, bool IgnoreLayer = false) { if (ImageUtils.GetBytesPerPixel(Format) != ImageUtils.GetBytesPerPixel(Image.Format)) @@ -57,7 +74,14 @@ namespace Ryujinx.Graphics.Gal return false; } - return Height == Image.Height; + bool Result = Height == Image.Height && Depth == Image.Depth; 
+ + if (!IgnoreLayer) + { + Result = Result && LayerCount == Image.LayerCount; + } + + return Result; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/GalTextureSampler.cs b/Ryujinx.Graphics/Gal/GalTextureSampler.cs index b9e5c7658d..1d658cea85 100644 --- a/Ryujinx.Graphics/Gal/GalTextureSampler.cs +++ b/Ryujinx.Graphics/Gal/GalTextureSampler.cs @@ -12,6 +12,9 @@ namespace Ryujinx.Graphics.Gal public GalColorF BorderColor { get; private set; } + public bool DepthCompare { get; private set; } + public DepthCompareFunc DepthCompareFunc { get; private set; } + public GalTextureSampler( GalTextureWrap AddressU, GalTextureWrap AddressV, @@ -19,7 +22,9 @@ namespace Ryujinx.Graphics.Gal GalTextureFilter MinFilter, GalTextureFilter MagFilter, GalTextureMipFilter MipFilter, - GalColorF BorderColor) + GalColorF BorderColor, + bool DepthCompare, + DepthCompareFunc DepthCompareFunc) { this.AddressU = AddressU; this.AddressV = AddressV; @@ -28,6 +33,9 @@ namespace Ryujinx.Graphics.Gal this.MagFilter = MagFilter; this.MipFilter = MipFilter; this.BorderColor = BorderColor; + + this.DepthCompare = DepthCompare; + this.DepthCompareFunc = DepthCompareFunc; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/GalTextureTarget.cs b/Ryujinx.Graphics/Gal/GalTextureTarget.cs new file mode 100644 index 0000000000..bcc0c49a51 --- /dev/null +++ b/Ryujinx.Graphics/Gal/GalTextureTarget.cs @@ -0,0 +1,15 @@ +namespace Ryujinx.Graphics.Gal +{ + public enum GalTextureTarget + { + OneD = 0, + TwoD = 1, + ThreeD = 2, + CubeMap = 3, + OneDArray = 4, + TwoDArray = 5, + OneDBuffer = 6, + TwoDNoMipMap = 7, + CubeArray = 8, + } +} diff --git a/Ryujinx.Graphics/Gal/IGalRenderTarget.cs b/Ryujinx.Graphics/Gal/IGalRenderTarget.cs index f941ccd584..90cad856d9 100644 --- a/Ryujinx.Graphics/Gal/IGalRenderTarget.cs +++ b/Ryujinx.Graphics/Gal/IGalRenderTarget.cs @@ -25,16 +25,20 @@ namespace Ryujinx.Graphics.Gal void Render(); void Copy( - long SrcKey, - long DstKey, - int 
SrcX0, - int SrcY0, - int SrcX1, - int SrcY1, - int DstX0, - int DstY0, - int DstX1, - int DstY1); + GalImage SrcImage, + GalImage DstImage, + long SrcKey, + long DstKey, + int SrcLayer, + int DstLayer, + int SrcX0, + int SrcY0, + int SrcX1, + int SrcY1, + int DstX0, + int DstY0, + int DstX1, + int DstY1); void Reinterpret(long Key, GalImage NewImage); } diff --git a/Ryujinx.Graphics/Gal/IGalTexture.cs b/Ryujinx.Graphics/Gal/IGalTexture.cs index aeecdf1ac5..de4ba9cba7 100644 --- a/Ryujinx.Graphics/Gal/IGalTexture.cs +++ b/Ryujinx.Graphics/Gal/IGalTexture.cs @@ -13,6 +13,6 @@ namespace Ryujinx.Graphics.Gal void Bind(long Key, int Index, GalImage Image); - void SetSampler(GalTextureSampler Sampler); + void SetSampler(GalImage Image, GalTextureSampler Sampler); } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/ImageHandler.cs b/Ryujinx.Graphics/Gal/OpenGL/ImageHandler.cs index 8db0b8a8c9..5714f3d891 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/ImageHandler.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/ImageHandler.cs @@ -8,6 +8,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL public int Width => Image.Width; public int Height => Image.Height; + public int Depth => Image.Depth; public GalImageFormat Format => Image.Format; diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index f2afe7b556..3a25fff7a5 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -189,6 +189,31 @@ namespace Ryujinx.Graphics.Gal.OpenGL throw new NotImplementedException($"{Format & GalImageFormat.FormatMask} {Format & GalImageFormat.TypeMask}"); } + public static All GetDepthCompareFunc(DepthCompareFunc DepthCompareFunc) + { + switch (DepthCompareFunc) + { + case DepthCompareFunc.LEqual: + return All.Lequal; + case DepthCompareFunc.GEqual: + return All.Gequal; + case DepthCompareFunc.Less: + return All.Less; + case DepthCompareFunc.Greater: + return All.Greater; + case 
DepthCompareFunc.Equal: + return All.Equal; + case DepthCompareFunc.NotEqual: + return All.Notequal; + case DepthCompareFunc.Always: + return All.Always; + case DepthCompareFunc.Never: + return All.Never; + default: + throw new ArgumentException(nameof(DepthCompareFunc) + " \"" + DepthCompareFunc + "\" is not valid!"); + } + } + public static InternalFormat GetCompressedImageFormat(GalImageFormat Format) { switch (Format) diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs index 11daeb593c..52b3d0ce31 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs @@ -9,9 +9,12 @@ namespace Ryujinx.Graphics.Gal.OpenGL private static Lazy s_TextureMirrorClamp = new Lazy(() => HasExtension("GL_EXT_texture_mirror_clamp")); private static Lazy s_ViewportArray = new Lazy(() => HasExtension("GL_ARB_viewport_array")); + private static Lazy s_NvidiaDriver = new Lazy(() => IsNvidiaDriver()); + public static bool EnhancedLayouts => s_EnhancedLayouts.Value; public static bool TextureMirrorClamp => s_TextureMirrorClamp.Value; public static bool ViewportArray => s_ViewportArray.Value; + public static bool NvidiaDrvier => s_NvidiaDriver.Value; private static bool HasExtension(string Name) { @@ -27,5 +30,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL return false; } + + private static bool IsNvidiaDriver() { + return GL.GetString(StringName.Vendor).Equals("NVIDIA Corporation"); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs index 0d7bb3cd0a..8dd3b37fc2 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRenderTarget.cs @@ -389,16 +389,20 @@ namespace Ryujinx.Graphics.Gal.OpenGL } public void Copy( - long SrcKey, - long DstKey, - int SrcX0, - int SrcY0, - int SrcX1, - int SrcY1, - int DstX0, - int DstY0, - int DstX1, - int DstY1) + GalImage SrcImage, + 
GalImage DstImage, + long SrcKey, + long DstKey, + int SrcLayer, + int DstLayer, + int SrcX0, + int SrcY0, + int SrcX1, + int SrcY1, + int DstX0, + int DstY0, + int DstX1, + int DstY1) { if (Texture.TryGetImageHandler(SrcKey, out ImageHandler SrcTex) && Texture.TryGetImageHandler(DstKey, out ImageHandler DstTex)) @@ -425,8 +429,24 @@ namespace Ryujinx.Graphics.Gal.OpenGL FramebufferAttachment Attachment = GetAttachment(SrcTex); - GL.FramebufferTexture(FramebufferTarget.ReadFramebuffer, Attachment, SrcTex.Handle, 0); - GL.FramebufferTexture(FramebufferTarget.DrawFramebuffer, Attachment, DstTex.Handle, 0); + if (ImageUtils.IsArray(SrcImage.TextureTarget) && SrcLayer > 0) + { + GL.FramebufferTextureLayer(FramebufferTarget.ReadFramebuffer, Attachment, SrcTex.Handle, 0, SrcLayer); + } + else + { + GL.FramebufferTexture(FramebufferTarget.ReadFramebuffer, Attachment, SrcTex.Handle, 0); + } + + if (ImageUtils.IsArray(DstImage.TextureTarget) && DstLayer > 0) + { + GL.FramebufferTextureLayer(FramebufferTarget.DrawFramebuffer, Attachment, DstTex.Handle, 0, DstLayer); + } + else + { + GL.FramebufferTexture(FramebufferTarget.DrawFramebuffer, Attachment, DstTex.Handle, 0); + } + BlitFramebufferFilter Filter = BlitFramebufferFilter.Nearest; @@ -452,7 +472,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL if (NewImage.Format == OldImage.Format && NewImage.Width == OldImage.Width && - NewImage.Height == OldImage.Height) + NewImage.Height == OldImage.Height && + NewImage.Depth == OldImage.Depth && + NewImage.LayerCount == OldImage.LayerCount && + NewImage.TextureTarget == OldImage.TextureTarget) { return; } @@ -477,9 +500,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL (_, PixelFormat Format, PixelType Type) = OGLEnumConverter.GetImageFormat(CachedImage.Format); - GL.BindTexture(TextureTarget.Texture2D, CachedImage.Handle); + TextureTarget Target = ImageUtils.GetTextureTarget(NewImage.TextureTarget); - GL.GetTexImage(TextureTarget.Texture2D, 0, Format, Type, IntPtr.Zero); + 
GL.BindTexture(Target, CachedImage.Handle); + + GL.GetTexImage(Target, 0, Format, Type, IntPtr.Zero); GL.BindBuffer(BufferTarget.PixelPackBuffer, 0); GL.BindBuffer(BufferTarget.PixelUnpackBuffer, CopyPBO); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index 10a9120df2..dc168ff919 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -53,7 +53,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL { GlslProgram Program; - GlslDecompiler Decompiler = new GlslDecompiler(OGLLimit.MaxUboSize); + GlslDecompiler Decompiler = new GlslDecompiler(OGLLimit.MaxUboSize, OGLExtension.NvidiaDrvier); int ShaderDumpIndex = ShaderDumper.DumpIndex; diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs index ef984b1ed3..4fef11d296 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs @@ -38,7 +38,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL { int Handle = GL.GenTexture(); - GL.BindTexture(TextureTarget.Texture2D, Handle); + TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget); + + GL.BindTexture(Target, Handle); const int Level = 0; //TODO: Support mipmap textures. 
const int Border = 0; @@ -54,23 +56,70 @@ namespace Ryujinx.Graphics.Gal.OpenGL PixelFormat Format, PixelType Type) = OGLEnumConverter.GetImageFormat(Image.Format); - GL.TexImage2D( - TextureTarget.Texture2D, - Level, - InternalFmt, - Image.Width, - Image.Height, - Border, - Format, - Type, - IntPtr.Zero); + switch (Target) + { + case TextureTarget.Texture1D: + GL.TexImage1D( + Target, + Level, + InternalFmt, + Image.Width, + Border, + Format, + Type, + IntPtr.Zero); + break; + + case TextureTarget.Texture2D: + GL.TexImage2D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Border, + Format, + Type, + IntPtr.Zero); + break; + case TextureTarget.Texture3D: + GL.TexImage3D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Image.Depth, + Border, + Format, + Type, + IntPtr.Zero); + break; + case TextureTarget.Texture2DArray: + GL.TexImage3D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Image.LayerCount, + Border, + Format, + Type, + IntPtr.Zero); + break; + default: + throw new NotImplementedException($"Unsupported texture target type: {Target}"); + } } public void Create(long Key, byte[] Data, GalImage Image) { int Handle = GL.GenTexture(); - GL.BindTexture(TextureTarget.Texture2D, Handle); + TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget); + + GL.BindTexture(Target, Handle); const int Level = 0; //TODO: Support mipmap textures. 
const int Border = 0; @@ -81,15 +130,56 @@ namespace Ryujinx.Graphics.Gal.OpenGL { InternalFormat InternalFmt = OGLEnumConverter.GetCompressedImageFormat(Image.Format); - GL.CompressedTexImage2D( - TextureTarget.Texture2D, - Level, - InternalFmt, - Image.Width, - Image.Height, - Border, - Data.Length, - Data); + switch (Target) + { + case TextureTarget.Texture1D: + GL.CompressedTexImage1D( + Target, + Level, + InternalFmt, + Image.Width, + Border, + Data.Length, + Data); + break; + case TextureTarget.Texture2D: + GL.CompressedTexImage2D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Border, + Data.Length, + Data); + break; + case TextureTarget.Texture3D: + GL.CompressedTexImage3D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Image.Depth, + Border, + Data.Length, + Data); + break; + case TextureTarget.Texture2DArray: + GL.CompressedTexImage3D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Image.LayerCount, + Border, + Data.Length, + Data); + break; + default: + throw new NotImplementedException($"Unsupported texture target type: {Target}"); + } } else { @@ -98,13 +188,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL { int TextureBlockWidth = ImageUtils.GetBlockWidth(Image.Format); int TextureBlockHeight = ImageUtils.GetBlockHeight(Image.Format); + int TextureBlockDepth = ImageUtils.GetBlockDepth(Image.Format); Data = ASTCDecoder.DecodeToRGBA8888( Data, TextureBlockWidth, - TextureBlockHeight, 1, + TextureBlockHeight, + TextureBlockDepth, Image.Width, - Image.Height, 1); + Image.Height, + Image.Depth); Image.Format = GalImageFormat.RGBA8 | (Image.Format & GalImageFormat.TypeMask); } @@ -113,16 +206,80 @@ namespace Ryujinx.Graphics.Gal.OpenGL PixelFormat Format, PixelType Type) = OGLEnumConverter.GetImageFormat(Image.Format); - GL.TexImage2D( - TextureTarget.Texture2D, - Level, - InternalFmt, - Image.Width, - Image.Height, - Border, - Format, - Type, - Data); + + switch (Target) + { + case 
TextureTarget.Texture1D: + GL.TexImage1D( + Target, + Level, + InternalFmt, + Image.Width, + Border, + Format, + Type, + Data); + break; + case TextureTarget.Texture2D: + GL.TexImage2D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Border, + Format, + Type, + Data); + break; + case TextureTarget.Texture3D: + GL.TexImage3D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Image.Depth, + Border, + Format, + Type, + Data); + break; + case TextureTarget.Texture2DArray: + GL.TexImage3D( + Target, + Level, + InternalFmt, + Image.Width, + Image.Height, + Image.LayerCount, + Border, + Format, + Type, + Data); + break; + case TextureTarget.TextureCubeMap: + Span Array = new Span(Data); + + int FaceSize = ImageUtils.GetSize(Image) / 6; + + for (int Face = 0; Face < 6; Face++) + { + GL.TexImage2D( + TextureTarget.TextureCubeMapPositiveX + Face, + Level, + InternalFmt, + Image.Width, + Image.Height, + Border, + Format, + Type, + Array.Slice(Face * FaceSize, FaceSize).ToArray()); + } + break; + default: + throw new NotImplementedException($"Unsupported texture target type: {Target}"); + } } } @@ -165,7 +322,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL { GL.ActiveTexture(TextureUnit.Texture0 + Index); - GL.BindTexture(TextureTarget.Texture2D, CachedImage.Handle); + TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget); + + GL.BindTexture(Target, CachedImage.Handle); int[] SwizzleRgba = new int[] { @@ -175,23 +334,27 @@ namespace Ryujinx.Graphics.Gal.OpenGL (int)OGLEnumConverter.GetTextureSwizzle(Image.WSource) }; - GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureSwizzleRgba, SwizzleRgba); + GL.TexParameter(Target, TextureParameterName.TextureSwizzleRgba, SwizzleRgba); } } - public void SetSampler(GalTextureSampler Sampler) + public void SetSampler(GalImage Image, GalTextureSampler Sampler) { int WrapS = (int)OGLEnumConverter.GetTextureWrapMode(Sampler.AddressU); int WrapT = 
(int)OGLEnumConverter.GetTextureWrapMode(Sampler.AddressV); + int WrapR = (int)OGLEnumConverter.GetTextureWrapMode(Sampler.AddressP); int MinFilter = (int)OGLEnumConverter.GetTextureMinFilter(Sampler.MinFilter, Sampler.MipFilter); int MagFilter = (int)OGLEnumConverter.GetTextureMagFilter(Sampler.MagFilter); - GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapS, WrapS); - GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapT, WrapT); + TextureTarget Target = ImageUtils.GetTextureTarget(Image.TextureTarget); - GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureMinFilter, MinFilter); - GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureMagFilter, MagFilter); + GL.TexParameter(Target, TextureParameterName.TextureWrapS, WrapS); + GL.TexParameter(Target, TextureParameterName.TextureWrapT, WrapT); + GL.TexParameter(Target, TextureParameterName.TextureWrapR, WrapR); + + GL.TexParameter(Target, TextureParameterName.TextureMinFilter, MinFilter); + GL.TexParameter(Target, TextureParameterName.TextureMagFilter, MagFilter); float[] Color = new float[] { @@ -201,7 +364,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL Sampler.BorderColor.Alpha }; - GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureBorderColor, Color); + GL.TexParameter(Target, TextureParameterName.TextureBorderColor, Color); + + if (Sampler.DepthCompare) + { + GL.TexParameter(Target, TextureParameterName.TextureCompareMode, (int)All.CompareRToTexture); + GL.TexParameter(Target, TextureParameterName.TextureCompareFunc, (int)OGLEnumConverter.GetDepthCompareFunc(Sampler.DepthCompareFunc)); + } + else + { + GL.TexParameter(Target, TextureParameterName.TextureCompareMode, (int)All.None); + GL.TexParameter(Target, TextureParameterName.TextureCompareFunc, (int)All.Never); + } } } } diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs index 43923da742..f7ae34faa5 100644 --- 
a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs @@ -1,3 +1,5 @@ +using Ryujinx.Graphics.Gal.OpenGL; +using Ryujinx.Graphics.Texture; using System; using System.Collections.Generic; @@ -224,6 +226,7 @@ namespace Ryujinx.Graphics.Gal.Shader if (Op.Inst == ShaderIrInst.Texq || Op.Inst == ShaderIrInst.Texs || + Op.Inst == ShaderIrInst.Tld4 || Op.Inst == ShaderIrInst.Txlf) { int Handle = ((ShaderIrOperImm)Op.OperandC).Value; @@ -232,7 +235,25 @@ namespace Ryujinx.Graphics.Gal.Shader string Name = StagePrefix + TextureName + Index; - m_Textures.TryAdd(Handle, new ShaderDeclInfo(Name, Handle)); + GalTextureTarget TextureTarget; + + TextureInstructionSuffix TextureInstructionSuffix; + + // TODO: Non 2D texture type for TEXQ? + if (Op.Inst == ShaderIrInst.Texq) + { + TextureTarget = GalTextureTarget.TwoD; + TextureInstructionSuffix = TextureInstructionSuffix.None; + } + else + { + ShaderIrMetaTex Meta = ((ShaderIrMetaTex)Op.MetaData); + + TextureTarget = Meta.TextureTarget; + TextureInstructionSuffix = Meta.TextureInstructionSuffix; + } + + m_Textures.TryAdd(Handle, new ShaderDeclInfo(Name, Handle, false, 0, 1, TextureTarget, TextureInstructionSuffix)); } else if (Op.Inst == ShaderIrInst.Texb) { @@ -257,9 +278,10 @@ namespace Ryujinx.Graphics.Gal.Shader if (HandleSrc != null && HandleSrc is ShaderIrOperCbuf Cbuf) { + ShaderIrMetaTex Meta = ((ShaderIrMetaTex)Op.MetaData); string Name = StagePrefix + TextureName + "_cb" + Cbuf.Index + "_" + Cbuf.Pos; - m_CbTextures.Add(Op, new ShaderDeclInfo(Name, Cbuf.Pos, true, Cbuf.Index)); + m_CbTextures.Add(Op, new ShaderDeclInfo(Name, Cbuf.Pos, true, Cbuf.Index, 1, Meta.TextureTarget, Meta.TextureInstructionSuffix)); } else { diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index 854c827ee0..5f809525f9 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -1,3 +1,5 @@ +using 
OpenTK.Graphics.OpenGL; +using Ryujinx.Graphics.Texture; using System; using System.Collections.Generic; using System.Globalization; @@ -33,7 +35,9 @@ namespace Ryujinx.Graphics.Gal.Shader public int MaxUboSize { get; } - public GlslDecompiler(int MaxUboSize) + private bool IsNvidiaDriver; + + public GlslDecompiler(int MaxUboSize, bool IsNvidiaDriver) { InstsExpr = new Dictionary() { @@ -103,6 +107,7 @@ namespace Ryujinx.Graphics.Gal.Shader { ShaderIrInst.Texb, GetTexbExpr }, { ShaderIrInst.Texq, GetTexqExpr }, { ShaderIrInst.Texs, GetTexsExpr }, + { ShaderIrInst.Tld4, GetTld4Expr }, { ShaderIrInst.Trunc, GetTruncExpr }, { ShaderIrInst.Txlf, GetTxlfExpr }, { ShaderIrInst.Utof, GetUtofExpr }, @@ -110,6 +115,7 @@ namespace Ryujinx.Graphics.Gal.Shader }; this.MaxUboSize = MaxUboSize / 16; + this.IsNvidiaDriver = IsNvidiaDriver; } public GlslProgram Decompile( @@ -219,14 +225,70 @@ namespace Ryujinx.Graphics.Gal.Shader } } + private string GetSamplerType(TextureTarget TextureTarget, bool HasShadow) + { + string Result; + + switch (TextureTarget) + { + case TextureTarget.Texture1D: + Result = "sampler1D"; + break; + case TextureTarget.Texture2D: + Result = "sampler2D"; + break; + case TextureTarget.Texture3D: + Result = "sampler3D"; + break; + case TextureTarget.TextureCubeMap: + Result = "samplerCube"; + break; + case TextureTarget.TextureRectangle: + Result = "sampler2DRect"; + break; + case TextureTarget.Texture1DArray: + Result = "sampler1DArray"; + break; + case TextureTarget.Texture2DArray: + Result = "sampler2DArray"; + break; + case TextureTarget.TextureCubeMapArray: + Result = "samplerCubeArray"; + break; + case TextureTarget.TextureBuffer: + Result = "samplerBuffer"; + break; + case TextureTarget.Texture2DMultisample: + Result = "sampler2DMS"; + break; + case TextureTarget.Texture2DMultisampleArray: + Result = "sampler2DMSArray"; + break; + default: + throw new NotSupportedException(); + } + + if (HasShadow) + Result += "Shadow"; + + return Result; + } + 
private void PrintDeclTextures() { foreach (ShaderDeclInfo DeclInfo in IterateCbTextures()) { - SB.AppendLine("uniform sampler2D " + DeclInfo.Name + ";"); + TextureTarget Target = ImageUtils.GetTextureTarget(DeclInfo.TextureTarget); + SB.AppendLine($"// {DeclInfo.TextureSuffix}"); + SB.AppendLine("uniform " + GetSamplerType(Target, (DeclInfo.TextureSuffix & TextureInstructionSuffix.DC) != 0) + " " + DeclInfo.Name + ";"); } - PrintDecls(Decl.Textures, "uniform sampler2D"); + foreach (ShaderDeclInfo DeclInfo in Decl.Textures.Values.OrderBy(DeclKeySelector)) + { + TextureTarget Target = ImageUtils.GetTextureTarget(DeclInfo.TextureTarget); + SB.AppendLine($"// {DeclInfo.TextureSuffix}"); + SB.AppendLine("uniform " + GetSamplerType(Target, (DeclInfo.TextureSuffix & TextureInstructionSuffix.DC) != 0) + " " + DeclInfo.Name + ";"); + } } private IEnumerable IterateCbTextures() @@ -778,6 +840,7 @@ namespace Ryujinx.Graphics.Gal.Shader case ShaderIrInst.Ipa: case ShaderIrInst.Texq: case ShaderIrInst.Texs: + case ShaderIrInst.Tld4: case ShaderIrInst.Txlf: return false; } @@ -1124,7 +1187,7 @@ namespace Ryujinx.Graphics.Gal.Shader string Ch = "rgba".Substring(Meta.Elem, 1); - return "texture(" + DeclInfo.Name + ", " + Coords + ")." + Ch; + return GetTextureOperation(Op, DeclInfo.Name, Coords, Ch); } private string GetTexqExpr(ShaderIrOp Op) @@ -1157,20 +1220,50 @@ namespace Ryujinx.Graphics.Gal.Shader string Ch = "rgba".Substring(Meta.Elem, 1); - return "texture(" + Sampler + ", " + Coords + ")." 
+ Ch; + return GetTextureOperation(Op, Sampler, Coords, Ch); } - private string GetTxlfExpr(ShaderIrOp Op) + private string GetTld4Expr(ShaderIrOp Op) { ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData; string Sampler = GetTexSamplerName(Op); + string Coords = GetTexSamplerCoords(Op); + + string Ch = "rgba".Substring(Meta.Elem, 1); + + return GetTextureGatherOperation(Op, Sampler, Coords, Ch); + } + + // TODO: support AOFFI on non nvidia drivers + private string GetTxlfExpr(ShaderIrOp Op) + { + // TODO: Support all suffixes + ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData; + + TextureInstructionSuffix Suffix = Meta.TextureInstructionSuffix; + + string Sampler = GetTexSamplerName(Op); + string Coords = GetITexSamplerCoords(Op); string Ch = "rgba".Substring(Meta.Elem, 1); - return "texelFetch(" + Sampler + ", " + Coords + ", 0)." + Ch; + string Lod = "0"; + + if (Meta.LevelOfDetail != null) + { + Lod = GetOperExpr(Op, Meta.LevelOfDetail); + } + + if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver) + { + string Offset = GetTextureOffset(Meta, GetOperExpr(Op, Meta.Offset)); + return "texelFetchOffset(" + Sampler + ", " + Coords + ", " + Lod + ", " + Offset + ")." + Ch; + } + + return "texelFetch(" + Sampler + ", " + Coords + ", " + Lod + ")." 
+ Ch; } private string GetTruncExpr(ShaderIrOp Op) => GetUnaryCall(Op, "trunc"); @@ -1246,14 +1339,205 @@ namespace Ryujinx.Graphics.Gal.Shader private string GetTexSamplerCoords(ShaderIrOp Op) { - return "vec2(" + GetOperExpr(Op, Op.OperandA) + ", " + - GetOperExpr(Op, Op.OperandB) + ")"; + ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData; + + bool HasDepth = (Meta.TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0; + + int Coords = ImageUtils.GetCoordsCountTextureTarget(Meta.TextureTarget); + + bool IsArray = ImageUtils.IsArray(Meta.TextureTarget); + + + string GetLastArgument(ShaderIrNode Node) + { + string Result = GetOperExpr(Op, Node); + + // array index is actually an integer so we need to pass it correctly + if (IsArray) + { + Result = "float(floatBitsToInt(" + Result + "))"; + } + + return Result; + } + + string LastArgument; + string DepthArgument = ""; + + int VecSize = Coords; + if (HasDepth && Op.Inst != ShaderIrInst.Tld4) + { + VecSize++; + DepthArgument = $", {GetOperExpr(Op, Meta.DepthCompare)}"; + } + + switch (Coords) + { + case 1: + if (HasDepth) + { + return $"vec3({GetOperExpr(Op, Meta.Coordinates[0])}, 0.0{DepthArgument})"; + } + + return GetOperExpr(Op, Meta.Coordinates[0]); + case 2: + LastArgument = GetLastArgument(Meta.Coordinates[1]); + + return $"vec{VecSize}({GetOperExpr(Op, Meta.Coordinates[0])}, {LastArgument}{DepthArgument})"; + case 3: + LastArgument = GetLastArgument(Meta.Coordinates[2]); + + return $"vec{VecSize}({GetOperExpr(Op, Meta.Coordinates[0])}, {GetOperExpr(Op, Meta.Coordinates[1])}, {LastArgument}{DepthArgument})"; + case 4: + LastArgument = GetLastArgument(Meta.Coordinates[3]); + + return $"vec4({GetOperExpr(Op, Meta.Coordinates[0])}, {GetOperExpr(Op, Meta.Coordinates[1])}, {GetOperExpr(Op, Meta.Coordinates[2])}, {LastArgument}){DepthArgument}"; + default: + throw new InvalidOperationException(); + } + + } + + private string GetTextureOffset(ShaderIrMetaTex Meta, string Oper, int Shift = 4, int Mask = 0xF) + 
{ + string GetOffset(string Operation, int Index) + { + return $"({Operation} >> {Index * Shift}) & 0x{Mask:x}"; + } + + int Coords = ImageUtils.GetCoordsCountTextureTarget(Meta.TextureTarget); + + if (ImageUtils.IsArray(Meta.TextureTarget)) + Coords -= 1; + + switch (Coords) + { + case 1: + return GetOffset(Oper, 0); + case 2: + return "ivec2(" + GetOffset(Oper, 0) + ", " + GetOffset(Oper, 1) + ")"; + case 3: + return "ivec3(" + GetOffset(Oper, 0) + ", " + GetOffset(Oper, 1) + ", " + GetOffset(Oper, 2) + ")"; + case 4: + return "ivec4(" + GetOffset(Oper, 0) + ", " + GetOffset(Oper, 1) + ", " + GetOffset(Oper, 2) + ", " + GetOffset(Oper, 3) + ")"; + default: + throw new InvalidOperationException(); + } + } + + // TODO: support AOFFI on non nvidia drivers + private string GetTextureGatherOperation(ShaderIrOp Op, string Sampler, string Coords, string Ch) + { + ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData; + + TextureInstructionSuffix Suffix = Meta.TextureInstructionSuffix; + + string ChString = "." 
+ Ch; + + string Comp = Meta.Component.ToString(); + + if ((Suffix & TextureInstructionSuffix.DC) != 0) + { + Comp = GetOperExpr(Op, Meta.DepthCompare); + } + + if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver) + { + string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))", 8, 0x3F); + + if ((Suffix & TextureInstructionSuffix.DC) != 0) + { + return "textureGatherOffset(" + Sampler + ", " + Coords + ", " + Comp + ", " + Offset + ")" + ChString; + } + + return "textureGatherOffset(" + Sampler + ", " + Coords + ", " + Offset + ", " + Comp + ")" + ChString; + } + // TODO: Support PTP + else if ((Suffix & TextureInstructionSuffix.PTP) != 0) + { + throw new NotImplementedException(); + } + + return "textureGather(" + Sampler + ", " + Coords + ", " + Comp + ")" + ChString; + } + + // TODO: support AOFFI on non nvidia drivers + private string GetTextureOperation(ShaderIrOp Op, string Sampler, string Coords, string Ch) + { + ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData; + + TextureInstructionSuffix Suffix = Meta.TextureInstructionSuffix; + + string ChString = "." 
+ Ch; + + if ((Suffix & TextureInstructionSuffix.DC) != 0) + { + ChString = ""; + } + + // TODO: Support LBA and LLA + if ((Suffix & TextureInstructionSuffix.LZ) != 0) + { + if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver) + { + string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))"); + + return "textureLodOffset(" + Sampler + ", " + Coords + ", 0.0, " + Offset + ")" + ChString; + } + + return "textureLod(" + Sampler + ", " + Coords + ", 0.0)" + ChString; + } + else if ((Suffix & TextureInstructionSuffix.LB) != 0) + { + if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver) + { + string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))"); + + return "textureOffset(" + Sampler + ", " + Coords + ", " + Offset + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ")" + ChString; + } + + return "texture(" + Sampler + ", " + Coords + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ")" + ChString; + } + else if ((Suffix & TextureInstructionSuffix.LL) != 0) + { + if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver) + { + string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))"); + + return "textureLodOffset(" + Sampler + ", " + Coords + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ", " + Offset + ")" + ChString; + } + + return "textureLod(" + Sampler + ", " + Coords + ", " + GetOperExpr(Op, Meta.LevelOfDetail) + ")" + ChString; + } + else if ((Suffix & TextureInstructionSuffix.AOffI) != 0 && IsNvidiaDriver) + { + string Offset = GetTextureOffset(Meta, "floatBitsToInt((" + GetOperExpr(Op, Meta.Offset) + "))"); + + return "textureOffset(" + Sampler + ", " + Coords + ", " + Offset + ")" + ChString; + } + else + { + return "texture(" + Sampler + ", " + Coords + ")" + ChString; + } + throw new NotImplementedException($"Texture Suffix {Meta.TextureInstructionSuffix} is not implemented"); + } private string 
GetITexSamplerCoords(ShaderIrOp Op) { - return "ivec2(" + GetOperExpr(Op, Op.OperandA) + ", " + - GetOperExpr(Op, Op.OperandB) + ")"; + ShaderIrMetaTex Meta = (ShaderIrMetaTex)Op.MetaData; + + switch (ImageUtils.GetCoordsCountTextureTarget(Meta.TextureTarget)) + { + case 1: + return GetOperExpr(Op, Meta.Coordinates[0]); + case 2: + return "ivec2(" + GetOperExpr(Op, Meta.Coordinates[0]) + ", " + GetOperExpr(Op, Meta.Coordinates[1]) + ")"; + case 3: + return "ivec3(" + GetOperExpr(Op, Meta.Coordinates[0]) + ", " + GetOperExpr(Op, Meta.Coordinates[1]) + ", " + GetOperExpr(Op, Meta.Coordinates[2]) + ")"; + default: + throw new InvalidOperationException(); + } } private string GetOperExpr(ShaderIrOp Op, ShaderIrNode Oper) @@ -1292,22 +1576,6 @@ namespace Ryujinx.Graphics.Gal.Shader } break; } - - case ShaderIrOperImm Imm: - { - //For integer immediates being used as float, - //it's better (for readability) to just return the float value. - if (DstType == OperType.F32) - { - float Value = BitConverter.Int32BitsToSingle(Imm.Value); - - if (!float.IsNaN(Value) && !float.IsInfinity(Value)) - { - return GetFloatConst(Value); - } - } - break; - } } switch (DstType) diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs index adcc47b955..8b4eacdf20 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs @@ -1,3 +1,4 @@ +using Ryujinx.Graphics.Texture; using System; using static Ryujinx.Graphics.Gal.Shader.ShaderDecodeHelper; @@ -29,6 +30,75 @@ namespace Ryujinx.Graphics.Gal.Shader { RGB_, RG_A, R_BA, _GBA, RGBA, ____, ____, ____ } }; + private static GalTextureTarget TexToTextureTarget(int TexType, bool IsArray) + { + switch (TexType) + { + case 0: + return IsArray ? GalTextureTarget.OneDArray : GalTextureTarget.OneD; + case 2: + return IsArray ? 
GalTextureTarget.TwoDArray : GalTextureTarget.TwoD; + case 4: + if (IsArray) + throw new InvalidOperationException($"ARRAY bit set on a TEX with 3D texture!"); + return GalTextureTarget.ThreeD; + case 6: + return IsArray ? GalTextureTarget.CubeArray : GalTextureTarget.CubeMap; + default: + throw new InvalidOperationException(); + } + } + + private static GalTextureTarget TexsToTextureTarget(int TexType) + { + switch (TexType) + { + case 0: + return GalTextureTarget.OneD; + case 2: + case 4: + case 6: + case 8: + case 0xa: + case 0xc: + return GalTextureTarget.TwoD; + case 0xe: + case 0x10: + case 0x12: + return GalTextureTarget.TwoDArray; + case 0x14: + case 0x16: + return GalTextureTarget.ThreeD; + case 0x18: + case 0x1a: + return GalTextureTarget.CubeMap; + default: + throw new InvalidOperationException(); + } + } + + public static GalTextureTarget TldsToTextureTarget(int TexType) + { + switch (TexType) + { + case 0: + case 2: + return GalTextureTarget.OneD; + case 4: + case 8: + case 0xa: + case 0xc: + case 0x18: + return GalTextureTarget.TwoD; + case 0x10: + return GalTextureTarget.TwoDArray; + case 0xe: + return GalTextureTarget.ThreeD; + default: + throw new InvalidOperationException(); + } + } + public static void Ld_A(ShaderIrBlock Block, long OpCode, int Position) { ShaderIrNode[] Opers = OpCode.Abuf20(); @@ -132,43 +202,166 @@ namespace Ryujinx.Graphics.Gal.Shader public static void Tex(ShaderIrBlock Block, long OpCode, int Position) { - EmitTex(Block, OpCode, GprHandle: false); + TextureInstructionSuffix Suffix; + + int RawSuffix = OpCode.Read(0x34, 0x38); + + switch (RawSuffix) + { + case 0: + Suffix = TextureInstructionSuffix.None; + break; + case 0x8: + Suffix = TextureInstructionSuffix.LZ; + break; + case 0x10: + Suffix = TextureInstructionSuffix.LB; + break; + case 0x18: + Suffix = TextureInstructionSuffix.LL; + break; + case 0x30: + Suffix = TextureInstructionSuffix.LBA; + break; + case 0x38: + Suffix = TextureInstructionSuffix.LLA; + break; + 
default: + throw new InvalidOperationException($"Invalid Suffix for TEX instruction {RawSuffix}"); + } + + bool IsOffset = OpCode.Read(0x36); + + if (IsOffset) + Suffix |= TextureInstructionSuffix.AOffI; + + EmitTex(Block, OpCode, Suffix, GprHandle: false); } public static void Tex_B(ShaderIrBlock Block, long OpCode, int Position) { - EmitTex(Block, OpCode, GprHandle: true); + TextureInstructionSuffix Suffix; + + int RawSuffix = OpCode.Read(0x24, 0xe); + + switch (RawSuffix) + { + case 0: + Suffix = TextureInstructionSuffix.None; + break; + case 0x2: + Suffix = TextureInstructionSuffix.LZ; + break; + case 0x4: + Suffix = TextureInstructionSuffix.LB; + break; + case 0x6: + Suffix = TextureInstructionSuffix.LL; + break; + case 0xc: + Suffix = TextureInstructionSuffix.LBA; + break; + case 0xe: + Suffix = TextureInstructionSuffix.LLA; + break; + default: + throw new InvalidOperationException($"Invalid Suffix for TEX.B instruction {RawSuffix}"); + } + + bool IsOffset = OpCode.Read(0x23); + + if (IsOffset) + Suffix |= TextureInstructionSuffix.AOffI; + + EmitTex(Block, OpCode, Suffix, GprHandle: true); } - private static void EmitTex(ShaderIrBlock Block, long OpCode, bool GprHandle) + private static void EmitTex(ShaderIrBlock Block, long OpCode, TextureInstructionSuffix TextureInstructionSuffix, bool GprHandle) { - //TODO: Support other formats. 
- ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[2]; + bool IsArray = OpCode.HasArray(); - for (int Index = 0; Index < Coords.Length; Index++) + GalTextureTarget TextureTarget = TexToTextureTarget(OpCode.Read(28, 6), IsArray); + + bool HasDepthCompare = OpCode.Read(0x32); + + if (HasDepthCompare) + { + TextureInstructionSuffix |= TextureInstructionSuffix.DC; + } + + ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[ImageUtils.GetCoordsCountTextureTarget(TextureTarget)]; + + int IndexExtraCoord = 0; + + if (IsArray) + { + IndexExtraCoord++; + + Coords[Coords.Length - 1] = OpCode.Gpr8(); + } + + + for (int Index = 0; Index < Coords.Length - IndexExtraCoord; Index++) { ShaderIrOperGpr CoordReg = OpCode.Gpr8(); CoordReg.Index += Index; + CoordReg.Index += IndexExtraCoord; + if (!CoordReg.IsValidRegister) { CoordReg.Index = ShaderIrOperGpr.ZRIndex; } - Coords[Index] = ShaderIrOperGpr.MakeTemporary(Index); - - Block.AddNode(new ShaderIrAsg(Coords[Index], CoordReg)); + Coords[Index] = CoordReg; } int ChMask = OpCode.Read(31, 0xf); + ShaderIrOperGpr LevelOfDetail = null; + ShaderIrOperGpr Offset = null; + ShaderIrOperGpr DepthCompare = null; + + // TODO: determine first argument when TEX.B is used + int OperBIndex = GprHandle ? 1 : 0; + + if ((TextureInstructionSuffix & TextureInstructionSuffix.LL) != 0 || + (TextureInstructionSuffix & TextureInstructionSuffix.LB) != 0 || + (TextureInstructionSuffix & TextureInstructionSuffix.LBA) != 0 || + (TextureInstructionSuffix & TextureInstructionSuffix.LLA) != 0) + { + LevelOfDetail = OpCode.Gpr20(); + LevelOfDetail.Index += OperBIndex; + + OperBIndex++; + } + + if ((TextureInstructionSuffix & TextureInstructionSuffix.AOffI) != 0) + { + Offset = OpCode.Gpr20(); + Offset.Index += OperBIndex; + + OperBIndex++; + } + + if ((TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0) + { + DepthCompare = OpCode.Gpr20(); + DepthCompare.Index += OperBIndex; + + OperBIndex++; + } + + // ??? ShaderIrNode OperC = GprHandle ? 
(ShaderIrNode)OpCode.Gpr20() : (ShaderIrNode)OpCode.Imm13_36(); ShaderIrInst Inst = GprHandle ? ShaderIrInst.Texb : ShaderIrInst.Texs; + Coords = CoordsRegistersToTempRegisters(Block, Coords); + int RegInc = 0; for (int Ch = 0; Ch < 4; Ch++) @@ -187,9 +380,14 @@ namespace Ryujinx.Graphics.Gal.Shader continue; } - ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch); + ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch, TextureTarget, TextureInstructionSuffix, Coords) + { + LevelOfDetail = LevelOfDetail, + Offset = Offset, + DepthCompare = DepthCompare + }; - ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta); + ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords.Length > 1 ? Coords[1] : null, OperC, Meta); Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op))); } @@ -197,17 +395,238 @@ namespace Ryujinx.Graphics.Gal.Shader public static void Texs(ShaderIrBlock Block, long OpCode, int Position) { - EmitTexs(Block, OpCode, ShaderIrInst.Texs); + TextureInstructionSuffix Suffix; + + int RawSuffix = OpCode.Read(0x34, 0x1e); + + switch (RawSuffix) + { + case 0: + case 0x4: + case 0x10: + case 0x16: + Suffix = TextureInstructionSuffix.LZ; + break; + case 0x6: + case 0x1a: + Suffix = TextureInstructionSuffix.LL; + break; + case 0x8: + Suffix = TextureInstructionSuffix.DC; + break; + case 0x2: + case 0xe: + case 0x14: + case 0x18: + Suffix = TextureInstructionSuffix.None; + break; + case 0xa: + Suffix = TextureInstructionSuffix.LL | TextureInstructionSuffix.DC; + break; + case 0xc: + case 0x12: + Suffix = TextureInstructionSuffix.LZ | TextureInstructionSuffix.DC; + break; + default: + throw new InvalidOperationException($"Invalid Suffix for TEXS instruction {RawSuffix}"); + } + + GalTextureTarget TextureTarget = TexsToTextureTarget(OpCode.Read(52, 0x1e)); + + EmitTexs(Block, OpCode, ShaderIrInst.Texs, TextureTarget, Suffix); } public static void Tlds(ShaderIrBlock Block, long OpCode, int Position) { - EmitTexs(Block, OpCode, ShaderIrInst.Txlf); + 
TextureInstructionSuffix Suffix; + + int RawSuffix = OpCode.Read(0x34, 0x1e); + + switch (RawSuffix) + { + case 0: + case 0x4: + case 0x8: + Suffix = TextureInstructionSuffix.LZ | TextureInstructionSuffix.AOffI; + break; + case 0xc: + Suffix = TextureInstructionSuffix.LZ | TextureInstructionSuffix.MZ; + break; + case 0xe: + case 0x10: + Suffix = TextureInstructionSuffix.LZ; + break; + case 0x2: + case 0xa: + Suffix = TextureInstructionSuffix.LL; + break; + case 0x18: + Suffix = TextureInstructionSuffix.LL | TextureInstructionSuffix.AOffI; + break; + default: + throw new InvalidOperationException($"Invalid Suffix for TLDS instruction {RawSuffix}"); + } + + GalTextureTarget TextureTarget = TldsToTextureTarget(OpCode.Read(52, 0x1e)); + + EmitTexs(Block, OpCode, ShaderIrInst.Txlf, TextureTarget, Suffix); } - private static void EmitTexs(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst) + public static void Tld4(ShaderIrBlock Block, long OpCode, int Position) { - //TODO: Support other formats. 
+ TextureInstructionSuffix Suffix; + + int RawSuffix = OpCode.Read(0x34, 0xc); + + switch (RawSuffix) + { + case 0: + Suffix = TextureInstructionSuffix.None; + break; + case 0x4: + Suffix = TextureInstructionSuffix.AOffI; + break; + case 0x8: + Suffix = TextureInstructionSuffix.PTP; + break; + default: + throw new InvalidOperationException($"Invalid Suffix for TLD4 instruction {RawSuffix}"); + } + + bool IsShadow = OpCode.Read(0x32); + + bool IsArray = OpCode.HasArray(); + int ChMask = OpCode.Read(31, 0xf); + + GalTextureTarget TextureTarget = TexToTextureTarget(OpCode.Read(28, 6), IsArray); + + if (IsShadow) + { + Suffix |= TextureInstructionSuffix.DC; + } + + EmitTld4(Block, OpCode, TextureTarget, Suffix, ChMask, OpCode.Read(0x38, 0x3), false); + } + + public static void Tld4s(ShaderIrBlock Block, long OpCode, int Position) + { + TextureInstructionSuffix Suffix = TextureInstructionSuffix.None; + + bool IsOffset = OpCode.Read(0x33); + bool IsShadow = OpCode.Read(0x32); + + if (IsOffset) + { + Suffix |= TextureInstructionSuffix.AOffI; + } + + if (IsShadow) + { + Suffix |= TextureInstructionSuffix.DC; + } + + // TLD4S seems to only support 2D textures with RGBA mask? 
+ EmitTld4(Block, OpCode, GalTextureTarget.TwoD, Suffix, RGBA, OpCode.Read(0x34, 0x3), true); + } + + private static void EmitTexs(ShaderIrBlock Block, + long OpCode, + ShaderIrInst Inst, + GalTextureTarget TextureTarget, + TextureInstructionSuffix TextureInstructionSuffix) + { + if (Inst == ShaderIrInst.Txlf && TextureTarget == GalTextureTarget.CubeArray) + { + throw new InvalidOperationException("TLDS instructions cannot use CUBE modifier!"); + } + + bool IsArray = ImageUtils.IsArray(TextureTarget); + + ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[ImageUtils.GetCoordsCountTextureTarget(TextureTarget)]; + + ShaderIrOperGpr OperA = OpCode.Gpr8(); + ShaderIrOperGpr OperB = OpCode.Gpr20(); + + ShaderIrOperGpr SuffixExtra = OpCode.Gpr20(); + SuffixExtra.Index += 1; + + int CoordStartIndex = 0; + + if (IsArray) + { + CoordStartIndex++; + Coords[Coords.Length - 1] = OpCode.Gpr8(); + } + + switch (Coords.Length - CoordStartIndex) + { + case 1: + Coords[0] = OpCode.Gpr8(); + + break; + case 2: + Coords[0] = OpCode.Gpr8(); + Coords[0].Index += CoordStartIndex; + + break; + case 3: + Coords[0] = OpCode.Gpr8(); + Coords[0].Index += CoordStartIndex; + + Coords[1] = OpCode.Gpr8(); + Coords[1].Index += 1 + CoordStartIndex; + + break; + default: + throw new NotSupportedException($"{Coords.Length - CoordStartIndex} coords textures aren't supported in TEXS"); + } + + int OperBIndex = 0; + + ShaderIrOperGpr LevelOfDetail = null; + ShaderIrOperGpr Offset = null; + ShaderIrOperGpr DepthCompare = null; + + // OperB is always the last value + // Not applicable to 1d textures + if (Coords.Length - CoordStartIndex != 1) + { + Coords[Coords.Length - CoordStartIndex - 1] = OperB; + OperBIndex++; + } + + // Encoding of TEXS/TLDS is a bit special and change for 2d textures + // NOTE: OperA seems to hold at best two args. + // On 2D textures, if no suffix need an additional values, Y is stored in OperB, otherwise coords are in OperA and the additional values is in OperB. 
+ if (TextureInstructionSuffix != TextureInstructionSuffix.None && TextureInstructionSuffix != TextureInstructionSuffix.LZ && TextureTarget == GalTextureTarget.TwoD) + { + Coords[Coords.Length - CoordStartIndex - 1] = OpCode.Gpr8(); + Coords[Coords.Length - CoordStartIndex - 1].Index += Coords.Length - CoordStartIndex - 1; + OperBIndex--; + } + + // TODO: Find what MZ does and what changes about the encoding (Maybe Multisample?) + if ((TextureInstructionSuffix & TextureInstructionSuffix.LL) != 0) + { + LevelOfDetail = OpCode.Gpr20(); + LevelOfDetail.Index += OperBIndex; + OperBIndex++; + } + + if ((TextureInstructionSuffix & TextureInstructionSuffix.AOffI) != 0) + { + Offset = OpCode.Gpr20(); + Offset.Index += OperBIndex; + OperBIndex++; + } + + if ((TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0) + { + DepthCompare = OpCode.Gpr20(); + DepthCompare.Index += OperBIndex; + OperBIndex++; + } + int LutIndex; LutIndex = !OpCode.Gpr0().IsConst ? 1 : 0; @@ -276,12 +695,7 @@ namespace Ryujinx.Graphics.Gal.Shader } ShaderIrNode OperC = OpCode.Imm13_36(); - - ShaderIrOperGpr Coord0 = ShaderIrOperGpr.MakeTemporary(0); - ShaderIrOperGpr Coord1 = ShaderIrOperGpr.MakeTemporary(1); - - Block.AddNode(new ShaderIrAsg(Coord0, OpCode.Gpr8())); - Block.AddNode(new ShaderIrAsg(Coord1, OpCode.Gpr20())); + Coords = CoordsRegistersToTempRegisters(Block, Coords); for (int Ch = 0; Ch < 4; Ch++) { @@ -290,9 +704,13 @@ namespace Ryujinx.Graphics.Gal.Shader continue; } - ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch); - - ShaderIrOp Op = new ShaderIrOp(Inst, Coord0, Coord1, OperC, Meta); + ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch, TextureTarget, TextureInstructionSuffix, Coords) + { + LevelOfDetail = LevelOfDetail, + Offset = Offset, + DepthCompare = DepthCompare + }; + ShaderIrOp Op = new ShaderIrOp(Inst, OperA, OperB, OperC, Meta); ShaderIrOperGpr Dst = GetDst(); @@ -303,9 +721,156 @@ namespace Ryujinx.Graphics.Gal.Shader } } + private static void EmitTld4(ShaderIrBlock 
Block, long OpCode, GalTextureTarget TextureType, TextureInstructionSuffix TextureInstructionSuffix, int ChMask, int Component, bool Scalar) + { + ShaderIrOperGpr OperA = OpCode.Gpr8(); + ShaderIrOperGpr OperB = OpCode.Gpr20(); + ShaderIrOperImm OperC = OpCode.Imm13_36(); + + ShaderIrOperGpr[] Coords = new ShaderIrOperGpr[ImageUtils.GetCoordsCountTextureTarget(TextureType)]; + + ShaderIrOperGpr Offset = null; + ShaderIrOperGpr DepthCompare = null; + + bool IsArray = ImageUtils.IsArray(TextureType); + + int OperBIndex = 0; + + if (Scalar) + { + int CoordStartIndex = 0; + + if (IsArray) + { + CoordStartIndex++; + Coords[Coords.Length - 1] = OperB; + } + + switch (Coords.Length - CoordStartIndex) + { + case 1: + Coords[0] = OpCode.Gpr8(); + + break; + case 2: + Coords[0] = OpCode.Gpr8(); + Coords[0].Index += CoordStartIndex; + + break; + case 3: + Coords[0] = OpCode.Gpr8(); + Coords[0].Index += CoordStartIndex; + + Coords[1] = OpCode.Gpr8(); + Coords[1].Index += 1 + CoordStartIndex; + + break; + default: + throw new NotSupportedException($"{Coords.Length - CoordStartIndex} coords textures aren't supported in TLD4S"); + } + + if (Coords.Length - CoordStartIndex != 1) + { + Coords[Coords.Length - CoordStartIndex - 1] = OperB; + OperBIndex++; + } + + if (TextureInstructionSuffix != TextureInstructionSuffix.None && TextureType == GalTextureTarget.TwoD) + { + Coords[Coords.Length - CoordStartIndex - 1] = OpCode.Gpr8(); + Coords[Coords.Length - CoordStartIndex - 1].Index += Coords.Length - CoordStartIndex - 1; + OperBIndex--; + } + } + else + { + int IndexExtraCoord = 0; + + if (IsArray) + { + IndexExtraCoord++; + + Coords[Coords.Length - 1] = OpCode.Gpr8(); + } + + for (int Index = 0; Index < Coords.Length - IndexExtraCoord; Index++) + { + Coords[Index] = OpCode.Gpr8(); + + Coords[Index].Index += Index; + + Coords[Index].Index += IndexExtraCoord; + + if (Coords[Index].Index > ShaderIrOperGpr.ZRIndex) + { + Coords[Index].Index = ShaderIrOperGpr.ZRIndex; + } + } + } + + if 
((TextureInstructionSuffix & TextureInstructionSuffix.AOffI) != 0) + { + Offset = OpCode.Gpr20(); + Offset.Index += OperBIndex; + OperBIndex++; + } + + if ((TextureInstructionSuffix & TextureInstructionSuffix.DC) != 0) + { + DepthCompare = OpCode.Gpr20(); + DepthCompare.Index += OperBIndex; + OperBIndex++; + } + + Coords = CoordsRegistersToTempRegisters(Block, Coords); + + int RegInc = 0; + + for (int Ch = 0; Ch < 4; Ch++) + { + if (!IsChannelUsed(ChMask, Ch)) + { + continue; + } + + ShaderIrOperGpr Dst = OpCode.Gpr0(); + + Dst.Index += RegInc++; + + if (!Dst.IsValidRegister || Dst.IsConst) + { + continue; + } + + ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch, TextureType, TextureInstructionSuffix, Coords) + { + Component = Component, + Offset = Offset, + DepthCompare = DepthCompare + }; + + ShaderIrOp Op = new ShaderIrOp(ShaderIrInst.Tld4, OperA, OperB, OperC, Meta); + + Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op))); + } + } + private static bool IsChannelUsed(int ChMask, int Ch) { return (ChMask & (1 << Ch)) != 0; } + + private static ShaderIrOperGpr[] CoordsRegistersToTempRegisters(ShaderIrBlock Block, params ShaderIrOperGpr[] Registers) + { + ShaderIrOperGpr[] Res = new ShaderIrOperGpr[Registers.Length]; + + for (int Index = 0; Index < Res.Length; Index++) + { + Res[Index] = ShaderIrOperGpr.MakeTemporary(Index); + Block.AddNode(new ShaderIrAsg(Res[Index], Registers[Index])); + } + + return Res; + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs index f0f92148e6..e241e1ca58 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs @@ -19,6 +19,11 @@ namespace Ryujinx.Graphics.Gal.Shader return ((int)(OpCode >> 20) << 8) >> 8; } + private static bool HasArray(this long OpCode) + { + return OpCode.Read(0x1c); + } + private static ShaderIrOperAbuf[] Abuf20(this long OpCode) { int Abuf = 
OpCode.Read(20, 0x3ff); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs b/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs index 35dea61216..68ff214e4e 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs @@ -49,6 +49,7 @@ namespace Ryujinx.Graphics.Gal.Shader Ipa, Texb, Texs, + Tld4, Trunc, F_End, diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderIrMetaTex.cs b/Ryujinx.Graphics/Gal/Shader/ShaderIrMetaTex.cs index 82f3bb774a..72ea221ad3 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderIrMetaTex.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderIrMetaTex.cs @@ -1,12 +1,24 @@ +using Ryujinx.Graphics.Texture; + namespace Ryujinx.Graphics.Gal.Shader { class ShaderIrMetaTex : ShaderIrMeta { - public int Elem { get; private set; } + public int Elem { get; private set; } + public GalTextureTarget TextureTarget { get; private set; } + public ShaderIrNode[] Coordinates { get; private set; } + public TextureInstructionSuffix TextureInstructionSuffix { get; private set; } + public ShaderIrOperGpr LevelOfDetail; + public ShaderIrOperGpr Offset; + public ShaderIrOperGpr DepthCompare; + public int Component; // for TLD4(S) - public ShaderIrMetaTex(int Elem) + public ShaderIrMetaTex(int Elem, GalTextureTarget TextureTarget, TextureInstructionSuffix TextureInstructionSuffix, params ShaderIrNode[] Coordinates) { - this.Elem = Elem; + this.Elem = Elem; + this.TextureTarget = TextureTarget; + this.TextureInstructionSuffix = TextureInstructionSuffix; + this.Coordinates = Coordinates; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs index 177e36c3e1..d2bbd38c6b 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs @@ -122,6 +122,8 @@ namespace Ryujinx.Graphics.Gal.Shader Set("1101111101001x", ShaderDecode.Texq); Set("1101x00xxxxxxx", ShaderDecode.Texs); Set("1101101xxxxxxx", 
ShaderDecode.Tlds); + Set("110010xxxx111x", ShaderDecode.Tld4); + Set("1101111100xxxx", ShaderDecode.Tld4s); Set("01011111xxxxxx", ShaderDecode.Vmad); Set("0100111xxxxxxx", ShaderDecode.Xmad_CR); Set("0011011x00xxxx", ShaderDecode.Xmad_I); diff --git a/Ryujinx.Graphics/Gal/ShaderDeclInfo.cs b/Ryujinx.Graphics/Gal/ShaderDeclInfo.cs index ef47ca2e1b..ed1955cdbb 100644 --- a/Ryujinx.Graphics/Gal/ShaderDeclInfo.cs +++ b/Ryujinx.Graphics/Gal/ShaderDeclInfo.cs @@ -1,3 +1,5 @@ +using Ryujinx.Graphics.Texture; + namespace Ryujinx.Graphics.Gal { public class ShaderDeclInfo @@ -9,18 +11,27 @@ namespace Ryujinx.Graphics.Gal public int Cbuf { get; private set; } public int Size { get; private set; } + public GalTextureTarget TextureTarget { get; private set; } + + public TextureInstructionSuffix TextureSuffix { get; private set; } + public ShaderDeclInfo( string Name, int Index, bool IsCb = false, int Cbuf = 0, - int Size = 1) + int Size = 1, + GalTextureTarget TextureTarget = GalTextureTarget.TwoD, + TextureInstructionSuffix TextureSuffix = TextureInstructionSuffix.None) { - this.Name = Name; - this.Index = Index; - this.IsCb = IsCb; - this.Cbuf = Cbuf; - this.Size = Size; + this.Name = Name; + this.Index = Index; + this.IsCb = IsCb; + this.Cbuf = Cbuf; + this.Size = Size; + + this.TextureTarget = TextureTarget; + this.TextureSuffix = TextureSuffix; } internal void Enlarge(int NewSize) diff --git a/Ryujinx.Graphics/GpuResourceManager.cs b/Ryujinx.Graphics/GpuResourceManager.cs index d46129516d..4f2d92b03a 100644 --- a/Ryujinx.Graphics/GpuResourceManager.cs +++ b/Ryujinx.Graphics/GpuResourceManager.cs @@ -1,6 +1,8 @@ +using Ryujinx.Common.Logging; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; using Ryujinx.Graphics.Texture; +using System; using System.Collections.Generic; namespace Ryujinx.Graphics @@ -11,6 +13,7 @@ namespace Ryujinx.Graphics { None, Texture, + TextureArrayLayer, ColorBuffer, ZetaBuffer } @@ -20,6 +23,7 @@ namespace Ryujinx.Graphics private 
HashSet[] UploadedKeys; private Dictionary ImageTypes; + private Dictionary MirroredTextures; public GpuResourceManager(NvGpu Gpu) { @@ -33,6 +37,7 @@ namespace Ryujinx.Graphics } ImageTypes = new Dictionary(); + MirroredTextures = new Dictionary(); } public void SendColorBuffer(NvGpuVmm Vmm, long Position, int Attachment, GalImage NewImage) @@ -70,6 +75,32 @@ namespace Ryujinx.Graphics ImageTypes[Position] = ImageType.Texture; } + public bool TryGetTextureLayer(long Position, out int LayerIndex) + { + if (MirroredTextures.TryGetValue(Position, out LayerIndex)) + { + ImageType Type = ImageTypes[Position]; + + // FIXME(thog): I'm actually unsure if we should deny all other image type, gpu testing needs to be done here. + if (Type != ImageType.Texture && Type != ImageType.TextureArrayLayer) + { + LayerIndex = -1; + return false; + } + + return true; + } + + LayerIndex = -1; + return false; + } + + public void SetTextureArrayLayer(long Position, int LayerIndex) + { + ImageTypes[Position] = ImageType.TextureArrayLayer; + MirroredTextures[Position] = LayerIndex; + } + private void PrepareSendTexture(NvGpuVmm Vmm, long Position, GalImage NewImage) { long Size = ImageUtils.GetSize(NewImage); @@ -102,7 +133,7 @@ namespace Ryujinx.Graphics private bool TryReuse(NvGpuVmm Vmm, long Position, GalImage NewImage) { - if (Gpu.Renderer.Texture.TryGetImage(Position, out GalImage CachedImage) && CachedImage.SizeMatches(NewImage)) + if (Gpu.Renderer.Texture.TryGetImage(Position, out GalImage CachedImage) && CachedImage.TextureTarget == NewImage.TextureTarget && CachedImage.SizeMatches(NewImage)) { Gpu.Renderer.RenderTarget.Reinterpret(Position, NewImage); diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs index 55e3ebd4c4..3295f6da05 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2d.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Logging; using Ryujinx.Graphics.Gal; using 
Ryujinx.Graphics.Memory; using Ryujinx.Graphics.Texture; @@ -46,6 +47,8 @@ namespace Ryujinx.Graphics.Graphics3d bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0; int DstWidth = ReadRegister(NvGpuEngine2dReg.DstWidth); int DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight); + int DstDepth = ReadRegister(NvGpuEngine2dReg.DstDepth); + int DstLayer = ReadRegister(NvGpuEngine2dReg.DstLayer); int DstPitch = ReadRegister(NvGpuEngine2dReg.DstPitch); int DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions); @@ -53,6 +56,8 @@ namespace Ryujinx.Graphics.Graphics3d bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0; int SrcWidth = ReadRegister(NvGpuEngine2dReg.SrcWidth); int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight); + int SrcDepth = ReadRegister(NvGpuEngine2dReg.SrcDepth); + int SrcLayer = ReadRegister(NvGpuEngine2dReg.SrcLayer); int SrcPitch = ReadRegister(NvGpuEngine2dReg.SrcPitch); int SrcBlkDim = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions); @@ -82,26 +87,99 @@ namespace Ryujinx.Graphics.Graphics3d long SrcKey = Vmm.GetPhysicalAddress(SrcAddress); long DstKey = Vmm.GetPhysicalAddress(DstAddress); + bool IsSrcLayered = false; + bool IsDstLayered = false; + + GalTextureTarget SrcTarget = GalTextureTarget.TwoD; + + if (SrcDepth != 0) + { + SrcTarget = GalTextureTarget.TwoDArray; + SrcDepth++; + IsSrcLayered = true; + } + else + { + SrcDepth = 1; + } + + GalTextureTarget DstTarget = GalTextureTarget.TwoD; + + if (DstDepth != 0) + { + DstTarget = GalTextureTarget.TwoDArray; + DstDepth++; + IsDstLayered = true; + } + else + { + DstDepth = 1; + } + GalImage SrcTexture = new GalImage( SrcWidth, - SrcHeight, 1, - SrcBlockHeight, + SrcHeight, + 1, SrcDepth, 1, + SrcBlockHeight, 1, SrcLayout, - SrcImgFormat); + SrcImgFormat, + SrcTarget); GalImage DstTexture = new GalImage( DstWidth, - DstHeight, 1, - DstBlockHeight, + DstHeight, + 1, DstDepth, 1, + DstBlockHeight, 1, DstLayout, - DstImgFormat); + DstImgFormat, + DstTarget); 
SrcTexture.Pitch = SrcPitch; DstTexture.Pitch = DstPitch; + long GetLayerOffset(GalImage Image, int Layer) + { + int TargetMipLevel = Image.MaxMipmapLevel <= 1 ? 1 : Image.MaxMipmapLevel - 1; + return ImageUtils.GetLayerOffset(Image, TargetMipLevel) * Layer; + } + + int SrcLayerIndex = -1; + + if (IsSrcLayered && Gpu.ResourceManager.TryGetTextureLayer(SrcKey, out SrcLayerIndex) && SrcLayerIndex != 0) + { + SrcKey = SrcKey - GetLayerOffset(SrcTexture, SrcLayerIndex); + } + + int DstLayerIndex = -1; + + if (IsDstLayered && Gpu.ResourceManager.TryGetTextureLayer(DstKey, out DstLayerIndex) && DstLayerIndex != 0) + { + DstKey = DstKey - GetLayerOffset(DstTexture, DstLayerIndex); + } + Gpu.ResourceManager.SendTexture(Vmm, SrcKey, SrcTexture); Gpu.ResourceManager.SendTexture(Vmm, DstKey, DstTexture); + if (IsSrcLayered && SrcLayerIndex == -1) + { + for (int Layer = 0; Layer < SrcTexture.LayerCount; Layer++) + { + Gpu.ResourceManager.SetTextureArrayLayer(SrcKey + GetLayerOffset(SrcTexture, Layer), Layer); + } + + SrcLayerIndex = 0; + } + + if (IsDstLayered && DstLayerIndex == -1) + { + for (int Layer = 0; Layer < DstTexture.LayerCount; Layer++) + { + Gpu.ResourceManager.SetTextureArrayLayer(DstKey + GetLayerOffset(DstTexture, Layer), Layer); + } + + DstLayerIndex = 0; + } + int SrcBlitX1 = (int)(SrcBlitX >> 32); int SrcBlitY1 = (int)(SrcBlitY >> 32); @@ -109,8 +187,12 @@ namespace Ryujinx.Graphics.Graphics3d int SrcBlitY2 = (int)(SrcBlitY + DstBlitH * BlitDvDy >> 32); Gpu.Renderer.RenderTarget.Copy( + SrcTexture, + DstTexture, SrcKey, DstKey, + SrcLayerIndex, + DstLayerIndex, SrcBlitX1, SrcBlitY1, SrcBlitX2, @@ -124,6 +206,8 @@ namespace Ryujinx.Graphics.Graphics3d //the texture is modified by the guest, however it doesn't //work when resources that the gpu can write to are copied, //like framebuffers. 
+ + // FIXME: SUPPORT MULTILAYER CORRECTLY HERE (this will cause weird stuffs on the first layer) ImageUtils.CopyTexture( Vmm, SrcTexture, diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs index c1c0dba29f..7747b5a3ab 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine2dReg.cs @@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.Graphics3d DstWidth = 0x86, DstHeight = 0x87, DstAddress = 0x88, + DstAddressLow = 0x89, SrcFormat = 0x8c, SrcLinear = 0x8d, SrcBlockDimensions = 0x8e, @@ -20,6 +21,7 @@ namespace Ryujinx.Graphics.Graphics3d SrcWidth = 0x92, SrcHeight = 0x93, SrcAddress = 0x94, + SrcAddressLow = 0x95, ClipEnable = 0xa4, CopyOperation = 0xab, BlitControl = 0x223, diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs index 6120053dae..eb6289fbdb 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs @@ -1,4 +1,5 @@ using Ryujinx.Common; +using Ryujinx.Common.Logging; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; using Ryujinx.Graphics.Texture; @@ -190,7 +191,11 @@ namespace Ryujinx.Graphics.Graphics3d int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10); int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10); - int BlockDim = ReadRegister(NvGpuEngine3dReg.FrameBufferNBlockDim + FbIndex * 0x10); + int ArrayMode = ReadRegister(NvGpuEngine3dReg.FrameBufferNArrayMode + FbIndex * 0x10); + int LayerCount = ArrayMode & 0xFFFF; + int LayerStride = ReadRegister(NvGpuEngine3dReg.FrameBufferNLayerStride + FbIndex * 0x10); + int BaseLayer = ReadRegister(NvGpuEngine3dReg.FrameBufferNBaseLayer + FbIndex * 0x10); + int BlockDim = ReadRegister(NvGpuEngine3dReg.FrameBufferNBlockDim + FbIndex * 0x10); int GobBlockHeight = 1 << ((BlockDim >> 4) & 7); @@ -210,7 +215,7 @@ namespace Ryujinx.Graphics.Graphics3d 
GalImageFormat Format = ImageUtils.ConvertSurface((GalSurfaceFormat)SurfFormat); - GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format); + GalImage Image = new GalImage(Width, Height, 1, 1, 1, GobBlockHeight, 1, Layout, Format, GalTextureTarget.TwoD); Gpu.ResourceManager.SendColorBuffer(Vmm, Key, FbIndex, Image); @@ -264,7 +269,8 @@ namespace Ryujinx.Graphics.Graphics3d GalImageFormat Format = ImageUtils.ConvertZeta((GalZetaFormat)ZetaFormat); - GalImage Image = new GalImage(Width, Height, 1, GobBlockHeight, Layout, Format); + // TODO: Support non 2D? + GalImage Image = new GalImage(Width, Height, 1, 1, 1, GobBlockHeight, 1, Layout, Format, GalTextureTarget.TwoD); Gpu.ResourceManager.SendZetaBuffer(Vmm, Key, Image); } @@ -600,7 +606,7 @@ namespace Ryujinx.Graphics.Graphics3d } Gpu.Renderer.Texture.Bind(Key, Index, Image); - Gpu.Renderer.Texture.SetSampler(Sampler); + Gpu.Renderer.Texture.SetSampler(Image, Sampler); } } diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs index 026b0cd198..9134646403 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3dReg.cs @@ -2,112 +2,115 @@ namespace Ryujinx.Graphics.Graphics3d { enum NvGpuEngine3dReg { - FrameBufferNAddress = 0x200, - FrameBufferNWidth = 0x202, - FrameBufferNHeight = 0x203, - FrameBufferNFormat = 0x204, - FrameBufferNBlockDim = 0x205, - ViewportNScaleX = 0x280, - ViewportNScaleY = 0x281, - ViewportNScaleZ = 0x282, - ViewportNTranslateX = 0x283, - ViewportNTranslateY = 0x284, - ViewportNTranslateZ = 0x285, - ViewportNHoriz = 0x300, - ViewportNVert = 0x301, - DepthRangeNNear = 0x302, - DepthRangeNFar = 0x303, - VertexArrayFirst = 0x35d, - VertexArrayCount = 0x35e, - ClearNColor = 0x360, - ClearDepth = 0x364, - ClearStencil = 0x368, - ScissorEnable = 0x380, - ScissorHorizontal = 0x381, - ScissorVertical = 0x382, - StencilBackFuncRef = 0x3d5, - StencilBackMask = 0x3d6, - 
StencilBackFuncMask = 0x3d7, - ColorMaskCommon = 0x3e4, - RTSeparateFragData = 0x3eb, - ZetaAddress = 0x3f8, - ZetaFormat = 0x3fa, - ZetaBlockDimensions = 0x3fb, - ZetaLayerStride = 0x3fc, - VertexAttribNFormat = 0x458, - RTControl = 0x487, - ZetaHoriz = 0x48a, - ZetaVert = 0x48b, - ZetaArrayMode = 0x48c, - LinkedTsc = 0x48d, - DepthTestEnable = 0x4b3, - BlendIndependent = 0x4b9, - DepthWriteEnable = 0x4ba, - DepthTestFunction = 0x4c3, - BlendSeparateAlpha = 0x4cf, - BlendEquationRgb = 0x4d0, - BlendFuncSrcRgb = 0x4d1, - BlendFuncDstRgb = 0x4d2, - BlendEquationAlpha = 0x4d3, - BlendFuncSrcAlpha = 0x4d4, - BlendFuncDstAlpha = 0x4d6, - BlendEnable = 0x4d7, - IBlendNEnable = 0x4d8, - StencilEnable = 0x4e0, - StencilFrontOpFail = 0x4e1, - StencilFrontOpZFail = 0x4e2, - StencilFrontOpZPass = 0x4e3, - StencilFrontFuncFunc = 0x4e4, - StencilFrontFuncRef = 0x4e5, - StencilFrontFuncMask = 0x4e6, - StencilFrontMask = 0x4e7, - ScreenYControl = 0x4eb, - VertexArrayElemBase = 0x50d, - VertexArrayInstBase = 0x50e, - ZetaEnable = 0x54e, - TexHeaderPoolOffset = 0x55d, - TexSamplerPoolOffset = 0x557, - StencilTwoSideEnable = 0x565, - StencilBackOpFail = 0x566, - StencilBackOpZFail = 0x567, - StencilBackOpZPass = 0x568, - StencilBackFuncFunc = 0x569, - FrameBufferSrgb = 0x56e, - ShaderAddress = 0x582, - VertexBeginGl = 0x586, - PrimRestartEnable = 0x591, - PrimRestartIndex = 0x592, - IndexArrayAddress = 0x5f2, - IndexArrayEndAddr = 0x5f4, - IndexArrayFormat = 0x5f6, - IndexBatchFirst = 0x5f7, - IndexBatchCount = 0x5f8, - VertexArrayNInstance = 0x620, - CullFaceEnable = 0x646, - FrontFace = 0x647, - CullFace = 0x648, - ColorMaskN = 0x680, - QueryAddress = 0x6c0, - QuerySequence = 0x6c2, - QueryControl = 0x6c3, - VertexArrayNControl = 0x700, - VertexArrayNAddress = 0x701, - VertexArrayNDivisor = 0x703, - IBlendNSeparateAlpha = 0x780, - IBlendNEquationRgb = 0x781, - IBlendNFuncSrcRgb = 0x782, - IBlendNFuncDstRgb = 0x783, - IBlendNEquationAlpha = 0x784, - IBlendNFuncSrcAlpha = 0x785, - 
IBlendNFuncDstAlpha = 0x786, - VertexArrayNEndAddr = 0x7c0, - ShaderNControl = 0x800, - ShaderNOffset = 0x801, - ShaderNMaxGprs = 0x803, - ShaderNType = 0x804, - ConstBufferSize = 0x8e0, - ConstBufferAddress = 0x8e1, - ConstBufferOffset = 0x8e3, - TextureCbIndex = 0x982 + FrameBufferNAddress = 0x200, + FrameBufferNWidth = 0x202, + FrameBufferNHeight = 0x203, + FrameBufferNFormat = 0x204, + FrameBufferNBlockDim = 0x205, + FrameBufferNArrayMode = 0x206, + FrameBufferNLayerStride = 0x207, + FrameBufferNBaseLayer = 0x208, + ViewportNScaleX = 0x280, + ViewportNScaleY = 0x281, + ViewportNScaleZ = 0x282, + ViewportNTranslateX = 0x283, + ViewportNTranslateY = 0x284, + ViewportNTranslateZ = 0x285, + ViewportNHoriz = 0x300, + ViewportNVert = 0x301, + DepthRangeNNear = 0x302, + DepthRangeNFar = 0x303, + VertexArrayFirst = 0x35d, + VertexArrayCount = 0x35e, + ClearNColor = 0x360, + ClearDepth = 0x364, + ClearStencil = 0x368, + ScissorEnable = 0x380, + ScissorHorizontal = 0x381, + ScissorVertical = 0x382, + StencilBackFuncRef = 0x3d5, + StencilBackMask = 0x3d6, + StencilBackFuncMask = 0x3d7, + ColorMaskCommon = 0x3e4, + RTSeparateFragData = 0x3eb, + ZetaAddress = 0x3f8, + ZetaFormat = 0x3fa, + ZetaBlockDimensions = 0x3fb, + ZetaLayerStride = 0x3fc, + VertexAttribNFormat = 0x458, + RTControl = 0x487, + ZetaHoriz = 0x48a, + ZetaVert = 0x48b, + ZetaArrayMode = 0x48c, + LinkedTsc = 0x48d, + DepthTestEnable = 0x4b3, + BlendIndependent = 0x4b9, + DepthWriteEnable = 0x4ba, + DepthTestFunction = 0x4c3, + BlendSeparateAlpha = 0x4cf, + BlendEquationRgb = 0x4d0, + BlendFuncSrcRgb = 0x4d1, + BlendFuncDstRgb = 0x4d2, + BlendEquationAlpha = 0x4d3, + BlendFuncSrcAlpha = 0x4d4, + BlendFuncDstAlpha = 0x4d6, + BlendEnable = 0x4d7, + IBlendNEnable = 0x4d8, + StencilEnable = 0x4e0, + StencilFrontOpFail = 0x4e1, + StencilFrontOpZFail = 0x4e2, + StencilFrontOpZPass = 0x4e3, + StencilFrontFuncFunc = 0x4e4, + StencilFrontFuncRef = 0x4e5, + StencilFrontFuncMask = 0x4e6, + StencilFrontMask = 0x4e7, + 
ScreenYControl = 0x4eb, + VertexArrayElemBase = 0x50d, + VertexArrayInstBase = 0x50e, + ZetaEnable = 0x54e, + TexHeaderPoolOffset = 0x55d, + TexSamplerPoolOffset = 0x557, + StencilTwoSideEnable = 0x565, + StencilBackOpFail = 0x566, + StencilBackOpZFail = 0x567, + StencilBackOpZPass = 0x568, + StencilBackFuncFunc = 0x569, + FrameBufferSrgb = 0x56e, + ShaderAddress = 0x582, + VertexBeginGl = 0x586, + PrimRestartEnable = 0x591, + PrimRestartIndex = 0x592, + IndexArrayAddress = 0x5f2, + IndexArrayEndAddr = 0x5f4, + IndexArrayFormat = 0x5f6, + IndexBatchFirst = 0x5f7, + IndexBatchCount = 0x5f8, + VertexArrayNInstance = 0x620, + CullFaceEnable = 0x646, + FrontFace = 0x647, + CullFace = 0x648, + ColorMaskN = 0x680, + QueryAddress = 0x6c0, + QuerySequence = 0x6c2, + QueryControl = 0x6c3, + VertexArrayNControl = 0x700, + VertexArrayNAddress = 0x701, + VertexArrayNDivisor = 0x703, + IBlendNSeparateAlpha = 0x780, + IBlendNEquationRgb = 0x781, + IBlendNFuncSrcRgb = 0x782, + IBlendNFuncDstRgb = 0x783, + IBlendNEquationAlpha = 0x784, + IBlendNFuncSrcAlpha = 0x785, + IBlendNFuncDstAlpha = 0x786, + VertexArrayNEndAddr = 0x7c0, + ShaderNControl = 0x800, + ShaderNOffset = 0x801, + ShaderNMaxGprs = 0x803, + ShaderNType = 0x804, + ConstBufferSize = 0x8e0, + ConstBufferAddress = 0x8e1, + ConstBufferOffset = 0x8e3, + TextureCbIndex = 0x982 } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs index d89059c0c5..2f1df3d377 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngineM2mf.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Logging; using Ryujinx.Graphics.Memory; using Ryujinx.Graphics.Texture; using System.Collections.Generic; @@ -125,29 +126,37 @@ namespace Ryujinx.Graphics.Graphics3d if (SrcLinear) { - SrcSwizzle = new LinearSwizzle(SrcPitch, SrcCpp); + SrcSwizzle = new LinearSwizzle(SrcPitch, SrcCpp, SrcSizeX, SrcSizeY); } else { - SrcSwizzle = new 
BlockLinearSwizzle(SrcSizeX, SrcCpp, SrcBlockHeight); + SrcSwizzle = new BlockLinearSwizzle( + SrcSizeX, + SrcSizeY, 1, + SrcBlockHeight, 1, + SrcCpp); } ISwizzle DstSwizzle; if (DstLinear) { - DstSwizzle = new LinearSwizzle(DstPitch, DstCpp); + DstSwizzle = new LinearSwizzle(DstPitch, DstCpp, SrcSizeX, SrcSizeY); } else { - DstSwizzle = new BlockLinearSwizzle(DstSizeX, DstCpp, DstBlockHeight); + DstSwizzle = new BlockLinearSwizzle( + DstSizeX, + DstSizeY, 1, + DstBlockHeight, 1, + DstCpp); } for (int Y = 0; Y < YCount; Y++) for (int X = 0; X < XCount; X++) { - int SrcOffset = SrcSwizzle.GetSwizzleOffset(SrcPosX + X, SrcPosY + Y); - int DstOffset = DstSwizzle.GetSwizzleOffset(DstPosX + X, DstPosY + Y); + int SrcOffset = SrcSwizzle.GetSwizzleOffset(SrcPosX + X, SrcPosY + Y, 0); + int DstOffset = DstSwizzle.GetSwizzleOffset(DstPosX + X, DstPosY + Y, 0); long Src = SrcPA + (uint)SrcOffset; long Dst = DstPA + (uint)DstOffset; diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs index 681552556c..62872ba15c 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngineP2mf.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Logging; using Ryujinx.Graphics.Memory; using Ryujinx.Graphics.Texture; using System.Collections.Generic; @@ -119,14 +120,17 @@ namespace Ryujinx.Graphics.Graphics3d } else { - BlockLinearSwizzle Swizzle = new BlockLinearSwizzle(CopyWidth, 1, CopyGobBlockHeight); + BlockLinearSwizzle Swizzle = new BlockLinearSwizzle( + CopyWidth, + CopyHeight, 1, + CopyGobBlockHeight, 1, 1); int SrcOffset = 0; for (int Y = CopyStartY; Y < CopyHeight && SrcOffset < CopySize; Y++) for (int X = CopyStartX; X < CopyWidth && SrcOffset < CopySize; X++) { - int DstOffset = Swizzle.GetSwizzleOffset(X, Y); + int DstOffset = Swizzle.GetSwizzleOffset(X, Y, 0); Vmm.WriteByte(CopyAddress + DstOffset, Buffer[SrcOffset++]); } diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs 
b/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs index 1efa025523..00158dc103 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/ASTCDecoder.cs @@ -72,6 +72,7 @@ namespace Ryujinx.Graphics.Texture if (BlockZ != 1 || Z != 1) { + // TODO: Support 3D textures? throw new ASTCDecoderException("3D compressed textures unsupported!"); } diff --git a/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs index 9451291e9a..1be0644283 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/BlockLinearSwizzle.cs @@ -1,51 +1,178 @@ +using Ryujinx.Common; using System; namespace Ryujinx.Graphics.Texture { class BlockLinearSwizzle : ISwizzle { - private int BhShift; - private int BppShift; + private const int GobWidth = 64; + private const int GobHeight = 8; + + private const int GobSize = GobWidth * GobHeight; + + private int TexWidth; + private int TexHeight; + private int TexDepth; + private int TexGobBlockHeight; + private int TexGobBlockDepth; + private int TexBpp; + private int BhMask; + private int BdMask; + + private int BhShift; + private int BdShift; + private int BppShift; private int XShift; - private int GobStride; - public BlockLinearSwizzle(int Width, int Bpp, int BlockHeight = 16) + private int RobSize; + private int SliceSize; + + private int BaseOffset; + + public BlockLinearSwizzle( + int Width, + int Height, + int Depth, + int GobBlockHeight, + int GobBlockDepth, + int Bpp) { - BhMask = (BlockHeight * 8) - 1; + TexWidth = Width; + TexHeight = Height; + TexDepth = Depth; + TexGobBlockHeight = GobBlockHeight; + TexGobBlockDepth = GobBlockDepth; + TexBpp = Bpp; - BhShift = CountLsbZeros(BlockHeight * 8); - BppShift = CountLsbZeros(Bpp); + BppShift = BitUtils.CountTrailingZeros32(Bpp); - int WidthInGobs = (int)MathF.Ceiling(Width * Bpp / 64f); - - GobStride = 512 * BlockHeight * 
WidthInGobs; - - XShift = CountLsbZeros(512 * BlockHeight); + SetMipLevel(0); } - private int CountLsbZeros(int Value) + public void SetMipLevel(int Level) { - int Count = 0; + BaseOffset = GetMipOffset(Level); - while (((Value >> Count) & 1) == 0) + int Width = Math.Max(1, TexWidth >> Level); + int Height = Math.Max(1, TexHeight >> Level); + int Depth = Math.Max(1, TexDepth >> Level); + + GobBlockSizes GbSizes = AdjustGobBlockSizes(Height, Depth); + + BhMask = GbSizes.Height - 1; + BdMask = GbSizes.Depth - 1; + + BhShift = BitUtils.CountTrailingZeros32(GbSizes.Height); + BdShift = BitUtils.CountTrailingZeros32(GbSizes.Depth); + + XShift = BitUtils.CountTrailingZeros32(GobSize * GbSizes.Height * GbSizes.Depth); + + RobAndSliceSizes GsSizes = GetRobAndSliceSizes(Width, Height, GbSizes); + + RobSize = GsSizes.RobSize; + SliceSize = GsSizes.SliceSize; + } + + public int GetImageSize(int MipsCount) + { + int Size = GetMipOffset(MipsCount); + + Size = (Size + 0x1fff) & ~0x1fff; + + return Size; + } + + public int GetMipOffset(int Level) + { + int TotalSize = 0; + + for (int Index = 0; Index < Level; Index++) { - Count++; + int Width = Math.Max(1, TexWidth >> Index); + int Height = Math.Max(1, TexHeight >> Index); + int Depth = Math.Max(1, TexDepth >> Index); + + GobBlockSizes GbSizes = AdjustGobBlockSizes(Height, Depth); + + RobAndSliceSizes RsSizes = GetRobAndSliceSizes(Width, Height, GbSizes); + + TotalSize += BitUtils.DivRoundUp(Depth, GbSizes.Depth) * RsSizes.SliceSize; } - return Count; + return TotalSize; } - public int GetSwizzleOffset(int X, int Y) + private struct GobBlockSizes + { + public int Height; + public int Depth; + + public GobBlockSizes(int GobBlockHeight, int GobBlockDepth) + { + this.Height = GobBlockHeight; + this.Depth = GobBlockDepth; + } + } + + private GobBlockSizes AdjustGobBlockSizes(int Height, int Depth) + { + int GobBlockHeight = TexGobBlockHeight; + int GobBlockDepth = TexGobBlockDepth; + + int Pow2Height = BitUtils.Pow2RoundUp(Height); + 
int Pow2Depth = BitUtils.Pow2RoundUp(Depth); + + while (GobBlockHeight * GobHeight > Pow2Height && GobBlockHeight > 1) + { + GobBlockHeight >>= 1; + } + + while (GobBlockDepth > Pow2Depth && GobBlockDepth > 1) + { + GobBlockDepth >>= 1; + } + + return new GobBlockSizes(GobBlockHeight, GobBlockDepth); + } + + private struct RobAndSliceSizes + { + public int RobSize; + public int SliceSize; + + public RobAndSliceSizes(int RobSize, int SliceSize) + { + this.RobSize = RobSize; + this.SliceSize = SliceSize; + } + } + + private RobAndSliceSizes GetRobAndSliceSizes(int Width, int Height, GobBlockSizes GbSizes) + { + int WidthInGobs = BitUtils.DivRoundUp(Width * TexBpp, GobWidth); + + int RobSize = GobSize * GbSizes.Height * GbSizes.Depth * WidthInGobs; + + int SliceSize = BitUtils.DivRoundUp(Height, GbSizes.Height * GobHeight) * RobSize; + + return new RobAndSliceSizes(RobSize, SliceSize); + } + + public int GetSwizzleOffset(int X, int Y, int Z) { X <<= BppShift; - int Position = (Y >> BhShift) * GobStride; + int YH = Y / GobHeight; - Position += (X >> 6) << XShift; + int Position = (Z >> BdShift) * SliceSize + (YH >> BhShift) * RobSize; - Position += ((Y & BhMask) >> 3) << 9; + Position += (X / GobWidth) << XShift; + + Position += (YH & BhMask) * GobSize; + + Position += ((Z & BdMask) * GobSize) << BhShift; Position += ((X & 0x3f) >> 5) << 8; Position += ((Y & 0x07) >> 1) << 6; @@ -53,7 +180,7 @@ namespace Ryujinx.Graphics.Texture Position += ((Y & 0x01) >> 0) << 4; Position += ((X & 0x0f) >> 0) << 0; - return Position; + return BaseOffset + Position; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs index 583fc20c53..2e0e8aedd4 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/ISwizzle.cs @@ -2,6 +2,12 @@ namespace Ryujinx.Graphics.Texture { interface ISwizzle { - int GetSwizzleOffset(int X, int Y); + int 
GetSwizzleOffset(int X, int Y, int Z); + + void SetMipLevel(int Level); + + int GetMipOffset(int Level); + + int GetImageSize(int MipsCount); } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs index f958e1de81..c4208935c3 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs @@ -1,4 +1,6 @@ using ChocolArm64.Memory; +using OpenTK.Graphics.OpenGL; +using Ryujinx.Common; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; using System; @@ -23,14 +25,16 @@ namespace Ryujinx.Graphics.Texture public int BytesPerPixel { get; private set; } public int BlockWidth { get; private set; } public int BlockHeight { get; private set; } + public int BlockDepth { get; private set; } public TargetBuffer Target { get; private set; } - public ImageDescriptor(int BytesPerPixel, int BlockWidth, int BlockHeight, TargetBuffer Target) + public ImageDescriptor(int BytesPerPixel, int BlockWidth, int BlockHeight, int BlockDepth, TargetBuffer Target) { this.BytesPerPixel = BytesPerPixel; this.BlockWidth = BlockWidth; this.BlockHeight = BlockHeight; + this.BlockDepth = BlockDepth; this.Target = Target; } } @@ -92,52 +96,52 @@ namespace Ryujinx.Graphics.Texture private static readonly Dictionary s_ImageTable = new Dictionary() { - { GalImageFormat.RGBA32, new ImageDescriptor(16, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RGBA16, new ImageDescriptor(8, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RG32, new ImageDescriptor(8, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RGBX8, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RGBA8, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.BGRA8, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RGB10A2, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.R32, new ImageDescriptor(4, 1, 1, 
TargetBuffer.Color) }, - { GalImageFormat.RGBA4, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.BptcSfloat, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.BptcUfloat, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.BGR5A1, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RGB5A1, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RGB565, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.BGR565, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.BptcUnorm, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.RG16, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.RG8, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.R16, new ImageDescriptor(2, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.R8, new ImageDescriptor(1, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.R11G11B10, new ImageDescriptor(4, 1, 1, TargetBuffer.Color) }, - { GalImageFormat.BC1, new ImageDescriptor(8, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.BC2, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.BC3, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.BC4, new ImageDescriptor(8, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.BC5, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.Astc2D4x4, new ImageDescriptor(16, 4, 4, TargetBuffer.Color) }, - { GalImageFormat.Astc2D5x5, new ImageDescriptor(16, 5, 5, TargetBuffer.Color) }, - { GalImageFormat.Astc2D6x6, new ImageDescriptor(16, 6, 6, TargetBuffer.Color) }, - { GalImageFormat.Astc2D8x8, new ImageDescriptor(16, 8, 8, TargetBuffer.Color) }, - { GalImageFormat.Astc2D10x10, new ImageDescriptor(16, 10, 10, TargetBuffer.Color) }, - { GalImageFormat.Astc2D12x12, new ImageDescriptor(16, 12, 12, TargetBuffer.Color) }, - { GalImageFormat.Astc2D5x4, new ImageDescriptor(16, 5, 
4, TargetBuffer.Color) }, - { GalImageFormat.Astc2D6x5, new ImageDescriptor(16, 6, 5, TargetBuffer.Color) }, - { GalImageFormat.Astc2D8x6, new ImageDescriptor(16, 8, 6, TargetBuffer.Color) }, - { GalImageFormat.Astc2D10x8, new ImageDescriptor(16, 10, 8, TargetBuffer.Color) }, - { GalImageFormat.Astc2D12x10, new ImageDescriptor(16, 12, 10, TargetBuffer.Color) }, - { GalImageFormat.Astc2D8x5, new ImageDescriptor(16, 8, 5, TargetBuffer.Color) }, - { GalImageFormat.Astc2D10x5, new ImageDescriptor(16, 10, 5, TargetBuffer.Color) }, - { GalImageFormat.Astc2D10x6, new ImageDescriptor(16, 10, 6, TargetBuffer.Color) }, + { GalImageFormat.RGBA32, new ImageDescriptor(16, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGBA16, new ImageDescriptor(8, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RG32, new ImageDescriptor(8, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGBX8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGBA8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.BGRA8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGB10A2, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.R32, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGBA4, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.BptcSfloat, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.BptcUfloat, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.BGR5A1, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGB5A1, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.RGB565, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.BGR565, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.BptcUnorm, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.RG16, new ImageDescriptor(4, 1, 1, 1, 
TargetBuffer.Color) }, + { GalImageFormat.RG8, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.R16, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.R8, new ImageDescriptor(1, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.R11G11B10, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Color) }, + { GalImageFormat.BC1, new ImageDescriptor(8, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.BC2, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.BC3, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.BC4, new ImageDescriptor(8, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.BC5, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D4x4, new ImageDescriptor(16, 4, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D5x5, new ImageDescriptor(16, 5, 5, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D6x6, new ImageDescriptor(16, 6, 6, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D8x8, new ImageDescriptor(16, 8, 8, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D10x10, new ImageDescriptor(16, 10, 10, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D12x12, new ImageDescriptor(16, 12, 12, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D5x4, new ImageDescriptor(16, 5, 4, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D6x5, new ImageDescriptor(16, 6, 5, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D8x6, new ImageDescriptor(16, 8, 6, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D10x8, new ImageDescriptor(16, 10, 8, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D12x10, new ImageDescriptor(16, 12, 10, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D8x5, new ImageDescriptor(16, 8, 5, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D10x5, new ImageDescriptor(16, 10, 5, 1, TargetBuffer.Color) }, + { GalImageFormat.Astc2D10x6, new ImageDescriptor(16, 10, 6, 1, TargetBuffer.Color) }, - { GalImageFormat.D16, new 
ImageDescriptor(2, 1, 1, TargetBuffer.Depth) }, - { GalImageFormat.D24, new ImageDescriptor(4, 1, 1, TargetBuffer.Depth) }, - { GalImageFormat.D24S8, new ImageDescriptor(4, 1, 1, TargetBuffer.DepthStencil) }, - { GalImageFormat.D32, new ImageDescriptor(4, 1, 1, TargetBuffer.Depth) }, - { GalImageFormat.D32S8, new ImageDescriptor(8, 1, 1, TargetBuffer.DepthStencil) } + { GalImageFormat.D16, new ImageDescriptor(2, 1, 1, 1, TargetBuffer.Depth) }, + { GalImageFormat.D24, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Depth) }, + { GalImageFormat.D24S8, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.DepthStencil) }, + { GalImageFormat.D32, new ImageDescriptor(4, 1, 1, 1, TargetBuffer.Depth) }, + { GalImageFormat.D32S8, new ImageDescriptor(8, 1, 1, 1, TargetBuffer.DepthStencil) } }; public static GalImageFormat ConvertTexture( @@ -241,26 +245,37 @@ namespace Ryujinx.Graphics.Texture ImageDescriptor Desc = GetImageDescriptor(Image.Format); - (int Width, int Height) = GetImageSizeInBlocks(Image); + (int Width, int Height, int Depth) = GetImageSizeInBlocks(Image); int BytesPerPixel = Desc.BytesPerPixel; //Note: Each row of the texture needs to be aligned to 4 bytes. int Pitch = (Width * BytesPerPixel + 3) & ~3; - byte[] Data = new byte[Height * Pitch]; - for (int Y = 0; Y < Height; Y++) + int DataLayerSize = Height * Pitch * Depth; + byte[] Data = new byte[DataLayerSize * Image.LayerCount]; + + int TargetMipLevel = Image.MaxMipmapLevel <= 1 ? 
1 : Image.MaxMipmapLevel - 1; + int LayerOffset = ImageUtils.GetLayerOffset(Image, TargetMipLevel); + + for (int Layer = 0; Layer < Image.LayerCount; Layer++) { - int OutOffs = Y * Pitch; - - for (int X = 0; X < Width; X++) + for (int Z = 0; Z < Depth; Z++) { - long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + for (int Y = 0; Y < Height; Y++) + { + int OutOffs = (DataLayerSize * Layer) + Y * Pitch + (Z * Height * Pitch); // Slices are Height * Pitch bytes apart (rows are padded to Pitch). - CpuMemory.ReadBytes(Position + Offset, Data, OutOffs, BytesPerPixel); + for (int X = 0; X < Width; X++) + { + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y, Z); - OutOffs += BytesPerPixel; + CpuMemory.ReadBytes(Position + (LayerOffset * Layer) + Offset, Data, OutOffs, BytesPerPixel); + + OutOffs += BytesPerPixel; + } + } } } @@ -273,16 +288,17 @@ namespace Ryujinx.Graphics.Texture ImageDescriptor Desc = GetImageDescriptor(Image.Format); - (int Width, int Height) = ImageUtils.GetImageSizeInBlocks(Image); + (int Width, int Height, int Depth) = ImageUtils.GetImageSizeInBlocks(Image); int BytesPerPixel = Desc.BytesPerPixel; int InOffs = 0; + for (int Z = 0; Z < Depth; Z++) for (int Y = 0; Y < Height; Y++) for (int X = 0; X < Width; X++) { - long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); + long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y, Z); Vmm.Memory.WriteBytes(Position + Offset, Data, InOffs, BytesPerPixel); @@ -290,6 +306,7 @@ namespace Ryujinx.Graphics.Texture } } + // TODO: Support non 2D public static bool CopyTexture( NvGpuVmm Vmm, GalImage SrcImage, @@ -318,8 +335,8 @@ namespace Ryujinx.Graphics.Texture for (int Y = 0; Y < Height; Y++) for (int X = 0; X < Width; X++) { - long SrcOffset = (uint)SrcSwizzle.GetSwizzleOffset(SrcX + X, SrcY + Y); - long DstOffset = (uint)DstSwizzle.GetSwizzleOffset(DstX + X, DstY + Y); + long SrcOffset = (uint)SrcSwizzle.GetSwizzleOffset(SrcX + X, SrcY + Y, 0); + long DstOffset = (uint)DstSwizzle.GetSwizzleOffset(DstX + X, DstY + Y, 0); byte[] Texel = Vmm.ReadBytes(SrcAddress + 
SrcOffset, BytesPerPixel); @@ -333,10 +350,41 @@ namespace Ryujinx.Graphics.Texture { ImageDescriptor Desc = GetImageDescriptor(Image.Format); + int ComponentCount = GetCoordsCountTextureTarget(Image.TextureTarget); + + if (IsArray(Image.TextureTarget)) + ComponentCount--; + int Width = DivRoundUp(Image.Width, Desc.BlockWidth); int Height = DivRoundUp(Image.Height, Desc.BlockHeight); + int Depth = DivRoundUp(Image.Depth, Desc.BlockDepth); - return Desc.BytesPerPixel * Width * Height; + switch (ComponentCount) + { + case 1: + return Desc.BytesPerPixel * Width * Image.LayerCount; + case 2: + return Desc.BytesPerPixel * Width * Height * Image.LayerCount; + case 3: + return Desc.BytesPerPixel * Width * Height * Depth * Image.LayerCount; + default: + throw new InvalidOperationException($"Invalid component count: {ComponentCount}"); + } + } + + public static int GetGpuSize(GalImage Image, bool forcePitch = false) + { + return TextureHelper.GetSwizzle(Image).GetImageSize(Image.MaxMipmapLevel) * Image.LayerCount; + } + + public static int GetLayerOffset(GalImage Image, int MipLevel) + { + if (MipLevel <= 0) + { + MipLevel = 1; + } + + return TextureHelper.GetSwizzle(Image).GetMipOffset(MipLevel); } public static int GetPitch(GalImageFormat Format, int Width) @@ -360,6 +408,11 @@ namespace Ryujinx.Graphics.Texture return GetImageDescriptor(Format).BlockHeight; } + public static int GetBlockDepth(GalImageFormat Format) + { + return GetImageDescriptor(Format).BlockDepth; + } + public static int GetAlignedWidth(GalImage Image) { ImageDescriptor Desc = GetImageDescriptor(Image.Format); @@ -378,12 +431,13 @@ namespace Ryujinx.Graphics.Texture return (Image.Width + AlignMask) & ~AlignMask; } - public static (int Width, int Height) GetImageSizeInBlocks(GalImage Image) + public static (int Width, int Height, int Depth) GetImageSizeInBlocks(GalImage Image) { ImageDescriptor Desc = GetImageDescriptor(Image.Format); return (DivRoundUp(Image.Width, Desc.BlockWidth), - 
DivRoundUp(Image.Height, Desc.BlockHeight)); + DivRoundUp(Image.Height, Desc.BlockHeight), + DivRoundUp(Image.Depth, Desc.BlockDepth)); } public static int GetBytesPerPixel(GalImageFormat Format) @@ -443,5 +497,66 @@ namespace Ryujinx.Graphics.Texture default: throw new NotImplementedException(((int)Type).ToString()); } } + + public static TextureTarget GetTextureTarget(GalTextureTarget GalTextureTarget) + { + switch (GalTextureTarget) + { + case GalTextureTarget.OneD: + return TextureTarget.Texture1D; + case GalTextureTarget.TwoD: + case GalTextureTarget.TwoDNoMipMap: + return TextureTarget.Texture2D; + case GalTextureTarget.ThreeD: + return TextureTarget.Texture3D; + case GalTextureTarget.OneDArray: + return TextureTarget.Texture1DArray; + case GalTextureTarget.OneDBuffer: + return TextureTarget.TextureBuffer; + case GalTextureTarget.TwoDArray: + return TextureTarget.Texture2DArray; + case GalTextureTarget.CubeMap: + return TextureTarget.TextureCubeMap; + case GalTextureTarget.CubeArray: + return TextureTarget.TextureCubeMapArray; + default: + throw new NotSupportedException($"Texture target {GalTextureTarget} currently not supported!"); + } + } + + public static bool IsArray(GalTextureTarget TextureTarget) + { + switch (TextureTarget) + { + case GalTextureTarget.OneDArray: + case GalTextureTarget.TwoDArray: + case GalTextureTarget.CubeArray: + return true; + default: + return false; + } + } + + public static int GetCoordsCountTextureTarget(GalTextureTarget TextureTarget) + { + switch (TextureTarget) + { + case GalTextureTarget.OneD: + return 1; + case GalTextureTarget.OneDArray: + case GalTextureTarget.OneDBuffer: + case GalTextureTarget.TwoD: + case GalTextureTarget.TwoDNoMipMap: + return 2; + case GalTextureTarget.ThreeD: + case GalTextureTarget.TwoDArray: + case GalTextureTarget.CubeMap: + return 3; + case GalTextureTarget.CubeArray: + return 4; + default: + throw new NotImplementedException($"TextureTarget.{TextureTarget} not implemented yet."); + } + } } } 
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs b/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs index ef468e27b5..e6509baa6a 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/LinearSwizzle.cs @@ -1,3 +1,5 @@ +using System; + namespace Ryujinx.Graphics.Texture { class LinearSwizzle : ISwizzle @@ -5,15 +7,39 @@ namespace Ryujinx.Graphics.Texture private int Pitch; private int Bpp; - public LinearSwizzle(int Pitch, int Bpp) + private int SliceSize; + + public LinearSwizzle(int Pitch, int Bpp, int Width, int Height) { - this.Pitch = Pitch; - this.Bpp = Bpp; + this.Pitch = Pitch; + this.Bpp = Bpp; + SliceSize = Width * Height * Bpp; } - public int GetSwizzleOffset(int X, int Y) + public void SetMipLevel(int Level) { - return X * Bpp + Y * Pitch; + throw new NotImplementedException(); + } + + public int GetMipOffset(int Level) + { + if (Level == 1) + return SliceSize; + throw new NotImplementedException(); + } + + public int GetImageSize(int MipsCount) + { + int Size = GetMipOffset(MipsCount); + + Size = (Size + 0x1fff) & ~0x1fff; + + return Size; + } + + public int GetSwizzleOffset(int X, int Y, int Z) + { + return Z * SliceSize + X * Bpp + Y * Pitch; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs index 1f2d625ec4..a2ce86f56d 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureFactory.cs @@ -12,6 +12,8 @@ namespace Ryujinx.Graphics.Texture GalImageFormat Format = GetImageFormat(Tic); + GalTextureTarget TextureTarget = (GalTextureTarget)((Tic[4] >> 23) & 0xF); + GalTextureSource XSource = (GalTextureSource)((Tic[0] >> 19) & 7); GalTextureSource YSource = (GalTextureSource)((Tic[0] >> 22) & 7); GalTextureSource ZSource = (GalTextureSource)((Tic[0] >> 25) & 7); @@ -19,6 +21,8 @@ namespace Ryujinx.Graphics.Texture 
TextureSwizzle Swizzle = (TextureSwizzle)((Tic[2] >> 21) & 7); + int MaxMipmapLevel = ((Tic[3] >> 28) & 0xF) + 1; // mask the 4-bit field first, then add 1 (addition binds tighter than bitwise AND) + GalMemoryLayout Layout; if (Swizzle == TextureSwizzle.BlockLinear || @@ -31,22 +35,61 @@ namespace Ryujinx.Graphics.Texture Layout = GalMemoryLayout.Pitch; } - int BlockHeightLog2 = (Tic[3] >> 3) & 7; - int TileWidthLog2 = (Tic[3] >> 10) & 7; + int GobBlockHeightLog2 = (Tic[3] >> 3) & 7; + int GobBlockDepthLog2 = (Tic[3] >> 6) & 7; + int TileWidthLog2 = (Tic[3] >> 10) & 7; - int BlockHeight = 1 << BlockHeightLog2; - int TileWidth = 1 << TileWidthLog2; + int GobBlockHeight = 1 << GobBlockHeightLog2; + int GobBlockDepth = 1 << GobBlockDepthLog2; + int TileWidth = 1 << TileWidthLog2; - int Width = (Tic[4] & 0xffff) + 1; - int Height = (Tic[5] & 0xffff) + 1; + int Width = ((Tic[4] >> 0) & 0xffff) + 1; + int Height = ((Tic[5] >> 0) & 0xffff) + 1; + int Depth = ((Tic[5] >> 16) & 0x3fff) + 1; + + int LayoutCount = 1; + + // TODO: check this + if (ImageUtils.IsArray(TextureTarget)) + { + LayoutCount = Depth; + Depth = 1; + } + + if (TextureTarget == GalTextureTarget.OneD) + { + Height = 1; + } + + if (TextureTarget == GalTextureTarget.TwoD || TextureTarget == GalTextureTarget.OneD) + { + Depth = 1; + } + else if (TextureTarget == GalTextureTarget.CubeMap) + { + // FIXME: This is a bit hacky but I guess it's fine for now + LayoutCount = 6; + Depth = 1; + } + else if (TextureTarget == GalTextureTarget.CubeArray) + { + // FIXME: This is really, really hacky but I guess it's fine for now + LayoutCount *= 6; + Depth = 1; + } GalImage Image = new GalImage( Width, Height, + Depth, + LayoutCount, TileWidth, - BlockHeight, + GobBlockHeight, + GobBlockDepth, Layout, Format, + TextureTarget, + MaxMipmapLevel, XSource, YSource, ZSource, @@ -68,6 +111,10 @@ namespace Ryujinx.Graphics.Texture GalTextureWrap AddressV = (GalTextureWrap)((Tsc[0] >> 3) & 7); GalTextureWrap AddressP = (GalTextureWrap)((Tsc[0] >> 6) & 7); + bool DepthCompare = ((Tsc[0] >> 9) & 1) == 1; + +
DepthCompareFunc DepthCompareFunc = (DepthCompareFunc)((Tsc[0] >> 10) & 7); + GalTextureFilter MagFilter = (GalTextureFilter) ((Tsc[1] >> 0) & 3); GalTextureFilter MinFilter = (GalTextureFilter) ((Tsc[1] >> 4) & 3); GalTextureMipFilter MipFilter = (GalTextureMipFilter)((Tsc[1] >> 6) & 3); @@ -85,7 +132,9 @@ namespace Ryujinx.Graphics.Texture MinFilter, MagFilter, MipFilter, - BorderColor); + BorderColor, + DepthCompare, + DepthCompareFunc); } private static GalImageFormat GetImageFormat(int[] Tic) diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs index 6ac91d8b59..33ccb0aa51 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs @@ -1,4 +1,5 @@ using ChocolArm64.Memory; +using Ryujinx.Common; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; @@ -9,9 +10,13 @@ namespace Ryujinx.Graphics.Texture public static ISwizzle GetSwizzle(GalImage Image) { int BlockWidth = ImageUtils.GetBlockWidth (Image.Format); + int BlockHeight = ImageUtils.GetBlockHeight (Image.Format); + int BlockDepth = ImageUtils.GetBlockDepth (Image.Format); int BytesPerPixel = ImageUtils.GetBytesPerPixel(Image.Format); - int Width = (Image.Width + (BlockWidth - 1)) / BlockWidth; + int Width = BitUtils.DivRoundUp(Image.Width, BlockWidth); + int Height = BitUtils.DivRoundUp(Image.Height, BlockHeight); + int Depth = BitUtils.DivRoundUp(Image.Depth, BlockDepth); if (Image.Layout == GalMemoryLayout.BlockLinear) { @@ -19,11 +24,17 @@ namespace Ryujinx.Graphics.Texture Width = (Width + AlignMask) & ~AlignMask; - return new BlockLinearSwizzle(Width, BytesPerPixel, Image.GobBlockHeight); + return new BlockLinearSwizzle( + Width, + Height, + Depth, + Image.GobBlockHeight, + Image.GobBlockDepth, + BytesPerPixel); } else { - return new LinearSwizzle(Image.Pitch, BytesPerPixel); + return new LinearSwizzle(Image.Pitch, BytesPerPixel, Width, Height); } } diff --git 
a/Ryujinx.Graphics/Texture/TextureInstructionSuffix.cs b/Ryujinx.Graphics/Texture/TextureInstructionSuffix.cs new file mode 100644 index 0000000000..bcb64af0be --- /dev/null +++ b/Ryujinx.Graphics/Texture/TextureInstructionSuffix.cs @@ -0,0 +1,19 @@ +using System; + +namespace Ryujinx.Graphics.Texture +{ + [Flags] + public enum TextureInstructionSuffix + { + None = 0x00, // No Modifier + LZ = 0x02, // Load LOD Zero + LB = 0x08, // Load Bias + LL = 0x10, // Load LOD + LBA = 0x20, // Load Bias with OperA? Auto? + LLA = 0x40, // Load LOD with OperA? Auto? + DC = 0x80, // Depth Compare + AOffI = 0x100, // Offset + MZ = 0x200, // Multisample Zero? + PTP = 0x400 // ??? + } +} diff --git a/Ryujinx.Graphics/VDec/VideoDecoder.cs b/Ryujinx.Graphics/VDec/VideoDecoder.cs index 847392b0d9..be53b1a02c 100644 --- a/Ryujinx.Graphics/VDec/VideoDecoder.cs +++ b/Ryujinx.Graphics/VDec/VideoDecoder.cs @@ -216,10 +216,11 @@ namespace Ryujinx.Graphics.VDec GalImage Image = new GalImage( OutputConfig.SurfaceWidth, - OutputConfig.SurfaceHeight, 1, - OutputConfig.GobBlockHeight, + OutputConfig.SurfaceHeight, 1, 1, 1, + OutputConfig.GobBlockHeight, 1, GalMemoryLayout.BlockLinear, - GalImageFormat.RGBA8 | GalImageFormat.Unorm); + GalImageFormat.RGBA8 | GalImageFormat.Unorm, + GalTextureTarget.TwoD); ImageUtils.WriteTexture(Vmm, Image, Vmm.GetPhysicalAddress(OutputConfig.SurfaceLumaAddress), Frame.Data); } diff --git a/Ryujinx.HLE/HOS/Services/Vi/NvFlinger.cs b/Ryujinx.HLE/HOS/Services/Vi/NvFlinger.cs index db04f47cd0..dbf255beee 100644 --- a/Ryujinx.HLE/HOS/Services/Vi/NvFlinger.cs +++ b/Ryujinx.HLE/HOS/Services/Vi/NvFlinger.cs @@ -1,6 +1,7 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; +using Ryujinx.HLE.HOS.Kernel; using Ryujinx.HLE.HOS.Kernel.Threading; using Ryujinx.HLE.HOS.Services.Nv.NvGpuAS; using Ryujinx.HLE.HOS.Services.Nv.NvMap; @@ -415,9 +416,10 @@ namespace Ryujinx.HLE.HOS.Services.Android { image = new GalImage( fbWidth, - fbHeight, 1, 
BlockHeight, + fbHeight, 1, 1, 1, BlockHeight, 1, GalMemoryLayout.BlockLinear, - imageFormat); + imageFormat, + GalTextureTarget.TwoD); } context.Device.Gpu.ResourceManager.ClearPbCache(); diff --git a/Ryujinx.ShaderTools/Program.cs b/Ryujinx.ShaderTools/Program.cs index 30fa71aea2..77aba0abe6 100644 --- a/Ryujinx.ShaderTools/Program.cs +++ b/Ryujinx.ShaderTools/Program.cs @@ -13,7 +13,7 @@ namespace Ryujinx.ShaderTools { if (args.Length == 2) { - GlslDecompiler Decompiler = new GlslDecompiler(MaxUboSize); + GlslDecompiler Decompiler = new GlslDecompiler(MaxUboSize, true); GalShaderType ShaderType = GalShaderType.Vertex; From e21ebbf666f10d39d44a0856e5a44143d3d69d0d Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 27 Feb 2019 23:03:31 -0300 Subject: [PATCH 11/12] Misc. CPU optimizations (#575) * Add optimizations related to caller/callee saved registers, thread synchronization and disable tier 0 * Refactoring * Add a config entry to enable or disable the reg load/store opt. * Remove unnecessary register state stores for calls when the callee is know * Rename IoType to VarType * Enable tier 0 while fixing some perf issues related to tier 0 * Small tweak -- Compile before adding to the cache, to avoid lags * Add required config entry --- ChocolArm64/Instructions/InstEmitFlow.cs | 3 +- ChocolArm64/Instructions/InstEmitFlow32.cs | 1 - .../Instructions/InstEmitFlowHelper.cs | 26 +-- ChocolArm64/Optimizations.cs | 32 +-- ChocolArm64/Translation/CallType.cs | 9 + ChocolArm64/Translation/ILBlock.cs | 32 +-- ChocolArm64/Translation/ILEmitterCtx.cs | 89 +++++--- ChocolArm64/Translation/ILLabel.cs | 6 +- ChocolArm64/Translation/ILMethodBuilder.cs | 45 ++-- ChocolArm64/Translation/ILOpCode.cs | 6 +- ChocolArm64/Translation/ILOpCodeBranch.cs | 10 +- ChocolArm64/Translation/ILOpCodeCall.cs | 4 +- ChocolArm64/Translation/ILOpCodeConst.cs | 2 + ChocolArm64/Translation/ILOpCodeLoad.cs | 20 +- ChocolArm64/Translation/ILOpCodeLoadField.cs | 2 +- 
ChocolArm64/Translation/ILOpCodeLoadState.cs | 17 +- ChocolArm64/Translation/ILOpCodeLog.cs | 6 +- ChocolArm64/Translation/ILOpCodeStore.cs | 20 +- ChocolArm64/Translation/ILOpCodeStoreState.cs | 26 ++- .../{LocalAlloc.cs => RegisterUsage.cs} | 199 ++++++++++-------- ChocolArm64/Translation/TranslatedSub.cs | 55 ++++- ChocolArm64/Translation/Translator.cs | 56 +++-- ChocolArm64/Translation/TranslatorQueue.cs | 14 +- .../Translation/TranslatorQueueItem.cs | 15 +- .../Translation/{IoType.cs => VarType.cs} | 2 +- Ryujinx/Config.jsonc | 17 +- Ryujinx/Configuration.cs | 10 + Ryujinx/_schema.json | 12 ++ 28 files changed, 456 insertions(+), 280 deletions(-) create mode 100644 ChocolArm64/Translation/CallType.cs rename ChocolArm64/Translation/{LocalAlloc.cs => RegisterUsage.cs} (56%) rename ChocolArm64/Translation/{IoType.cs => VarType.cs} (85%) diff --git a/ChocolArm64/Instructions/InstEmitFlow.cs b/ChocolArm64/Instructions/InstEmitFlow.cs index a842dca9d1..5eae89cc09 100644 --- a/ChocolArm64/Instructions/InstEmitFlow.cs +++ b/ChocolArm64/Instructions/InstEmitFlow.cs @@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions context.EmitLdc_I(op.Position + 4); context.EmitStint(RegisterAlias.Lr); - context.EmitStoreState(); EmitCall(context, op.Imm); } @@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions { OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp; + context.HasIndirectJump = true; + context.EmitStoreState(); context.EmitLdintzr(op.Rn); diff --git a/ChocolArm64/Instructions/InstEmitFlow32.cs b/ChocolArm64/Instructions/InstEmitFlow32.cs index 61f1d34c53..dea490c775 100644 --- a/ChocolArm64/Instructions/InstEmitFlow32.cs +++ b/ChocolArm64/Instructions/InstEmitFlow32.cs @@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions } context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr)); - context.EmitStoreState(); //If x is true, then this is a branch with link and exchange. //In this case we need to swap the mode between Arm <-> Thumb. 
diff --git a/ChocolArm64/Instructions/InstEmitFlowHelper.cs b/ChocolArm64/Instructions/InstEmitFlowHelper.cs index e93ef42679..a6091a5711 100644 --- a/ChocolArm64/Instructions/InstEmitFlowHelper.cs +++ b/ChocolArm64/Instructions/InstEmitFlowHelper.cs @@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions { if (context.Tier == TranslationTier.Tier0) { + context.EmitStoreState(); + context.TranslateAhead(imm); context.EmitLdc_I8(imm); @@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions if (!context.TryOptEmitSubroutineCall()) { + context.HasSlowCall = true; + + context.EmitStoreState(); + context.TranslateAhead(imm); context.EmitLdarg(TranslatedSub.StateArgIdx); @@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdc_I8(imm); + context.EmitLdc_I4((int)CallType.Call); context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); @@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions { if (context.Tier == TranslationTier.Tier0) { - context.Emit(OpCodes.Dup); - - context.EmitSttmp(); - context.EmitLdarg(TranslatedSub.StateArgIdx); - - context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), - BindingFlags.Instance | - BindingFlags.NonPublic)); - - context.EmitLdarg(TranslatedSub.StateArgIdx); - context.EmitLdtmp(); - - context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine)); - context.Emit(OpCodes.Ret); } else @@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdtmp(); + context.EmitLdc_I4(isJump + ? 
(int)CallType.VirtualJump + : (int)CallType.VirtualCall); - context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine)); + context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.MemoryArgIdx); diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs index 8fa6f4626c..cbb8131f5c 100644 --- a/ChocolArm64/Optimizations.cs +++ b/ChocolArm64/Optimizations.cs @@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86; public static class Optimizations { - internal static bool FastFP = true; + public static bool AssumeStrictAbiCompliance { get; set; } - private static bool _useAllSseIfAvailable = true; + public static bool FastFP { get; set; } = true; - private static bool _useSseIfAvailable = true; - private static bool _useSse2IfAvailable = true; - private static bool _useSse3IfAvailable = true; - private static bool _useSsse3IfAvailable = true; - private static bool _useSse41IfAvailable = true; - private static bool _useSse42IfAvailable = true; + private const bool UseAllSseIfAvailable = true; - internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported; - internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported; - internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported; - internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported; - internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported; - internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported; -} + public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse3IfAvailable { get; set; } = 
UseAllSseIfAvailable; + public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable; + + internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported; + internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported; + internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported; + internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported; + internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported; + internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported; +} \ No newline at end of file diff --git a/ChocolArm64/Translation/CallType.cs b/ChocolArm64/Translation/CallType.cs new file mode 100644 index 0000000000..937ede768a --- /dev/null +++ b/ChocolArm64/Translation/CallType.cs @@ -0,0 +1,9 @@ +namespace ChocolArm64.Translation +{ + enum CallType + { + Call, + VirtualCall, + VirtualJump + } +} \ No newline at end of file diff --git a/ChocolArm64/Translation/ILBlock.cs b/ChocolArm64/Translation/ILBlock.cs index 136579012b..12773705a1 100644 --- a/ChocolArm64/Translation/ILBlock.cs +++ b/ChocolArm64/Translation/ILBlock.cs @@ -4,13 +4,13 @@ namespace ChocolArm64.Translation { class ILBlock : IILEmit { - public long IntInputs { get; private set; } - public long IntOutputs { get; private set; } - public long IntAwOutputs { get; private set; } + public long IntInputs { get; private set; } + public long IntOutputs { get; private set; } + private long _intAwOutputs; - public long VecInputs { get; private set; } - public long VecOutputs { get; private set; } - public long VecAwOutputs { get; private set; } + public long VecInputs { get; private set; } + public long VecOutputs { get; private set; } + private long _vecAwOutputs; public bool HasStateStore { get; private set; } @@ -34,25 +34,25 @@ namespace ChocolArm64.Translation 
//opcodes emitted by each ARM instruction. //We can only consider the new outputs for doing input elimination //after all the CIL opcodes used by the instruction being emitted. - IntAwOutputs = IntOutputs; - VecAwOutputs = VecOutputs; + _intAwOutputs = IntOutputs; + _vecAwOutputs = VecOutputs; } else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index)) { - switch (ld.IoType) + switch (ld.VarType) { - case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break; - case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break; - case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break; + case VarType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break; + case VarType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break; + case VarType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break; } } else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index)) { - switch (st.IoType) + switch (st.VarType) { - case IoType.Flag: IntOutputs |= (1L << st.Index) << 32; break; - case IoType.Int: IntOutputs |= 1L << st.Index; break; - case IoType.Vector: VecOutputs |= 1L << st.Index; break; + case VarType.Flag: IntOutputs |= (1L << st.Index) << 32; break; + case VarType.Int: IntOutputs |= 1L << st.Index; break; + case VarType.Vector: VecOutputs |= 1L << st.Index; break; } } else if (emitter is ILOpCodeStoreState) diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs index f7e61bc999..91b72b13ae 100644 --- a/ChocolArm64/Translation/ILEmitterCtx.cs +++ b/ChocolArm64/Translation/ILEmitterCtx.cs @@ -31,6 +31,10 @@ namespace ChocolArm64.Translation public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO + public bool HasIndirectJump { get; set; } + + public bool HasSlowCall { get; set; } + private Dictionary _visitedBlocks; private Queue _branchTargets; @@ -91,7 +95,12 @@ namespace ChocolArm64.Translation ResetBlockState(); - 
AdvanceOpCode(); + if (AdvanceOpCode()) + { + EmitSynchronization(); + + _ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true)); + } } public static int GetIntTempIndex() @@ -127,10 +136,18 @@ namespace ChocolArm64.Translation return; } - if (_opcIndex == 0) + int opcIndex = _opcIndex; + + if (opcIndex == 0) { MarkLabel(GetLabel(_currBlock.Position)); + } + bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1; + + if (isLastOp && CurrBlock.Branch != null && + (ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position) + { EmitSynchronization(); } @@ -161,7 +178,7 @@ namespace ChocolArm64.Translation //of the next instruction to be executed (in the case that the condition //is false, and the branch was not taken, as all basic blocks should end with //some kind of branch). - if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null) + if (isLastOp && CurrBlock.Next == null) { EmitStoreState(); EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes); @@ -285,32 +302,43 @@ namespace ChocolArm64.Translation return; } - _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1)); + _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true); } public bool TryOptEmitSubroutineCall() { + //Calls should always have a next block, unless + //we're translating a single basic block. if (_currBlock.Next == null) { return false; } - if (CurrOp.Emitter != InstEmit.Bl) + if (!(CurrOp is IOpCodeBImm op)) { return false; } - if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine)) + if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub)) { return false; } + //It's not worth to call a Tier0 method, because + //it contains slow code, rather than the entire function. 
+ if (sub.Tier == TranslationTier.Tier0) + { + return false; + } + + EmitStoreState(sub); + for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++) { EmitLdarg(index); } - EmitCall(subroutine.Method); + EmitCall(sub.Method); return true; } @@ -321,8 +349,8 @@ namespace ChocolArm64.Translation InstEmitAluHelper.EmitAluLoadOpers(this); - Stloc(CmpOptTmp2Index, IoType.Int); - Stloc(CmpOptTmp1Index, IoType.Int); + Stloc(CmpOptTmp2Index, VarType.Int); + Stloc(CmpOptTmp1Index, VarType.Int); } private Dictionary _branchOps = new Dictionary() @@ -346,8 +374,8 @@ namespace ChocolArm64.Translation { if (_optOpLastCompare.Emitter == InstEmit.Subs) { - Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); - Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize); + Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize); + Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize); Emit(_branchOps[cond], target); @@ -369,7 +397,7 @@ namespace ChocolArm64.Translation //Such invalid values can't be encoded on the immediate encodings. 
if (_optOpLastCompare is IOpCodeAluImm64 op) { - Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); + Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize); if (_optOpLastCompare.RegisterSize == RegisterSize.Int32) { @@ -491,14 +519,14 @@ namespace ChocolArm64.Translation { if (amount > 0) { - Stloc(RorTmpIndex, IoType.Int); - Ldloc(RorTmpIndex, IoType.Int); + Stloc(RorTmpIndex, VarType.Int); + Ldloc(RorTmpIndex, VarType.Int); EmitLdc_I4(amount); Emit(OpCodes.Shr_Un); - Ldloc(RorTmpIndex, IoType.Int); + Ldloc(RorTmpIndex, VarType.Int); EmitLdc_I4(CurrOp.GetBitsCount() - amount); @@ -546,7 +574,7 @@ namespace ChocolArm64.Translation public void EmitLdarg(int index) { - _ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg)); + _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg)); } public void EmitLdintzr(int index) @@ -588,6 +616,11 @@ namespace ChocolArm64.Translation _ilBlock.Add(new ILOpCodeStoreState(_ilBlock)); } + private void EmitStoreState(TranslatedSub callSub) + { + _ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub)); + } + public void EmitLdtmp() => EmitLdint(IntGpTmp1Index); public void EmitSttmp() => EmitStint(IntGpTmp1Index); @@ -600,13 +633,13 @@ namespace ChocolArm64.Translation public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); - public void EmitLdint(int index) => Ldloc(index, IoType.Int); - public void EmitStint(int index) => Stloc(index, IoType.Int); + public void EmitLdint(int index) => Ldloc(index, VarType.Int); + public void EmitStint(int index) => Stloc(index, VarType.Int); - public void EmitLdvec(int index) => Ldloc(index, IoType.Vector); - public void EmitStvec(int index) => Stloc(index, IoType.Vector); + public void EmitLdvec(int index) => Ldloc(index, VarType.Vector); + public void EmitStvec(int index) => Stloc(index, VarType.Vector); - public void EmitLdflg(int index) => Ldloc(index, IoType.Flag); + public void EmitLdflg(int index) => 
Ldloc(index, VarType.Flag); public void EmitStflg(int index) { //Set this only if any of the NZCV flag bits were modified. @@ -619,22 +652,22 @@ namespace ChocolArm64.Translation _optOpLastFlagSet = CurrOp; } - Stloc(index, IoType.Flag); + Stloc(index, VarType.Flag); } - private void Ldloc(int index, IoType ioType) + private void Ldloc(int index, VarType varType) { - _ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize)); + _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize)); } - private void Ldloc(int index, IoType ioType, RegisterSize registerSize) + private void Ldloc(int index, VarType varType, RegisterSize registerSize) { - _ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize)); + _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize)); } - private void Stloc(int index, IoType ioType) + private void Stloc(int index, VarType varType) { - _ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize)); + _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize)); } public void EmitCallPropGet(Type objType, string propName) diff --git a/ChocolArm64/Translation/ILLabel.cs b/ChocolArm64/Translation/ILLabel.cs index f423a4256c..17a31783df 100644 --- a/ChocolArm64/Translation/ILLabel.cs +++ b/ChocolArm64/Translation/ILLabel.cs @@ -6,7 +6,7 @@ namespace ChocolArm64.Translation { private bool _hasLabel; - private Label _lbl; + private Label _label; public void Emit(ILMethodBuilder context) { @@ -17,12 +17,12 @@ namespace ChocolArm64.Translation { if (!_hasLabel) { - _lbl = context.Generator.DefineLabel(); + _label = context.Generator.DefineLabel(); _hasLabel = true; } - return _lbl; + return _label; } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILMethodBuilder.cs b/ChocolArm64/Translation/ILMethodBuilder.cs index 892f831be3..98b5052043 100644 --- a/ChocolArm64/Translation/ILMethodBuilder.cs +++ b/ChocolArm64/Translation/ILMethodBuilder.cs @@ -8,7 +8,10 @@ namespace ChocolArm64.Translation { 
class ILMethodBuilder { - public LocalAlloc LocalAlloc { get; private set; } + private const int RegsCount = 32; + private const int RegsMask = RegsCount - 1; + + public RegisterUsage RegUsage { get; private set; } public ILGenerator Generator { get; private set; } @@ -18,29 +21,47 @@ namespace ChocolArm64.Translation private string _subName; + public bool IsAarch64 { get; } + + public bool IsSubComplete { get; } + private int _localsCount; - public ILMethodBuilder(ILBlock[] ilBlocks, string subName) + public ILMethodBuilder( + ILBlock[] ilBlocks, + string subName, + bool isAarch64, + bool isSubComplete = false) { - _ilBlocks = ilBlocks; - _subName = subName; + _ilBlocks = ilBlocks; + _subName = subName; + IsAarch64 = isAarch64; + IsSubComplete = isSubComplete; } - public TranslatedSub GetSubroutine(TranslationTier tier) + public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing) { - LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); + RegUsage = new RegisterUsage(); + + RegUsage.BuildUses(_ilBlocks[0]); DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes); - Generator = method.GetILGenerator(); + long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]); + long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]); - TranslatedSub subroutine = new TranslatedSub(method, tier); + TranslatedSub subroutine = new TranslatedSub( + method, + intNiRegsMask, + vecNiRegsMask, + tier, + isWorthOptimizing); _locals = new Dictionary(); _localsCount = 0; - new ILOpCodeLoadState(_ilBlocks[0]).Emit(this); + Generator = method.GetILGenerator(); foreach (ILBlock ilBlock in _ilBlocks) { @@ -80,13 +101,13 @@ namespace ChocolArm64.Translation public static Register GetRegFromBit(int bit, RegisterType baseType) { - if (bit < 32) + if (bit < RegsCount) { return new Register(bit, baseType); } else if (baseType == RegisterType.Int) { - return new Register(bit & 0x1f, RegisterType.Flag); + return new Register(bit & 
RegsMask, RegisterType.Flag); } else { @@ -96,7 +117,7 @@ namespace ChocolArm64.Translation public static bool IsRegIndex(int index) { - return (uint)index < 32; + return (uint)index < RegsCount; } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCode.cs b/ChocolArm64/Translation/ILOpCode.cs index 4021603c01..486452820d 100644 --- a/ChocolArm64/Translation/ILOpCode.cs +++ b/ChocolArm64/Translation/ILOpCode.cs @@ -4,16 +4,16 @@ namespace ChocolArm64.Translation { struct ILOpCode : IILEmit { - private OpCode _ilOp; + public OpCode ILOp { get; } public ILOpCode(OpCode ilOp) { - _ilOp = ilOp; + ILOp = ilOp; } public void Emit(ILMethodBuilder context) { - context.Generator.Emit(_ilOp); + context.Generator.Emit(ILOp); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeBranch.cs b/ChocolArm64/Translation/ILOpCodeBranch.cs index 22b80b5d52..9d4e40fa9d 100644 --- a/ChocolArm64/Translation/ILOpCodeBranch.cs +++ b/ChocolArm64/Translation/ILOpCodeBranch.cs @@ -4,18 +4,18 @@ namespace ChocolArm64.Translation { struct ILOpCodeBranch : IILEmit { - private OpCode _ilOp; - private ILLabel _label; + public OpCode ILOp { get; } + public ILLabel Label { get; } public ILOpCodeBranch(OpCode ilOp, ILLabel label) { - _ilOp = ilOp; - _label = label; + ILOp = ilOp; + Label = label; } public void Emit(ILMethodBuilder context) { - context.Generator.Emit(_ilOp, _label.GetLabel(context)); + context.Generator.Emit(ILOp, Label.GetLabel(context)); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeCall.cs b/ChocolArm64/Translation/ILOpCodeCall.cs index c046aeeb75..dc20417a9a 100644 --- a/ChocolArm64/Translation/ILOpCodeCall.cs +++ b/ChocolArm64/Translation/ILOpCodeCall.cs @@ -5,9 +5,9 @@ namespace ChocolArm64.Translation { struct ILOpCodeCall : IILEmit { - public MethodInfo Info { get; private set; } + public MethodInfo Info { get; } - public bool IsVirtual { get; private set; } + public bool IsVirtual { get; } public 
ILOpCodeCall(MethodInfo info, bool isVirtual) { diff --git a/ChocolArm64/Translation/ILOpCodeConst.cs b/ChocolArm64/Translation/ILOpCodeConst.cs index 2aaf8676ee..cd3b58ff04 100644 --- a/ChocolArm64/Translation/ILOpCodeConst.cs +++ b/ChocolArm64/Translation/ILOpCodeConst.cs @@ -16,6 +16,8 @@ namespace ChocolArm64.Translation private ImmVal _value; + public long Value => _value.I8; + private enum ConstType { Int32, diff --git a/ChocolArm64/Translation/ILOpCodeLoad.cs b/ChocolArm64/Translation/ILOpCodeLoad.cs index c31e06bbd9..0d11eeaa4b 100644 --- a/ChocolArm64/Translation/ILOpCodeLoad.cs +++ b/ChocolArm64/Translation/ILOpCodeLoad.cs @@ -5,28 +5,28 @@ namespace ChocolArm64.Translation { struct ILOpCodeLoad : IILEmit { - public int Index { get; private set; } + public int Index { get; } - public IoType IoType { get; private set; } + public VarType VarType { get; } - public RegisterSize RegisterSize { get; private set; } + public RegisterSize RegisterSize { get; } - public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0) + public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0) { Index = index; - IoType = ioType; + VarType = varType; RegisterSize = registerSize; } public void Emit(ILMethodBuilder context) { - switch (IoType) + switch (VarType) { - case IoType.Arg: context.Generator.EmitLdarg(Index); break; + case VarType.Arg: context.Generator.EmitLdarg(Index); break; - case IoType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break; - case IoType.Int: EmitLdloc(context, Index, RegisterType.Int); break; - case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; + case VarType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break; + case VarType.Int: EmitLdloc(context, Index, RegisterType.Int); break; + case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; } } diff --git a/ChocolArm64/Translation/ILOpCodeLoadField.cs b/ChocolArm64/Translation/ILOpCodeLoadField.cs index 
abcd37c348..f0507ac226 100644 --- a/ChocolArm64/Translation/ILOpCodeLoadField.cs +++ b/ChocolArm64/Translation/ILOpCodeLoadField.cs @@ -5,7 +5,7 @@ namespace ChocolArm64.Translation { struct ILOpCodeLoadField : IILEmit { - public FieldInfo Info { get; private set; } + public FieldInfo Info { get; } public ILOpCodeLoadField(FieldInfo info) { diff --git a/ChocolArm64/Translation/ILOpCodeLoadState.cs b/ChocolArm64/Translation/ILOpCodeLoadState.cs index ddab611019..c23dc94329 100644 --- a/ChocolArm64/Translation/ILOpCodeLoadState.cs +++ b/ChocolArm64/Translation/ILOpCodeLoadState.cs @@ -7,15 +7,24 @@ namespace ChocolArm64.Translation { private ILBlock _block; - public ILOpCodeLoadState(ILBlock block) + private bool _isSubEntry; + + public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false) { - _block = block; + _block = block; + _isSubEntry = isSubEntry; } public void Emit(ILMethodBuilder context) { - long intInputs = context.LocalAlloc.GetIntInputs(_block); - long vecInputs = context.LocalAlloc.GetVecInputs(_block); + long intInputs = context.RegUsage.GetIntInputs(_block); + long vecInputs = context.RegUsage.GetVecInputs(_block); + + if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete) + { + intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64); + vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64); + } LoadLocals(context, intInputs, RegisterType.Int); LoadLocals(context, vecInputs, RegisterType.Vector); diff --git a/ChocolArm64/Translation/ILOpCodeLog.cs b/ChocolArm64/Translation/ILOpCodeLog.cs index ebb042b596..53846f927e 100644 --- a/ChocolArm64/Translation/ILOpCodeLog.cs +++ b/ChocolArm64/Translation/ILOpCodeLog.cs @@ -2,16 +2,16 @@ namespace ChocolArm64.Translation { struct ILOpCodeLog : IILEmit { - private string _text; + public string Text { get; } public ILOpCodeLog(string text) { - _text = text; + Text = text; } public void Emit(ILMethodBuilder context) { - 
context.Generator.EmitWriteLine(_text); + context.Generator.EmitWriteLine(Text); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeStore.cs b/ChocolArm64/Translation/ILOpCodeStore.cs index 17a6259c6f..7ac78e9ae4 100644 --- a/ChocolArm64/Translation/ILOpCodeStore.cs +++ b/ChocolArm64/Translation/ILOpCodeStore.cs @@ -5,28 +5,28 @@ namespace ChocolArm64.Translation { struct ILOpCodeStore : IILEmit { - public int Index { get; private set; } + public int Index { get; } - public IoType IoType { get; private set; } + public VarType VarType { get; } - public RegisterSize RegisterSize { get; private set; } + public RegisterSize RegisterSize { get; } - public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0) + public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0) { Index = index; - IoType = ioType; + VarType = varType; RegisterSize = registerSize; } public void Emit(ILMethodBuilder context) { - switch (IoType) + switch (VarType) { - case IoType.Arg: context.Generator.EmitStarg(Index); break; + case VarType.Arg: context.Generator.EmitStarg(Index); break; - case IoType.Flag: EmitStloc(context, Index, RegisterType.Flag); break; - case IoType.Int: EmitStloc(context, Index, RegisterType.Int); break; - case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; + case VarType.Flag: EmitStloc(context, Index, RegisterType.Flag); break; + case VarType.Int: EmitStloc(context, Index, RegisterType.Int); break; + case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; } } diff --git a/ChocolArm64/Translation/ILOpCodeStoreState.cs b/ChocolArm64/Translation/ILOpCodeStoreState.cs index 458e9eda43..a587dbfe84 100644 --- a/ChocolArm64/Translation/ILOpCodeStoreState.cs +++ b/ChocolArm64/Translation/ILOpCodeStoreState.cs @@ -7,15 +7,33 @@ namespace ChocolArm64.Translation { private ILBlock _block; - public ILOpCodeStoreState(ILBlock block) + private TranslatedSub _callSub; + + public 
ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null) { - _block = block; + _block = block; + _callSub = callSub; } public void Emit(ILMethodBuilder context) { - long intOutputs = context.LocalAlloc.GetIntOutputs(_block); - long vecOutputs = context.LocalAlloc.GetVecOutputs(_block); + long intOutputs = context.RegUsage.GetIntOutputs(_block); + long vecOutputs = context.RegUsage.GetVecOutputs(_block); + + if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete) + { + intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64); + vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64); + } + + if (_callSub != null) + { + //Those register are assigned on the callee function, without + //reading it's value first. We don't need to write them because + //they are not going to be read on the callee. + intOutputs &= ~_callSub.IntNiRegsMask; + vecOutputs &= ~_callSub.VecNiRegsMask; + } StoreLocals(context, intOutputs, RegisterType.Int); StoreLocals(context, vecOutputs, RegisterType.Vector); diff --git a/ChocolArm64/Translation/LocalAlloc.cs b/ChocolArm64/Translation/RegisterUsage.cs similarity index 56% rename from ChocolArm64/Translation/LocalAlloc.cs rename to ChocolArm64/Translation/RegisterUsage.cs index 763be6190d..2e6829d512 100644 --- a/ChocolArm64/Translation/LocalAlloc.cs +++ b/ChocolArm64/Translation/RegisterUsage.cs @@ -3,8 +3,13 @@ using System.Collections.Generic; namespace ChocolArm64.Translation { - class LocalAlloc + class RegisterUsage { + public const long CallerSavedIntRegistersMask = 0x7fL << 9; + public const long PStateNzcvFlagsMask = 0xfL << 60; + + public const long CallerSavedVecRegistersMask = 0xffffL << 16; + private class PathIo { private Dictionary _allInputs; @@ -18,31 +23,30 @@ namespace ChocolArm64.Translation _cmnOutputs = new Dictionary(); } - public PathIo(ILBlock root, long inputs, long outputs) : this() + public void Set(ILBlock entry, long inputs, long outputs) { - 
Set(root, inputs, outputs); - } - - public void Set(ILBlock root, long inputs, long outputs) - { - if (!_allInputs.TryAdd(root, inputs)) + if (!_allInputs.TryAdd(entry, inputs)) { - _allInputs[root] |= inputs; + _allInputs[entry] |= inputs; } - if (!_cmnOutputs.TryAdd(root, outputs)) + if (!_cmnOutputs.TryAdd(entry, outputs)) { - _cmnOutputs[root] &= outputs; + _cmnOutputs[entry] &= outputs; } _allOutputs |= outputs; } - public long GetInputs(ILBlock root) + public long GetInputs(ILBlock entry) { - if (_allInputs.TryGetValue(root, out long inputs)) + if (_allInputs.TryGetValue(entry, out long inputs)) { - return inputs | (_allOutputs & ~_cmnOutputs[root]); + //We also need to read the registers that may not be written + //by all paths that can reach a exit point, to ensure that + //the local variable will not remain uninitialized depending + //on the flow path taken. + return inputs | (_allOutputs & ~_cmnOutputs[entry]); } return 0; @@ -57,15 +61,38 @@ namespace ChocolArm64.Translation private Dictionary _intPaths; private Dictionary _vecPaths; - private struct BlockIo + private struct BlockIo : IEquatable { - public ILBlock Block; - public ILBlock Entry; + public ILBlock Block { get; } + public ILBlock Entry { get; } - public long IntInputs; - public long VecInputs; - public long IntOutputs; - public long VecOutputs; + public long IntInputs { get; set; } + public long VecInputs { get; set; } + public long IntOutputs { get; set; } + public long VecOutputs { get; set; } + + public BlockIo(ILBlock block, ILBlock entry) + { + Block = block; + Entry = entry; + + IntInputs = IntOutputs = 0; + VecInputs = VecOutputs = 0; + } + + public BlockIo( + ILBlock block, + ILBlock entry, + long intInputs, + long vecInputs, + long intOutputs, + long vecOutputs) : this(block, entry) + { + IntInputs = intInputs; + VecInputs = vecInputs; + IntOutputs = intOutputs; + VecOutputs = vecOutputs; + } public override bool Equals(object obj) { @@ -74,6 +101,11 @@ namespace 
ChocolArm64.Translation return false; } + return Equals(other); + } + + public bool Equals(BlockIo other) + { return other.Block == Block && other.Entry == Entry && other.IntInputs == IntInputs && @@ -98,25 +130,13 @@ namespace ChocolArm64.Translation } } - private const int MaxOptGraphLength = 40; - - public LocalAlloc(ILBlock[] graph, ILBlock entry) + public RegisterUsage() { _intPaths = new Dictionary(); _vecPaths = new Dictionary(); - - if (graph.Length > 1 && - graph.Length < MaxOptGraphLength) - { - InitializeOptimal(graph, entry); - } - else - { - InitializeFast(graph); - } } - private void InitializeOptimal(ILBlock[] graph, ILBlock entry) + public void BuildUses(ILBlock entry) { //This will go through all possible paths on the graph, //and store all inputs/outputs for each block. A register @@ -124,7 +144,7 @@ namespace ChocolArm64.Translation //When a block can be reached by more than one path, then the //output from all paths needs to be set for this block, and //only outputs present in all of the parent blocks can be considered - //when doing input elimination. Each block chain have a entry, that's where + //when doing input elimination. Each block chain has a entry, that's where //the code starts executing. They are present on the subroutine start point, //and on call return points too (address written to X30 by BL). 
HashSet visited = new HashSet(); @@ -133,19 +153,13 @@ namespace ChocolArm64.Translation void Enqueue(BlockIo block) { - if (!visited.Contains(block)) + if (visited.Add(block)) { unvisited.Enqueue(block); - - visited.Add(block); } } - Enqueue(new BlockIo() - { - Block = entry, - Entry = entry - }); + Enqueue(new BlockIo(entry, entry)); while (unvisited.Count > 0) { @@ -177,19 +191,21 @@ namespace ChocolArm64.Translation void EnqueueFromCurrent(ILBlock block, bool retTarget) { - BlockIo blockIo = new BlockIo() { Block = block }; + BlockIo blockIo; if (retTarget) { - blockIo.Entry = block; + blockIo = new BlockIo(block, block); } else { - blockIo.Entry = current.Entry; - blockIo.IntInputs = current.IntInputs; - blockIo.VecInputs = current.VecInputs; - blockIo.IntOutputs = current.IntOutputs; - blockIo.VecOutputs = current.VecOutputs; + blockIo = new BlockIo( + block, + current.Entry, + current.IntInputs, + current.VecInputs, + current.IntOutputs, + current.VecOutputs); } Enqueue(blockIo); @@ -207,54 +223,63 @@ namespace ChocolArm64.Translation } } - private void InitializeFast(ILBlock[] graph) - { - //This is WAY faster than InitializeOptimal, but results in - //unneeded loads and stores, so the resulting code will be slower. - long intInputs = 0, intOutputs = 0; - long vecInputs = 0, vecOutputs = 0; + public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values); + public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values); - foreach (ILBlock block in graph) - { - intInputs |= block.IntInputs; - intOutputs |= block.IntOutputs; - vecInputs |= block.VecInputs; - vecOutputs |= block.VecOutputs; - } - - //It's possible that not all code paths writes to those output registers, - //in those cases if we attempt to write an output registers that was - //not written, we will be just writing zero and messing up the old register value. - //So we just need to ensure that all outputs are loaded. 
- if (graph.Length > 1) - { - intInputs |= intOutputs; - vecInputs |= vecOutputs; - } - - foreach (ILBlock block in graph) - { - _intPaths.Add(block, new PathIo(block, intInputs, intOutputs)); - _vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs)); - } - } - - public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values); - public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values); - - private long GetInputsImpl(ILBlock root, IEnumerable values) + private long GetInputsImpl(ILBlock entry, IEnumerable values) { long inputs = 0; foreach (PathIo path in values) { - inputs |= path.GetInputs(root); + inputs |= path.GetInputs(entry); } return inputs; } + public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values); + public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values); + + private long GetNotInputsImpl(ILBlock entry, IEnumerable values) + { + //Returns a mask with registers that are written to + //before being read. Only those registers that are + //written in all paths, and is not read before being + //written to on those paths, should be set on the mask. + long mask = -1L; + + foreach (PathIo path in values) + { + mask &= path.GetOutputs() & ~path.GetInputs(entry); + } + + return mask; + } + public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs(); public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs(); + + public static long ClearCallerSavedIntRegs(long mask, bool isAarch64) + { + //TODO: ARM32 support. + if (isAarch64) + { + mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask); + } + + return mask; + } + + public static long ClearCallerSavedVecRegs(long mask, bool isAarch64) + { + //TODO: ARM32 support. 
+ if (isAarch64) + { + mask &= ~CallerSavedVecRegistersMask; + } + + return mask; + } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/TranslatedSub.cs b/ChocolArm64/Translation/TranslatedSub.cs index 65d7035107..8b599b7a93 100644 --- a/ChocolArm64/Translation/TranslatedSub.cs +++ b/ChocolArm64/Translation/TranslatedSub.cs @@ -10,21 +10,41 @@ namespace ChocolArm64.Translation class TranslatedSub { + //This is the minimum amount of calls needed for the method + //to be retranslated with higher quality code. It's only worth + //doing that for hot code. + private const int MinCallCountForOpt = 30; + public ArmSubroutine Delegate { get; private set; } - public static int StateArgIdx { get; private set; } - public static int MemoryArgIdx { get; private set; } + public static int StateArgIdx { get; } + public static int MemoryArgIdx { get; } - public static Type[] FixedArgTypes { get; private set; } + public static Type[] FixedArgTypes { get; } - public DynamicMethod Method { get; private set; } + public DynamicMethod Method { get; } - public TranslationTier Tier { get; private set; } + public TranslationTier Tier { get; } - public TranslatedSub(DynamicMethod method, TranslationTier tier) + public long IntNiRegsMask { get; } + public long VecNiRegsMask { get; } + + private bool _isWorthOptimizing; + + private int _callCount; + + public TranslatedSub( + DynamicMethod method, + long intNiRegsMask, + long vecNiRegsMask, + TranslationTier tier, + bool isWorthOptimizing) { - Method = method ?? throw new ArgumentNullException(nameof(method));; - Tier = tier; + Method = method ?? 
throw new ArgumentNullException(nameof(method));; + IntNiRegsMask = intNiRegsMask; + VecNiRegsMask = vecNiRegsMask; + _isWorthOptimizing = isWorthOptimizing; + Tier = tier; } static TranslatedSub() @@ -61,5 +81,24 @@ namespace ChocolArm64.Translation { return Delegate(threadState, memory); } + + public bool IsWorthOptimizing() + { + if (!_isWorthOptimizing) + { + return false; + } + + if (_callCount++ < MinCallCountForOpt) + { + return false; + } + + //Only return true once, so that it is + //added to the queue only once. + _isWorthOptimizing = false; + + return true; + } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs index dd1215f50c..bda0bca09f 100644 --- a/ChocolArm64/Translation/Translator.cs +++ b/ChocolArm64/Translation/Translator.cs @@ -63,48 +63,36 @@ namespace ChocolArm64.Translation CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); } - TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); + if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) + { + sub = TranslateLowCq(position, state.GetExecutionMode()); + } - position = subroutine.Execute(state, _memory); + position = sub.Execute(state, _memory); } while (position != 0 && state.Running); state.CurrentTranslator = null; } - internal void TranslateVirtualSubroutine(CpuThreadState state, long position) - { - if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0) - { - _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); - } - } - - internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position) + internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs) { if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) { sub = TranslateLowCq(position, state.GetExecutionMode()); } - if (sub.Tier == TranslationTier.Tier0) + if 
(sub.IsWorthOptimizing()) { - _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); + bool isComplete = cs == CallType.Call || + cs == CallType.VirtualCall; + + _queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete); } return sub.Delegate; } - internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position) - { - if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine)) - { - subroutine = TranslateLowCq(position, state.GetExecutionMode()); - } - - return subroutine; - } - private void TranslateQueuedSubs() { while (_threadCount != 0) @@ -124,7 +112,7 @@ namespace ChocolArm64.Translation } else { - TranslateHighCq(item.Position, item.Mode); + TranslateHighCq(item.Position, item.Mode, item.IsComplete); } } else @@ -142,14 +130,16 @@ namespace ChocolArm64.Translation string subName = GetSubroutineName(position); - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); + bool isAarch64 = mode == ExecutionMode.Aarch64; - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true); return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); } - private void TranslateHighCq(long position, ExecutionMode mode) + private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete) { Block graph = Decoder.DecodeSubroutine(_memory, position, mode); @@ -159,9 +149,13 @@ namespace ChocolArm64.Translation string subName = GetSubroutineName(position); - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); + bool isAarch64 = mode == ExecutionMode.Aarch64; - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); + isComplete &= !context.HasIndirectJump; + + 
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall); int ilOpCount = 0; @@ -170,9 +164,11 @@ namespace ChocolArm64.Translation ilOpCount += ilBlock.Count; } + ForceAheadOfTimeCompilation(subroutine); + _cache.AddOrUpdate(position, subroutine, ilOpCount); - ForceAheadOfTimeCompilation(subroutine); + return subroutine; } private string GetSubroutineName(long position) diff --git a/ChocolArm64/Translation/TranslatorQueue.cs b/ChocolArm64/Translation/TranslatorQueue.cs index 89d665bfbd..0f1d847470 100644 --- a/ChocolArm64/Translation/TranslatorQueue.cs +++ b/ChocolArm64/Translation/TranslatorQueue.cs @@ -1,3 +1,4 @@ +using ChocolArm64.State; using System.Collections.Concurrent; using System.Threading; @@ -5,10 +6,6 @@ namespace ChocolArm64.Translation { class TranslatorQueue { - //This is the maximum number of functions to be translated that the queue can hold. - //The value may need some tuning to find the sweet spot. 
- private const int MaxQueueSize = 1024; - private ConcurrentStack[] _translationQueue; private ManualResetEvent _queueDataReceivedEvent; @@ -27,14 +24,11 @@ namespace ChocolArm64.Translation _queueDataReceivedEvent = new ManualResetEvent(false); } - public void Enqueue(TranslatorQueueItem item) + public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete) { - ConcurrentStack queue = _translationQueue[(int)item.Tier]; + TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete); - if (queue.Count >= MaxQueueSize) - { - queue.TryPop(out _); - } + ConcurrentStack queue = _translationQueue[(int)tier]; queue.Push(item); diff --git a/ChocolArm64/Translation/TranslatorQueueItem.cs b/ChocolArm64/Translation/TranslatorQueueItem.cs index 0988414a50..dde2706d98 100644 --- a/ChocolArm64/Translation/TranslatorQueueItem.cs +++ b/ChocolArm64/Translation/TranslatorQueueItem.cs @@ -10,11 +10,18 @@ namespace ChocolArm64.Translation public TranslationTier Tier { get; } - public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier) + public bool IsComplete { get; } + + public TranslatorQueueItem( + long position, + ExecutionMode mode, + TranslationTier tier, + bool isComplete = false) { - Position = position; - Mode = mode; - Tier = tier; + Position = position; + Mode = mode; + Tier = tier; + IsComplete = isComplete; } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/IoType.cs b/ChocolArm64/Translation/VarType.cs similarity index 85% rename from ChocolArm64/Translation/IoType.cs rename to ChocolArm64/Translation/VarType.cs index c7710e0c67..d671575e98 100644 --- a/ChocolArm64/Translation/IoType.cs +++ b/ChocolArm64/Translation/VarType.cs @@ -1,6 +1,6 @@ namespace ChocolArm64.Translation { - enum IoType + enum VarType { Arg, Flag, diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc index 8b5ebe0328..6e808b56fd 100644 --- a/Ryujinx/Config.jsonc +++ b/Ryujinx/Config.jsonc @@ 
-29,18 +29,21 @@ // System Language list: https://gist.github.com/HorrorTroll/b6e4a88d774c3c9b3bdf54d79a7ca43b "system_language": "AmericanEnglish", - // Enable or Disable Docked Mode + // Enable or disable Docked Mode "docked_mode": false, - - // Enable or Disable Game Vsync + + // Enable or disable Game Vsync "enable_vsync": true, - - // Enable or Disable Multi-core scheduling of threads + + // Enable or disable Multi-core scheduling of threads "enable_multicore_scheduling": true, - + // Enable integrity checks on Switch content files "enable_fs_integrity_checks": true, - + + // Enable or disable aggressive CPU optimizations + "enable_aggressive_cpu_opts": true, + // The primary controller's type // Supported Values: Handheld, ProController, NpadPair, NpadLeft, NpadRight "controller_type": "Handheld", diff --git a/Ryujinx/Configuration.cs b/Ryujinx/Configuration.cs index dbbec1cbc6..c4a1b4369f 100644 --- a/Ryujinx/Configuration.cs +++ b/Ryujinx/Configuration.cs @@ -86,6 +86,11 @@ namespace Ryujinx /// public bool EnableFsIntegrityChecks { get; private set; } + /// + /// Enable or Disable aggressive CPU optimizations + /// + public bool EnableAggressiveCpuOpts { get; private set; } + /// /// The primary controller's type /// @@ -197,6 +202,11 @@ namespace Ryujinx ? 
IntegrityCheckLevel.ErrorOnInvalid : IntegrityCheckLevel.None; + if (Instance.EnableAggressiveCpuOpts) + { + Optimizations.AssumeStrictAbiCompliance = true; + } + if(Instance.GamepadControls.Enabled) { if (GamePad.GetName(Instance.GamepadControls.Index) == "Unmapped Controller") diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json index 0e586671d6..7e7e466594 100644 --- a/Ryujinx/_schema.json +++ b/Ryujinx/_schema.json @@ -17,6 +17,7 @@ "enable_vsync", "enable_multicore_scheduling", "enable_fs_integrity_checks", + "enable_aggressive_cpu_opts", "controller_type", "keyboard_controls", "gamepad_controls" @@ -399,6 +400,17 @@ false ] }, + "enable_aggressive_cpu_opts": { + "$id": "#/properties/enable_aggressive_cpu_opts", + "type": "boolean", + "title": "Enable Aggressive CPU Optimizations", + "description": "Enable or disable aggressive CPU optimizations", + "default": true, + "examples": [ + true, + false + ] + }, "controller_type": { "$id": "#/properties/controller_type", "type": "string", From dbc105eafba1db23858c015d6bd24c42c5dc255c Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Fri, 1 Mar 2019 10:12:09 +0100 Subject: [PATCH 12/12] Create CpuTestSimdImm.cs (#608) --- Ryujinx.Tests/Cpu/CpuTestSimdImm.cs | 267 ++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdImm.cs diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs new file mode 100644 index 0000000000..bb6e117395 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs @@ -0,0 +1,267 @@ +#define SimdImm + +using NUnit.Framework; + +using System.Collections.Generic; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdImm")] + public sealed class CpuTestSimdImm : CpuTest + { +#if SimdImm + +#region "Helper methods" + // abcdefgh -> aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh + private static ulong ExpandImm8(byte imm8) + { 
+ ulong imm64 = 0ul; + + for (int i = 0, j = 0; i < 8; i++, j += 8) + { + if (((imm8 >> i) & 0b1) != 0) + { + imm64 |= 0b11111111ul << j; + } + } + + return imm64; + } + + // aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh -> abcdefgh + private static byte ShrinkImm64(ulong imm64) + { + byte imm8 = 0; + + for (int i = 0, j = 0; i < 8; i++, j += 8) + { + if (((imm64 >> j) & 0b11111111ul) != 0ul) // Note: no format check. + { + imm8 |= (byte)(0b1 << i); + } + } + + return imm8; + } +#endregion + +#region "ValueSource (Types)" + private static IEnumerable _8BIT_IMM_() + { + yield return 0x00; + yield return 0x7F; + yield return 0x80; + yield return 0xFF; + + for (int cnt = 1; cnt <= RndCntImm8; cnt++) + { + byte imm8 = TestContext.CurrentContext.Random.NextByte(); + + yield return imm8; + } + } + + private static IEnumerable _64BIT_IMM_() + { + yield return ExpandImm8(0x00); + yield return ExpandImm8(0x7F); + yield return ExpandImm8(0x80); + yield return ExpandImm8(0xFF); + + for (int cnt = 1; cnt <= RndCntImm64; cnt++) + { + byte imm8 = TestContext.CurrentContext.Random.NextByte(); + + yield return ExpandImm8(imm8); + } + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _Movi_V_8bit_() + { + return new uint[] + { + 0x0F00E400u // MOVI V0.8B, #0 + }; + } + + private static uint[] _Movi_Mvni_V_16bit_shifted_imm_() + { + return new uint[] + { + 0x0F008400u, // MOVI V0.4H, #0 + 0x2F008400u // MVNI V0.4H, #0 + }; + } + + private static uint[] _Movi_Mvni_V_32bit_shifted_imm_() + { + return new uint[] + { + 0x0F000400u, // MOVI V0.2S, #0 + 0x2F000400u // MVNI V0.2S, #0 + }; + } + + private static uint[] _Movi_Mvni_V_32bit_shifting_ones_() + { + return new uint[] + { + 0x0F00C400u, // MOVI V0.2S, #0, MSL #8 + 0x2F00C400u // MVNI V0.2S, #0, MSL #8 + }; + } + + private static uint[] _Movi_V_64bit_scalar_() + { + return new uint[] + { + 0x2F00E400u // MOVI D0, #0 + }; + } + + private static uint[] _Movi_V_64bit_vector_() + { + return new 
uint[] + { + 0x6F00E400u // MOVI V0.2D, #0 + }; + } +#endregion + + private const int RndCntImm8 = 2; + private const int RndCntImm64 = 2; + + [Test, Pairwise] + public void Movi_V_8bit([ValueSource("_Movi_V_8bit_")] uint opcodes, + [Values(0u)] uint rd, + [ValueSource("_8BIT_IMM_")] byte imm8, + [Values(0b0u, 0b1u)] uint q) // <8B, 16B> + { + uint abc = (imm8 & 0xE0u) >> 5; + uint defgh = (imm8 & 0x1Fu); + + opcodes |= ((rd & 31) << 0); + opcodes |= (abc << 16) | (defgh << 5); + opcodes |= ((q & 1) << 30); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + + SingleOpcode(opcodes, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Movi_Mvni_V_16bit_shifted_imm([ValueSource("_Movi_Mvni_V_16bit_shifted_imm_")] uint opcodes, + [Values(0u)] uint rd, + [ValueSource("_8BIT_IMM_")] byte imm8, + [Values(0b0u, 0b1u)] uint amount, // <0, 8> + [Values(0b0u, 0b1u)] uint q) // <4H, 8H> + { + uint abc = (imm8 & 0xE0u) >> 5; + uint defgh = (imm8 & 0x1Fu); + + opcodes |= ((rd & 31) << 0); + opcodes |= (abc << 16) | (defgh << 5); + opcodes |= ((amount & 1) << 13); + opcodes |= ((q & 1) << 30); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + + SingleOpcode(opcodes, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Movi_Mvni_V_32bit_shifted_imm([ValueSource("_Movi_Mvni_V_32bit_shifted_imm_")] uint opcodes, + [Values(0u)] uint rd, + [ValueSource("_8BIT_IMM_")] byte imm8, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint amount, // <0, 8, 16, 24> + [Values(0b0u, 0b1u)] uint q) // <2S, 4S> + { + uint abc = (imm8 & 0xE0u) >> 5; + uint defgh = (imm8 & 0x1Fu); + + opcodes |= ((rd & 31) << 0); + opcodes |= (abc << 16) | (defgh << 5); + opcodes |= ((amount & 3) << 13); + opcodes |= ((q & 1) << 30); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(q == 0u ? 
z : 0ul); + + SingleOpcode(opcodes, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Movi_Mvni_V_32bit_shifting_ones([ValueSource("_Movi_Mvni_V_32bit_shifting_ones_")] uint opcodes, + [Values(0u)] uint rd, + [ValueSource("_8BIT_IMM_")] byte imm8, + [Values(0b0u, 0b1u)] uint amount, // <8, 16> + [Values(0b0u, 0b1u)] uint q) // <2S, 4S> + { + uint abc = (imm8 & 0xE0u) >> 5; + uint defgh = (imm8 & 0x1Fu); + + opcodes |= ((rd & 31) << 0); + opcodes |= (abc << 16) | (defgh << 5); + opcodes |= ((amount & 1) << 12); + opcodes |= ((q & 1) << 30); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + + SingleOpcode(opcodes, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Movi_V_64bit_scalar([ValueSource("_Movi_V_64bit_scalar_")] uint opcodes, + [Values(0u)] uint rd, + [ValueSource("_64BIT_IMM_")] ulong imm) + { + byte imm8 = ShrinkImm64(imm); + + uint abc = (imm8 & 0xE0u) >> 5; + uint defgh = (imm8 & 0x1Fu); + + opcodes |= ((rd & 31) << 0); + opcodes |= (abc << 16) | (defgh << 5); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(z); + + SingleOpcode(opcodes, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Movi_V_64bit_vector([ValueSource("_Movi_V_64bit_vector_")] uint opcodes, + [Values(0u)] uint rd, + [ValueSource("_64BIT_IMM_")] ulong imm) + { + byte imm8 = ShrinkImm64(imm); + + uint abc = (imm8 & 0xE0u) >> 5; + uint defgh = (imm8 & 0x1Fu); + + opcodes |= ((rd & 31) << 0); + opcodes |= (abc << 16) | (defgh << 5); + + SingleOpcode(opcodes); + + CompareAgainstUnicorn(); + } +#endif + } +}