diff --git a/.github/mac-bundle-qt.sh b/.github/mac-bundle-qt.sh
old mode 100644
new mode 100755
index f8083936..aed59d60
--- a/.github/mac-bundle-qt.sh
+++ b/.github/mac-bundle-qt.sh
@@ -2,26 +2,38 @@
 # For Plist buddy
 PATH="$PATH:/usr/libexec"
 
-
 # Construct the app iconset.
 mkdir alber.iconset
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
+
+# Create a mask for rounding our icon. We don't want it to be square, as most macOS icons are rounded
+convert -size 1024x1024 xc:none -draw "roundrectangle 0,0,1024,1024,220,220" rounded_mask.png
+convert docs/img/mac_icon.ico -alpha on -background none -resize 1024x1024 PNG32:temp.png
+# Apply the mask to our icon
+convert temp.png rounded_mask.png -compose DstIn -composite temp.png
+
+# Normal icons
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16    alber.iconset/icon_16x16.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32    alber.iconset/icon_32x32.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 64x64    alber.iconset/icon_64x64.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128  alber.iconset/icon_128x128.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256  alber.iconset/icon_256x256.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512  alber.iconset/icon_512x512.png
+
+# High DPI icons
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32    alber.iconset/icon_16x16@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64    alber.iconset/icon_32x32@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 128x128  alber.iconset/icon_64x64@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256  alber.iconset/icon_128x128@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512  alber.iconset/icon_256x256@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
+
 iconutil --convert icns alber.iconset
+rm rounded_mask.png temp.png
 
 # Set up the .app directory
 mkdir -p Alber.app/Contents/MacOS/Libraries
 mkdir Alber.app/Contents/Resources
 
-
 # Copy binary into App
 cp ./build/Alber Alber.app/Contents/MacOS/Alber
 chmod a+x Alber.app/Contents/Macos/Alber
diff --git a/.github/mac-bundle.sh b/.github/mac-bundle.sh
index 3349d6d3..274f1115 100755
--- a/.github/mac-bundle.sh
+++ b/.github/mac-bundle.sh
@@ -2,26 +2,38 @@
 # For Plist buddy
 PATH="$PATH:/usr/libexec"
 
-
 # Construct the app iconset.
 mkdir alber.iconset
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
-convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
+
+# Create a mask for rounding our icon. We don't want it to be square, as most macOS icons are rounded
+convert -size 1024x1024 xc:none -draw "roundrectangle 0,0,1024,1024,220,220" rounded_mask.png
+convert docs/img/mac_icon.ico -alpha on -background none -resize 1024x1024 PNG32:temp.png
+# Apply the mask to our icon
+convert temp.png rounded_mask.png -compose DstIn -composite temp.png
+
+# Normal icons
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16    alber.iconset/icon_16x16.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32    alber.iconset/icon_32x32.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 64x64    alber.iconset/icon_64x64.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128  alber.iconset/icon_128x128.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256  alber.iconset/icon_256x256.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512  alber.iconset/icon_512x512.png
+
+# High DPI icons
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32    alber.iconset/icon_16x16@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64    alber.iconset/icon_32x32@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 128x128  alber.iconset/icon_64x64@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256  alber.iconset/icon_128x128@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512  alber.iconset/icon_256x256@2x.png
+convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
+
 iconutil --convert icns alber.iconset
+rm rounded_mask.png temp.png
 
 # Set up the .app directory
 mkdir -p Alber.app/Contents/MacOS/Libraries
 mkdir Alber.app/Contents/Resources
 
-
 # Copy binary into App
 cp ./build/Alber Alber.app/Contents/MacOS/Alber
 chmod a+x Alber.app/Contents/Macos/Alber
diff --git a/.github/workflows/Hydra_Build.yml b/.github/workflows/Hydra_Build.yml
index dbdfbf1b..66cd2e27 100644
--- a/.github/workflows/Hydra_Build.yml
+++ b/.github/workflows/Hydra_Build.yml
@@ -213,3 +213,32 @@ jobs:
         path: |
           ${{github.workspace}}/build/panda3ds_libretro.so
           ${{github.workspace}}/docs/libretro/panda3ds_libretro.info
+  
+  ARM-Libretro-Android:
+    runs-on: ubuntu-24.04
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Fetch submodules
+      run: git submodule update --init --recursive
+
+    - name: Configure CMake
+      run: cmake -B ${{github.workspace}}/build -DBUILD_LIBRETRO_CORE=1 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DENABLE_VULKAN=0 -DENABLE_USER_BUILD=ON -DCMAKE_CXX_FLAGS="-march=armv8-a+crypto"
+
+    - name: Build
+      run: |
+        # Apply patch for GLES compatibility
+        git apply ./.github/gles.patch
+        # Build the project with CMake
+        cmake --build ${{github.workspace}}/build --config ${{ env.BUILD_TYPE }}
+
+        # Strip the generated library
+        ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip --strip-unneeded ./build/panda3ds_libretro.so
+        
+    - name: Upload artifacts
+      uses: actions/upload-artifact@v4
+      with:
+        name: Android arm64 Libretro core
+        path: |
+          ${{github.workspace}}/build/panda3ds_libretro.so
+          ${{github.workspace}}/docs/libretro/panda3ds_libretro.info
diff --git a/.github/workflows/Test_Build.yml b/.github/workflows/Test_Build.yml
new file mode 100644
index 00000000..edfa9fa9
--- /dev/null
+++ b/.github/workflows/Test_Build.yml
@@ -0,0 +1,44 @@
+name: Hardware Test Build
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container: devkitpro/devkitarm
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install and update packages
+        run: |
+          apt-get update && apt-get -y install python3 python3-pip python3-venv p7zip-full libarchive13  # refresh package lists first; the image's cached lists may be stale or absent
+          python3 --version
+          python3 -m venv venv
+          . ./venv/bin/activate
+          python3 -m pip install --upgrade pip setuptools
+
+      - name: Compile tests
+        run: |
+          make -C tests/AppCpuTimeLimit
+          make -C tests/DetectEmulator
+          make -C tests/HelloWorldSVC
+          make -C tests/ImmediateModeTriangles
+          make -C tests/PICA_LITP
+          make -C tests/SimplerTri
+
+      - name: Clone and compile 3ds-examples
+        run: |
+          git clone --recursive https://github.com/devkitPro/3ds-examples tests/3ds-examples
+          make -C tests/3ds-examples
+
+      - name: Upload binaries
+        uses: actions/upload-artifact@v4
+        with:
+          name: Source & Binaries
+          path: tests
+        
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 94ea8193..5fb1065c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -102,10 +102,17 @@ libretro-build-osx-arm64:
 #    - .core-defs
 
 # Android ARMv8a
-# android-arm64-v8a:
-#   extends:
-#     - .libretro-android-cmake-arm64-v8a
-#     - .core-defs
+android-arm64-v8a:
+  extends:
+    - .libretro-android-cmake-arm64-v8a
+    - .core-defs
+  before_script:
+    - export NUMPROC=$(($(nproc)/5))
+    - export ANDROID_NDK_VERSION=26.2.11394342
+    - export NDK_ROOT=/android-sdk-linux/ndk/$ANDROID_NDK_VERSION
+    - /android-sdk-linux/cmdline-tools/latest/bin/sdkmanager "ndk;$ANDROID_NDK_VERSION"
+    - /android-sdk-linux/cmdline-tools/latest/bin/sdkmanager "cmake;3.30.3"
+    - export PATH=/android-sdk-linux/cmake/3.30.3/bin:$PATH
 
 # Android 64-bit x86
 # android-x86_64:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e522bdb..8eded674 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,7 @@ option(BUILD_HYDRA_CORE "Build a Hydra core" OFF)
 option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
 option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON)
 option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF)
+option(ENABLE_FASTMEM "Build with support for hardware fastmem" ON)
 option(USE_LIBRETRO_AUDIO "Enable to use the LR audio device with the LR core. Otherwise our own device is used" OFF)
 option(IOS_SIMULATOR_BUILD "Compiling for IOS simulator (Set to off if compiling for a real iPhone)" ON)
 
@@ -188,6 +189,7 @@ include_directories(third_party/toml11)
 include_directories(third_party/glm)
 include_directories(third_party/renderdoc)
 include_directories(third_party/duckstation)
+include_directories(third_party/host_memory/include)
 
 add_subdirectory(third_party/cmrc)
 
@@ -329,6 +331,7 @@ set(SOURCE_FILES src/emulator.cpp src/io_file.cpp src/config.cpp
                  src/http_server.cpp src/stb_image_write.c src/core/cheats.cpp src/core/action_replay.cpp
                  src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/renderdoc.cpp
                  src/frontend_settings.cpp src/miniaudio/miniaudio.cpp src/core/screen_layout.cpp
+                 src/dynamic_library.cpp
 )
 set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp)
 set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp
@@ -337,6 +340,7 @@ set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limi
                         src/core/kernel/address_arbiter.cpp src/core/kernel/error.cpp
                         src/core/kernel/file_operations.cpp src/core/kernel/directory_operations.cpp
                         src/core/kernel/idle_thread.cpp src/core/kernel/timers.cpp
+                        src/core/kernel/fcram.cpp
 )
 set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services/apt.cpp src/core/services/hid.cpp
                          src/core/services/fs.cpp src/core/services/gsp_gpu.cpp src/core/services/gsp_lcd.cpp
@@ -416,7 +420,8 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
                  include/fs/archive_twl_sound.hpp include/fs/archive_card_spi.hpp include/services/ns.hpp include/audio/audio_device.hpp
                  include/audio/audio_device_interface.hpp include/audio/libretro_audio_device.hpp include/services/ir/ir_types.hpp
                  include/services/ir/ir_device.hpp include/services/ir/circlepad_pro.hpp include/services/service_intercept.hpp
-                 include/screen_layout.hpp include/services/service_map.hpp include/audio/dsp_binary.hpp
+                 include/screen_layout.hpp include/services/service_map.hpp include/audio/dsp_binary.hpp include/dynamic_library.hpp
+                 include/enum_flag_ops.hpp include/kernel/fcram.hpp
 )
 
 if(IOS)
@@ -443,6 +448,9 @@ set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
 
                              third_party/cityhash/cityhash.cpp
                              third_party/xxhash/xxhash.c
+
+                             third_party/host_memory/host_memory.cpp
+                             third_party/host_memory/virtual_buffer.cpp
 )
 
 if(ENABLE_LUAJIT AND NOT ANDROID)
@@ -675,7 +683,7 @@ if(ENABLE_METAL AND APPLE)
 
     target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES})
     target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
-    target_include_directories(AlberCore PRIVATE third_party/metal-cpp)
+    target_include_directories(AlberCore PUBLIC third_party/metal-cpp)
     # TODO: check if all of them are needed
     target_link_libraries(AlberCore PUBLIC "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
 endif()
@@ -710,6 +718,10 @@ if(ENABLE_HTTP_SERVER)
     target_compile_definitions(AlberCore PRIVATE PANDA3DS_ENABLE_HTTP_SERVER=1)
 endif()
 
+if(ENABLE_FASTMEM)
+    target_compile_definitions(AlberCore PRIVATE PANDA3DS_HARDWARE_FASTMEM=1)
+endif()
+
 # Configure frontend
 
 if(ENABLE_QT_GUI)
@@ -730,18 +742,25 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
         option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF)
         set(QT_LANGUAGES docs/translations)
 
-        set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp
+        set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp
             src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp
             src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp src/panda_qt/translations.cpp
             src/panda_qt/thread_debugger.cpp src/panda_qt/cpu_debugger.cpp src/panda_qt/dsp_debugger.cpp src/panda_qt/input_window.cpp
+            src/panda_qt/screen/screen.cpp src/panda_qt/screen/screen_gl.cpp src/panda_qt/screen/screen_mtl.cpp
         )
-        set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp
+
+        set(FRONTEND_HEADER_FILES include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp
             include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp
             include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp
             include/panda_qt/thread_debugger.hpp include/panda_qt/cpu_debugger.hpp include/panda_qt/dsp_debugger.hpp
-            include/panda_qt/disabled_widget_overlay.hpp include/panda_qt/input_window.hpp
+            include/panda_qt/disabled_widget_overlay.hpp include/panda_qt/input_window.hpp include/panda_qt/screen/screen.hpp
+            include/panda_qt/screen/screen_gl.hpp include/panda_qt/screen/screen_mtl.hpp
         )
 
+        if (APPLE AND ENABLE_METAL)
+            set(FRONTEND_SOURCE_FILES ${FRONTEND_SOURCE_FILES} src/panda_qt/screen/metal_context.mm)
+        endif()
+
         source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES})
         source_group("Header Files\\Qt" FILES ${FRONTEND_HEADER_FILES})
         include_directories(${Qt6Gui_PRIVATE_INCLUDE_DIRS})
diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp
index 77bfb8ed..e62f41b3 100644
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@@ -109,11 +109,7 @@ class GPU {
 	void screenshot(const std::string& name) { renderer->screenshot(name); }
 	void deinitGraphicsContext() { renderer->deinitGraphicsContext(); }
 
-#if defined(PANDA3DS_FRONTEND_SDL)
-	void initGraphicsContext(SDL_Window* window) { renderer->initGraphicsContext(window); }
-#elif defined(PANDA3DS_FRONTEND_QT)
-	void initGraphicsContext(GL::Context* context) { renderer->initGraphicsContext(context); }
-#endif
+	void initGraphicsContext(void* context) { renderer->initGraphicsContext(context); }
 
 	void fireDMA(u32 dest, u32 source, u32 size);
 	void reset();
diff --git a/include/audio/audio_interpolation.hpp b/include/audio/audio_interpolation.hpp
index 8a87cbcd..2c9f6540 100644
--- a/include/audio/audio_interpolation.hpp
+++ b/include/audio/audio_interpolation.hpp
@@ -16,9 +16,9 @@ namespace Audio::Interpolation {
 	using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>;
 
 	struct State {
-		// Two historical samples.
-		std::array<s16, 2> xn1 = {};  //< x[n-1]
-		std::array<s16, 2> xn2 = {};  //< x[n-2]
+		// Two history samples.
+		std::array<s16, 2> xn1 = {};  // x[n-1]
+		std::array<s16, 2> xn2 = {};  // x[n-2]
 		// Current fractional position.
 		u64 fposition = 0;
 	};
diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp
index d374e304..f04ed6c2 100644
--- a/include/audio/hle_core.hpp
+++ b/include/audio/hle_core.hpp
@@ -69,7 +69,9 @@ namespace Audio {
 		// In order to save up on CPU time.
 		uint enabledMixStages = 0;
 
-		u32 samplePosition;  // Sample number into the current audio buffer
+		u32 samplePosition;      // Sample number into the current audio buffer
+		u32 currentBufferPaddr;  // Physical address of current audio buffer
+
 		float rateMultiplier;
 		u16 syncCount;
 		u16 currentBufferID;
diff --git a/include/config.hpp b/include/config.hpp
index b890e251..4aa4cce1 100644
--- a/include/config.hpp
+++ b/include/config.hpp
@@ -2,10 +2,10 @@
 #include <filesystem>
 #include <string>
 
-#include "screen_layout.hpp"
 #include "audio/dsp_core.hpp"
 #include "frontend_settings.hpp"
 #include "renderer.hpp"
+#include "screen_layout.hpp"
 #include "services/region_codes.hpp"
 
 struct AudioDeviceConfig {
@@ -49,12 +49,7 @@ struct EmulatorConfig {
 	static constexpr bool ubershaderDefault = true;
 #endif
 	static constexpr bool accelerateShadersDefault = true;
-
-#if defined(__LIBRETRO__)
 	static constexpr bool audioEnabledDefault = true;
-#else
-	static constexpr bool audioEnabledDefault = false;
-#endif
 
 	// We default to OpenGL on all platforms other than iOS
 #if defined(PANDA3DS_IOS)
@@ -63,11 +58,13 @@ struct EmulatorConfig {
 	static constexpr RendererType rendererDefault = RendererType::OpenGL;
 #endif
 
+	static constexpr bool enableFastmemDefault = true;
 	static constexpr bool hashTexturesDefault = false;
 
 	bool shaderJitEnabled = shaderJitDefault;
 	bool useUbershaders = ubershaderDefault;
 	bool accelerateShaders = accelerateShadersDefault;
+	bool fastmemEnabled = enableFastmemDefault;
 	bool hashTextures = hashTexturesDefault;
 
 	ScreenLayout::Layout screenLayout = ScreenLayout::Layout::Default;
diff --git a/include/dynamic_library.hpp b/include/dynamic_library.hpp
new file mode 100644
index 00000000..95f86920
--- /dev/null
+++ b/include/dynamic_library.hpp
@@ -0,0 +1,73 @@
+// SPDX-FileCopyrightText: 2019 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <string>
+
+namespace Common {
+	/**
+	 * Provides a platform-independent interface for loading a dynamic library and retrieving symbols.
+	 * The interface maintains an internal reference count to allow one handle to be shared between
+	 * multiple users.
+	 */
+	class DynamicLibrary final {
+	  public:
+		/// Default constructor, does not load a library.
+		explicit DynamicLibrary();
+
+		/// Automatically loads the specified library. Call isOpen() to check validity before use.
+		explicit DynamicLibrary(const char* filename);
+
+		/// Initializes the dynamic library with an already opened handle.
+		explicit DynamicLibrary(void* handle_);
+
+		/// Moves the library.
+		DynamicLibrary(DynamicLibrary&&) noexcept;
+		DynamicLibrary& operator=(DynamicLibrary&&) noexcept;
+
+		/// Delete copies, we can't copy a dynamic library.
+		DynamicLibrary(const DynamicLibrary&) = delete;
+		DynamicLibrary& operator=(const DynamicLibrary&) = delete;
+
+		/// Closes the library.
+		~DynamicLibrary();
+
+		/// Returns the specified library name with the platform-specific suffix added.
+		[[nodiscard]] static std::string getUnprefixedFilename(const char* filename);
+
+		/// Returns the specified library name in platform-specific format.
+		/// Major/minor versions will not be included if set to -1.
+		/// If libname already contains the "lib" prefix, it will not be added again.
+		/// Windows: LIBNAME-MAJOR-MINOR.dll
+		/// Linux: libLIBNAME.so.MAJOR.MINOR
+		/// Mac: libLIBNAME.MAJOR.MINOR.dylib
+		[[nodiscard]] static std::string getVersionedFilename(const char* libname, int major = -1, int minor = -1);
+
+		/// Returns true if a module is loaded, otherwise false.
+		[[nodiscard]] bool isOpen() const { return handle != nullptr; }
+
+		/// Loads (or replaces) the handle with the specified library file name.
+		/// Returns true if the library was loaded and can be used.
+		[[nodiscard]] bool open(const char* filename);
+
+		/// Unloads the library, any function pointers from this library are no longer valid.
+		void close();
+
+		/// Returns the address of the specified symbol (function or variable) as an untyped pointer.
+		/// If the specified symbol does not exist in this library, nullptr is returned.
+		[[nodiscard]] void* getSymbolAddress(const char* name) const;
+
+		/// Obtains the address of the specified symbol, automatically casting to the correct type.
+		/// Returns true if the symbol was found; otherwise *ptr is set to nullptr and false is returned.
+		template <typename T>
+		[[nodiscard]] bool getSymbol(const char* name, T* ptr) const {
+			*ptr = reinterpret_cast<T>(getSymbolAddress(name));
+			return *ptr != nullptr;
+		}
+
+	  private:
+		/// Platform-dependent dynamic library handle; nullptr means no library is loaded (see isOpen()).
+		void* handle = nullptr;
+	};
+}  // namespace Common
\ No newline at end of file
diff --git a/include/dynarmic_cp15.hpp b/include/dynarmic_cp15.hpp
index 1345aad4..ba7fbbf1 100644
--- a/include/dynarmic_cp15.hpp
+++ b/include/dynarmic_cp15.hpp
@@ -1,71 +1,51 @@
 #pragma once
 
-#include "dynarmic/interface/A32/a32.h"
 #include "dynarmic/interface/A32/config.h"
 #include "dynarmic/interface/A32/coprocessor.h"
 #include "helpers.hpp"
-#include "memory.hpp"
 
 class CP15 final : public Dynarmic::A32::Coprocessor {
-    using Callback = Dynarmic::A32::Coprocessor::Callback;
-    using CoprocReg = Dynarmic::A32::CoprocReg;
-    using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
-    using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
+	using Callback = Dynarmic::A32::Coprocessor::Callback;
+	using CoprocReg = Dynarmic::A32::CoprocReg;
+	using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
+	using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
 
-    u32 threadStoragePointer; // Pointer to thread-local storage
-    u32 dummy; // MCR writes here for registers whose values are ignored
+	u32 threadStoragePointer = 0;  // Pointer to thread-local storage; zero until setTLSBase() is called
+	u32 dummy = 0;                 // MCR writes here for registers whose values are ignored
 
-    std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1,
-        CoprocReg CRd, CoprocReg CRn,
-        CoprocReg CRm, unsigned opc2) override {
-        return std::nullopt;
-    }
+	std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, CoprocReg CRn, CoprocReg CRm, unsigned opc2) override {
+		return std::nullopt;
+	}
 
-    CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
-        CoprocReg CRm, unsigned opc2) override {
-        if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 4) {
-            return &dummy; // Normally inserts a "Data Synchronization Barrier"
-        }
+	CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) override {
+		if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 4) {
+			return &dummy;  // Normally inserts a "Data Synchronization Barrier"
+		}
 
-        if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 5) {
-            return &dummy; // Normally inserts a "Data Memory Barrier"
-        }
-        Helpers::panic("CP15: CompileSendOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2);
-    }
+		if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 5) {
+			return &dummy;  // Normally inserts a "Data Memory Barrier"
+		}
+		Helpers::panic("CP15: CompileSendOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2);
+	}
 
-    CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override {
-        return std::monostate{};
-    }
+	CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) override {
+		// Stores a pointer to thread-local storage, accessed via mrc p15, 0, rd, c13, c0, 3
+		if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 3) {
+			return &threadStoragePointer;
+		}
 
-    CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
-        CoprocReg CRm, unsigned opc2) override {
-        // Stores a pointer to thread-local storage, accessed via mrc p15, 0, rd, c13, c0, 3
-        if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 3) {
-            return &threadStoragePointer;
-        }
+		Helpers::panic("CP15: CompileGetOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2);
+	}
 
-        Helpers::panic("CP15: CompileGetOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2);
-    }
+	CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override { return std::monostate{}; }
+	CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override { return std::monostate{}; }
 
-    CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override {
-        return std::monostate{};
-    }
+	std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) override { return std::nullopt; }
+	std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) override { return std::nullopt; }
 
-    std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
-        std::optional<u8> option) override {
-        return std::nullopt;
-    }
+  public:
+	void setTLSBase(u32 value) { threadStoragePointer = value; }
 
-    std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
-        std::optional<u8> option) override {
-        return std::nullopt;
-    }
-
-public:
-    void setTLSBase(u32 value) {
-        threadStoragePointer = value;
-    }
-
-    // Currently does nothing but may be needed in the future
-    void reset() {}
+	// Currently does nothing but may be needed in the future
+	void reset() {}
 };
\ No newline at end of file
diff --git a/include/emulator.hpp b/include/emulator.hpp
index bed01937..b1191f6e 100644
--- a/include/emulator.hpp
+++ b/include/emulator.hpp
@@ -24,12 +24,6 @@
 #include "http_server.hpp"
 #endif
 
-#ifdef PANDA3DS_FRONTEND_QT
-#include "gl/context.h"
-#endif
-
-struct SDL_Window;
-
 enum class ROMType {
 	None,
 	ELF,
@@ -39,10 +33,13 @@ enum class ROMType {
 };
 
 class Emulator {
+	// Config should be initialized before anything else
 	EmulatorConfig config;
+
+	Memory memory;
+	// Memory is declared before the CPU, GPU and kernel so that it is constructed before them (members initialize in declaration order)
 	CPU cpu;
 	GPU gpu;
-	Memory memory;
 	Kernel kernel;
 	std::unique_ptr<Audio::DSPCore> dsp;
 	Scheduler scheduler;
@@ -106,12 +103,8 @@ class Emulator {
 	bool loadELF(const std::filesystem::path& path);
 	bool loadELF(std::ifstream& file);
 
-#ifdef PANDA3DS_FRONTEND_QT
-	// For passing the GL context from Qt to the renderer
-	void initGraphicsContext(GL::Context* glContext) { gpu.initGraphicsContext(nullptr); }
-#else
-	void initGraphicsContext(SDL_Window* window) { gpu.initGraphicsContext(window); }
-#endif
+	// For passing the SDL Window, GL context, etc from the frontend to the renderer
+	void initGraphicsContext(void* context) { gpu.initGraphicsContext(context); }
 
 	RomFS::DumpingResult dumpRomFS(const std::filesystem::path& path);
 	void setOutputSize(u32 width, u32 height) { gpu.setOutputSize(width, height); }
diff --git a/include/enum_flag_ops.hpp b/include/enum_flag_ops.hpp
new file mode 100644
index 00000000..e41eac28
--- /dev/null
+++ b/include/enum_flag_ops.hpp
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <type_traits>
+
+#define DECLARE_ENUM_FLAG_OPERATORS(type)                                         \
+	[[nodiscard]] constexpr type operator|(type lhs, type rhs) noexcept {         \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<type>(static_cast<Raw>(lhs) | static_cast<Raw>(rhs));  \
+	}                                                                             \
+	[[nodiscard]] constexpr type operator&(type lhs, type rhs) noexcept {         \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<type>(static_cast<Raw>(lhs) & static_cast<Raw>(rhs));  \
+	}                                                                             \
+	[[nodiscard]] constexpr type operator^(type lhs, type rhs) noexcept {         \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<type>(static_cast<Raw>(lhs) ^ static_cast<Raw>(rhs));  \
+	}                                                                             \
+	[[nodiscard]] constexpr type operator<<(type lhs, type rhs) noexcept {        \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<type>(static_cast<Raw>(lhs) << static_cast<Raw>(rhs)); \
+	}                                                                             \
+	[[nodiscard]] constexpr type operator>>(type lhs, type rhs) noexcept {        \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<type>(static_cast<Raw>(lhs) >> static_cast<Raw>(rhs)); \
+	}                                                                             \
+	constexpr type& operator|=(type& lhs, type rhs) noexcept {                    \
+		lhs = lhs | rhs;                                                          \
+		return lhs;                                                               \
+	}                                                                             \
+	constexpr type& operator&=(type& lhs, type rhs) noexcept {                    \
+		lhs = lhs & rhs;                                                          \
+		return lhs;                                                               \
+	}                                                                             \
+	constexpr type& operator^=(type& lhs, type rhs) noexcept {                    \
+		lhs = lhs ^ rhs;                                                          \
+		return lhs;                                                               \
+	}                                                                             \
+	constexpr type& operator<<=(type& lhs, type rhs) noexcept {                   \
+		lhs = lhs << rhs;                                                         \
+		return lhs;                                                               \
+	}                                                                             \
+	constexpr type& operator>>=(type& lhs, type rhs) noexcept {                   \
+		lhs = lhs >> rhs;                                                         \
+		return lhs;                                                               \
+	}                                                                             \
+	[[nodiscard]] constexpr type operator~(type value) noexcept {                 \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<type>(~static_cast<Raw>(value));                       \
+	}                                                                             \
+	[[nodiscard]] constexpr bool True(type value) noexcept {                      \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<Raw>(value) != 0;                                      \
+	}                                                                             \
+	[[nodiscard]] constexpr bool False(type value) noexcept {                     \
+		using Raw = std::underlying_type_t<type>;                                 \
+		return static_cast<Raw>(value) == 0;                                      \
+	}
\ No newline at end of file
diff --git a/include/helpers.hpp b/include/helpers.hpp
index a95931d4..e13aed7e 100644
--- a/include/helpers.hpp
+++ b/include/helpers.hpp
@@ -3,10 +3,8 @@
 #include <cstdarg>
 #include <cstdint>
 #include <iostream>
-#include <iterator>
-#include <string>
-#include <vector>
 #include <memory>
+#include <string>
 
 #include "termcolor.hpp"
 
@@ -37,7 +35,7 @@ namespace Helpers {
 			return {};
 		}
 		const auto buf = std::make_unique<char[]>(size);
-		std::snprintf(buf.get(), size, fmt.c_str(), args ...);
+		std::snprintf(buf.get(), size, fmt.c_str(), args...);
 		return std::string(buf.get(), buf.get() + size - 1);
 	}
 
@@ -50,7 +48,7 @@ namespace Helpers {
 
 		exit(1);
 	}
-	
+
 #ifdef PANDA3DS_LIMITED_PANICS
 	template <class... Args>
 	static void panicDev(const char* fmt, Args&&... args) {}
diff --git a/include/kernel/fcram.hpp b/include/kernel/fcram.hpp
new file mode 100644
index 00000000..e1690ad0
--- /dev/null
+++ b/include/kernel/fcram.hpp
@@ -0,0 +1,63 @@
+#pragma once
+#include <list>
+#include <memory>
+
+#include "helpers.hpp"
+
+class Memory;
+
+enum class FcramRegion {  // Region selector taken by KFcram::alloc and KFcram::getUsedCount
+	App = 0x100,   // NOTE(review): presumably the application pool (KFcram::appRegion) — confirm
+	Sys = 0x200,   // NOTE(review): presumably the system pool (KFcram::sysRegion) — confirm
+	Base = 0x300,  // NOTE(review): presumably the base pool (KFcram::baseRegion) — confirm
+};
+
+struct FcramBlock {  // One run of FCRAM pages, as collected into a FcramBlockList
+	u32 paddr;  // Physical address where the run starts
+	s32 pages;  // Length of the run, counted in pages
+
+	FcramBlock(u32 paddr, s32 pages) : paddr{paddr}, pages{pages} {}
+};
+
+using FcramBlockList = std::list<FcramBlock>;
+
+class KFcram {  // Hands out FCRAM pages from the appRegion, sysRegion and baseRegion pools declared below
+	struct Region {
+		struct Block {
+			s32 pages;       // Length of this block, counted in pages
+			s32 pageOffset;  // NOTE(review): presumably the block's page offset from Region::start — confirm in the .cpp
+			bool used;       // New blocks always start out unused
+
+			Block(s32 pages, u32 pageOffset) : pages(pages), pageOffset(static_cast<s32>(pageOffset)), used(false) {}
+		};
+
+		std::list<Block> blocks;
+		u32 start;
+		s32 pages;
+		s32 freePages;
+
+	  public:
+		Region() : start(0), pages(0), freePages(0) {}  // freePages is zeroed too, so a Region is never read uninitialized before reset()
+		void reset(u32 start, size_t size);
+		void alloc(std::list<FcramBlock>& out, s32 pages, bool linear);
+
+		u32 getUsedCount();
+		u32 getFreeCount();
+	};
+
+	Memory& mem;
+
+	Region appRegion, sysRegion, baseRegion;
+	uint8_t* fcram = nullptr;   // Null until the constructor / reset() assigns it
+	std::unique_ptr<u32> refs;  // NOTE(review): single-object deleter; if this holds a new[] array it must be std::unique_ptr<u32[]> — confirm
+
+  public:
+	KFcram(Memory& memory);
+	void reset(size_t ramSize, size_t appSize, size_t sysSize, size_t baseSize);
+	void alloc(FcramBlockList& out, s32 pages, FcramRegion region, bool linear);
+
+	void incRef(FcramBlockList& list);
+	void decRef(FcramBlockList& list);
+
+	u32 getUsedCount(FcramRegion region);
+};
\ No newline at end of file
diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp
index 3fd926e7..c6d5044e 100644
--- a/include/kernel/kernel.hpp
+++ b/include/kernel/kernel.hpp
@@ -1,12 +1,11 @@
 #pragma once
 #include <array>
-#include <cassert>
-#include <limits>
 #include <span>
 #include <string>
 #include <vector>
 
 #include "config.hpp"
+#include "fcram.hpp"
 #include "helpers.hpp"
 #include "kernel_types.hpp"
 #include "logger.hpp"
@@ -25,6 +24,10 @@ class Kernel {
 	CPU& cpu;
 	Memory& mem;
 
+  public:
+	KFcram fcramManager;
+
+  private:
 	// The handle number for the next kernel object to be created
 	u32 handleCounter;
 	// A list of our OS threads, the max number of which depends on the resource limit (hardcoded 32 per process on retail it seems).
diff --git a/include/kernel/kernel_types.hpp b/include/kernel/kernel_types.hpp
index a5b27498..7ea6d28e 100644
--- a/include/kernel/kernel_types.hpp
+++ b/include/kernel/kernel_types.hpp
@@ -1,93 +1,107 @@
 #pragma once
 #include <array>
 #include <cstring>
-#include "fs/archive_base.hpp"
+#include <vector>
+
 #include "handles.hpp"
 #include "helpers.hpp"
-#include "result/result.hpp"
 
 enum class KernelObjectType : u8 {
-    AddressArbiter, Archive, Directory, File, MemoryBlock, Process, ResourceLimit, Session, Dummy,
-    // Bundle waitable objects together in the enum to let the compiler optimize certain checks better
-    Event, Mutex, Port, Semaphore, Timer, Thread
+	AddressArbiter,
+	Archive,
+	Directory,
+	File,
+	MemoryBlock,
+	Process,
+	ResourceLimit,
+	Session,
+	Dummy,
+	// Bundle waitable objects together in the enum to let the compiler optimize certain checks better
+	Event,
+	Mutex,
+	Port,
+	Semaphore,
+	Timer,
+	Thread
 };
 
 enum class ResourceLimitCategory : int {
-    Application = 0,
-    SystemApplet = 1,
-    LibraryApplet = 2,
-    Misc = 3
+	Application = 0,
+	SystemApplet = 1,
+	LibraryApplet = 2,
+	Misc = 3,
 };
 
 // Reset types (for use with events and timers)
 enum class ResetType {
-    OneShot = 0, // When the primitive is signaled, it will wake up exactly one thread and will clear itself automatically.
-    Sticky = 1, // When the primitive is signaled, it will wake up all threads and it won't clear itself automatically.
-    Pulse = 2, // Only meaningful for timers: same as ONESHOT but it will periodically signal the timer instead of just once.
+	OneShot = 0,  // When the primitive is signaled, it will wake up exactly one thread and will clear itself automatically.
+	Sticky = 1,   // When the primitive is signaled, it will wake up all threads and it won't clear itself automatically.
+	Pulse = 2,    // Only meaningful for timers: same as OneShot, but it will periodically signal the timer instead of just once.
 };
 
 enum class ArbitrationType {
-    Signal = 0,
-    WaitIfLess = 1,
-    DecrementAndWaitIfLess = 2,
-    WaitIfLessTimeout = 3,
-    DecrementAndWaitIfLessTimeout = 4
+	Signal = 0,
+	WaitIfLess = 1,
+	DecrementAndWaitIfLess = 2,
+	WaitIfLessTimeout = 3,
+	DecrementAndWaitIfLessTimeout = 4,
 };
 
 enum class ProcessorID : s32 {
-    AllCPUs = -1,
-    Default = -2,
-    
-    AppCore = 0,
-    Syscore = 1,
-    New3DSExtra1 = 2,
-    New3DSExtra2 = 3
+	AllCPUs = -1,
+	Default = -2,
+
+	AppCore = 0,
+	Syscore = 1,
+	New3DSExtra1 = 2,
+	New3DSExtra2 = 3
 };
 
 struct AddressArbiter {};
 
 struct ResourceLimits {
-    HorizonHandle handle;
+	HorizonHandle handle;
 
-    s32 currentCommit = 0;
+	s32 currentCommit = 0;
 };
 
 struct Process {
-    // Resource limits for this process
-    ResourceLimits limits;
-    // Process ID
-    u32 id;
+	// Resource limits for this process
+	ResourceLimits limits;
+	// Process ID
+	u32 id;
 
-    Process(u32 id) : id(id) {}
+	Process(u32 id) : id(id) {}
 };
 
 struct Event {
-    // Some events (for now, only the DSP semaphore events) need to execute a callback when signalled
-    // This enum stores what kind of callback they should execute
-    enum class CallbackType : u32 {
-        None, DSPSemaphore,
-    };
+	// Some events (for now, only the DSP semaphore events) need to execute a callback when signalled
+	// This enum stores what kind of callback they should execute
+	enum class CallbackType : u32 {
+		None,
+		DSPSemaphore,
+	};
 
-    u64 waitlist; // A bitfield where each bit symbolizes if the thread with thread with the corresponding index is waiting on the event
-    ResetType resetType = ResetType::OneShot;
-    CallbackType callback = CallbackType::None;
-    bool fired = false;
+	u64 waitlist;  // A bitfield where each bit symbolizes if the thread with the corresponding index is waiting on the event
+	ResetType resetType = ResetType::OneShot;
+	CallbackType callback = CallbackType::None;
+	bool fired = false;
 
-    Event(ResetType resetType) : resetType(resetType), waitlist(0) {}
-    Event(ResetType resetType, CallbackType cb) : resetType(resetType), waitlist(0), callback(cb) {}
+	Event(ResetType resetType) : waitlist(0), resetType(resetType) {}  // Initializers follow declaration order; callback and fired keep their defaults
+	Event(ResetType resetType, CallbackType cb) : waitlist(0), resetType(resetType), callback(cb) {}  // Same, with an explicit callback kind
 };
 
 struct Port {
-    static constexpr u32 maxNameLen = 11;
+	static constexpr u32 maxNameLen = 11;
 
-    char name[maxNameLen + 1] = {};
-    bool isPublic = false; // Setting name=NULL creates a private port not accessible from svcConnectToPort.
+	char name[maxNameLen + 1] = {};
+	bool isPublic = false;  // Setting name=NULL creates a private port not accessible from svcConnectToPort.
 
-    Port(const char* name) {
-        // If the name is empty (ie the first char is the null terminator) then the port is private
-        isPublic = name[0] != '\0';
-        std::strncpy(this->name, name, maxNameLen);
-    }
+	Port(const char* name) {
+		// A null or empty name (ie the first char is the null terminator) makes the port private and leaves this->name zeroed
+		isPublic = name != nullptr && name[0] != '\0';
+		if (name != nullptr) std::strncpy(this->name, name, maxNameLen);  // this->name has maxNameLen + 1 zeroed bytes, so it stays terminated
+	}
 };
 
 struct Session {
@@ -146,92 +160,90 @@ struct Thread {
 };
 
 static const char* kernelObjectTypeToString(KernelObjectType t) {
-    switch (t) {
-        case KernelObjectType::AddressArbiter: return "address arbiter";
-        case KernelObjectType::Archive: return "archive";
-        case KernelObjectType::Directory: return "directory";
-        case KernelObjectType::Event: return "event";
-        case KernelObjectType::File: return "file";
-        case KernelObjectType::MemoryBlock: return "memory block";
-        case KernelObjectType::Port: return "port";
-        case KernelObjectType::Process: return "process";
-        case KernelObjectType::ResourceLimit: return "resource limit";
-        case KernelObjectType::Session: return "session";
-        case KernelObjectType::Mutex: return "mutex";
-        case KernelObjectType::Semaphore: return "semaphore";
-        case KernelObjectType::Thread: return "thread";
-        case KernelObjectType::Dummy: return "dummy";
-        default: return "unknown";
-    }
+	switch (t) {  // Maps a kernel object type to a lowercase, human-readable name
+		case KernelObjectType::AddressArbiter: return "address arbiter";
+		case KernelObjectType::Archive: return "archive";
+		case KernelObjectType::Directory: return "directory";
+		case KernelObjectType::Event: return "event";
+		case KernelObjectType::File: return "file";
+		case KernelObjectType::MemoryBlock: return "memory block";
+		case KernelObjectType::Port: return "port";
+		case KernelObjectType::Process: return "process";
+		case KernelObjectType::ResourceLimit: return "resource limit";
+		case KernelObjectType::Session: return "session";
+		case KernelObjectType::Mutex: return "mutex";
+		case KernelObjectType::Semaphore: return "semaphore";
+		case KernelObjectType::Thread: return "thread";
+		case KernelObjectType::Dummy: return "dummy";
+		default: return "unknown";  // NOTE(review): KernelObjectType::Timer has no case above, so timers are reported as "unknown"
+	}
 }
 
 struct Mutex {
-    using Handle = HorizonHandle;
+	using Handle = HorizonHandle;
 
-    u64 waitlist;           // Refer to the getWaitlist function below for documentation
-    Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked
-    Handle handle; // Handle of the mutex itself
-    u32 lockCount; // Number of times this mutex has been locked by its daddy. 0 = not locked
-    bool locked;
+	u64 waitlist;            // Refer to the getWaitlist function below for documentation
+	Handle ownerThread = 0;  // Index of the thread that holds the mutex if it's locked
+	Handle handle;           // Handle of the mutex itself
+	u32 lockCount;           // Number of times this mutex has been locked by its owner. 0 = not locked
+	bool locked;             // Initialized from the constructor's lock argument
 
-    Mutex(bool lock, Handle handle) : locked(lock), waitlist(0), lockCount(lock ? 1 : 0), handle(handle) {}
+	Mutex(bool lock, Handle handle) : waitlist(0), handle(handle), lockCount(lock ? 1 : 0), locked(lock) {}  // Initializers follow declaration order
 };
 
 struct Semaphore {
-    u64 waitlist; // Refer to the getWaitlist function below for documentation
-    s32 availableCount;
-    s32 maximumCount;
+	u64 waitlist;        // Refer to the getWaitlist function below for documentation
+	s32 availableCount;  // Starts at the constructor's initialCount
+	s32 maximumCount;
 
-    Semaphore(s32 initialCount, s32 maximumCount) : availableCount(initialCount), maximumCount(maximumCount), waitlist(0) {}
+	Semaphore(s32 initialCount, s32 maximumCount) : waitlist(0), availableCount(initialCount), maximumCount(maximumCount) {}  // Declaration order
 };
 
 struct Timer {
 	u64 waitlist;  // Refer to the getWaitlist function below for documentation
 	ResetType resetType = ResetType::OneShot;
 
-	u64 fireTick;      // CPU tick the timer will be fired
-	u64 interval;      // Number of ns until the timer fires for the second and future times
-	bool fired;        // Has this timer been signalled?
-	bool running;      // Is this timer running or stopped?
+	u64 fireTick;  // CPU tick the timer will be fired
+	u64 interval;  // Number of ns until the timer fires for the second and future times
+	bool fired;    // Has this timer been signalled?
+	bool running;  // Is this timer running or stopped?
 
 	Timer(ResetType type) : resetType(type), fireTick(0), interval(0), waitlist(0), fired(false), running(false) {}
 };
 
 struct MemoryBlock {
-    u32 addr = 0;
-    u32 size = 0;
-    u32 myPermission = 0;
-    u32 otherPermission = 0;
-    bool mapped = false;
+	u32 addr = 0;
+	u32 size = 0;
+	u32 myPermission = 0;
+	u32 otherPermission = 0;
+	bool mapped = false;
 
-    MemoryBlock(u32 addr, u32 size, u32 myPerm, u32 otherPerm) : addr(addr), size(size), myPermission(myPerm), otherPermission(otherPerm),
-        mapped(false) {}
+	// A freshly created block starts unmapped: mapped keeps its in-class default of false
+	MemoryBlock(u32 addr, u32 size, u32 myPerm, u32 otherPerm) : addr{addr}, size{size}, myPermission{myPerm}, otherPermission{otherPerm} {}
 };
 
 // Generic kernel object class
 struct KernelObject {
 	using Handle = HorizonHandle;
 
-    Handle handle = 0; // A u32 the OS will use to identify objects
-    void* data = nullptr;
-    KernelObjectType type;
+	Handle handle = 0;  // A u32 the OS will use to identify objects
+	void* data = nullptr;
+	KernelObjectType type;
 
-    KernelObject(Handle handle, KernelObjectType type) : handle(handle), type(type) {}
+	KernelObject(Handle handle, KernelObjectType type) : handle(handle), type(type) {}
 
-    // Our destructor does not free the data in order to avoid it being freed when our std::vector is expanded
-    // Thus, the kernel needs to delete it when appropriate
-    ~KernelObject() {}
+	// Our destructor does not free the data in order to avoid it being freed when our std::vector is expanded
+	// Thus, the kernel needs to delete it when appropriate
+	~KernelObject() {}
 
-    template <typename T>
-    T* getData() {
-        return static_cast<T*>(data);
-    }
+	template <typename T>
+	T* getData() {
+		return static_cast<T*>(data);
+	}
 
-    const char* getTypeName() const {
-        return kernelObjectTypeToString(type);
-    }
+	const char* getTypeName() const { return kernelObjectTypeToString(type); }
 
-    // Retrieves a reference to the waitlist for a specified object
+	// Retrieves a reference to the waitlist for a specified object
 	// We return a reference because this function is only called in the kernel threading internals
 	// We want the kernel to be able to easily manage waitlists, by reading/parsing them or setting/clearing bits.
 	// As we mention in the definition of the "Event" struct, the format for wailists is very simple and made to be efficient.
@@ -247,8 +259,7 @@ struct KernelObject {
 			case KernelObjectType::Timer: return getData<Timer>()->waitlist;
 
 			// This should be unreachable once we fully implement sync objects
-			default: [[unlikely]]
-                Helpers::panic("Called GetWaitList on kernel object without a waitlist (Type: %s)", getTypeName());
+			default: [[unlikely]] Helpers::panic("Called GetWaitList on kernel object without a waitlist (Type: %s)", getTypeName());
 		}
 	}
-};
\ No newline at end of file
+};
diff --git a/include/memory.hpp b/include/memory.hpp
index b1dd09de..d21e4be2 100644
--- a/include/memory.hpp
+++ b/include/memory.hpp
@@ -1,8 +1,8 @@
 #pragma once
 #include <array>
-#include <bitset>
 #include <filesystem>
 #include <fstream>
+#include <list>
 #include <optional>
 #include <vector>
 
@@ -10,8 +10,11 @@
 #include "crypto/aes_engine.hpp"
 #include "handles.hpp"
 #include "helpers.hpp"
-#include "loader/ncsd.hpp"
+#include "host_memory/host_memory.h"
+#include "kernel/fcram.hpp"
 #include "loader/3dsx.hpp"
+#include "loader/ncsd.hpp"
+#include "result/result.hpp"
 #include "services/region_codes.hpp"
 
 namespace PhysicalAddrs {
@@ -38,15 +41,15 @@ namespace VirtualAddrs {
 		DefaultStackSize = 0x4000,
 
 		NormalHeapStart = 0x08000000,
-		LinearHeapStartOld = 0x14000000, // If kernel version < 0x22C
+		LinearHeapStartOld = 0x14000000,  // If kernel version < 0x22C
 		LinearHeapEndOld = 0x1C000000,
 
 		LinearHeapStartNew = 0x30000000,
 		LinearHeapEndNew = 0x40000000,
 
-		// Start of TLS for first thread. Next thread's storage will be at TLSBase + 0x1000, and so on
-		TLSBase = 0xFF400000,
-		TLSSize = 0x1000,
+		// Start of TLS for first thread. Next thread's storage will be at TLSBase + 0x200, and so on
+		TLSBase = 0x1FF82000,
+		TLSSize = 0x200,
 
 		VramStart = 0x1F000000,
 		VramSize = 0x00600000,
@@ -76,63 +79,79 @@ namespace KernelMemoryTypes {
 		PERMISSION_W = 1 << 1,
 		PERMISSION_X = 1 << 2
 	};
-	
-	// I assume this is referring to a single piece of allocated memory? If it's for pages, it makes no sense.
-	// If it's for multiple allocations, it also makes no sense
+
 	struct MemoryInfo {
-		u32 baseAddr; // Base process virtual address. Used as a paddr in lockedMemoryInfo instead
-		u32 size;      // Of what?
-		u32 perms;     // Is this referring to a single page or?
+		u32 baseAddr;  // Address where the described range starts
+		u32 pages;     // Length of the range in pages; end() converts it to bytes with a shift by 12
+		u32 perms;     // NOTE(review): presumably a mask of the PERMISSION_R/W/X bits declared above — confirm
 		u32 state;
 
-		u32 end() { return baseAddr + size; }
-		MemoryInfo(u32 baseAddr, u32 size, u32 perms, u32 state) : baseAddr(baseAddr), size(size)
-			, perms(perms), state(state) {}
+		u32 end() const { return baseAddr + (pages << 12); }  // First address past the range; const so it works on const MemoryInfo too
+		MemoryInfo() : baseAddr(0), pages(0), perms(0), state(0) {}
+		MemoryInfo(u32 baseAddr, u32 pages, u32 perms, u32 state) : baseAddr(baseAddr), pages(pages), perms(perms), state(state) {}
 	};
 
 	// Shared memory block for HID, GSP:GPU etc
 	struct SharedMemoryBlock {
-		u32 paddr; // Physical address of this block's memory
-		u32 size; // Size of block
-		u32 handle; // The handle of the shared memory block
-		bool mapped; // Has this block been mapped at least once?
+		u32 paddr;    // Physical address of this block's memory
+		u32 size;     // Size of block
+		u32 handle;   // The handle of the shared memory block
+		bool mapped;  // Has this block been mapped at least once?
 
 		SharedMemoryBlock(u32 paddr, u32 size, u32 handle) : paddr(paddr), size(size), handle(handle), mapped(false) {}
 	};
-}
+}  // namespace KernelMemoryTypes
 
 class Memory {
+	// Describes the edit that changeMemoryState applies to a range of pages (passed as its op argument)
+	struct Operation {
+		KernelMemoryTypes::MemoryState newState = KernelMemoryTypes::MemoryState::Free;  // NOTE(review): presumably only applied when changeState is set — confirm
+		bool r = false, w = false, x = false;  // NOTE(review): presumably the new read/write/execute permissions, applied when changePerms is set — confirm
+		bool changeState = false;
+		bool changePerms = false;
+	};
 	using Handle = HorizonHandle;
 
 	u8* fcram;
 	u8* dspRam;  // Provided to us by Audio
 	u8* vram;    // Provided to the memory class by the GPU class
 
-	u64& cpuTicks; // Reference to the CPU tick counter
+	const u64* cpuTicks = nullptr;  // Pointer to the CPU tick counter, provided to us by the CPU class
 	using SharedMemoryBlock = KernelMemoryTypes::SharedMemoryBlock;
 
+	// TODO: remove this reference when Peach's excellent page table code is moved to a better home
+	KFcram& fcramManager;
+
 	// Our dynarmic core uses page tables for reads and writes with 4096 byte pages
 	std::vector<uintptr_t> readTable, writeTable;
 
+	// vaddr->paddr translation table
+	std::vector<u32> paddrTable;
+
 	// This tracks our OS' memory allocations
-	std::vector<KernelMemoryTypes::MemoryInfo> memoryInfo;
+	std::list<KernelMemoryTypes::MemoryInfo> memoryInfo;
 
 	std::array<SharedMemoryBlock, 5> sharedMemBlocks = {
-		SharedMemoryBlock(0, 0, KernelHandles::FontSharedMemHandle), // Shared memory for the system font (size is 0 because we read the size from the cmrc filesystem
-		SharedMemoryBlock(0, 0x1000, KernelHandles::GSPSharedMemHandle), // GSP shared memory
-		SharedMemoryBlock(0, 0x1000, KernelHandles::HIDSharedMemHandle),  // HID shared memory
-		SharedMemoryBlock(0, 0x3000, KernelHandles::CSNDSharedMemHandle), // CSND shared memory
+		SharedMemoryBlock(
+			0, 0, KernelHandles::FontSharedMemHandle
+		),  // Shared memory for the system font (size is 0 because we read the size from the cmrc filesystem)
+		SharedMemoryBlock(0, 0x1000, KernelHandles::GSPSharedMemHandle),          // GSP shared memory
+		SharedMemoryBlock(0, 0x1000, KernelHandles::HIDSharedMemHandle),          // HID shared memory
+		SharedMemoryBlock(0, 0x3000, KernelHandles::CSNDSharedMemHandle),         // CSND shared memory
 		SharedMemoryBlock(0, 0xE7000, KernelHandles::APTCaptureSharedMemHandle),  // APT Capture Buffer memory
- 	};
+	};
 
-public:
+  public:
 	static constexpr u32 pageShift = 12;
 	static constexpr u32 pageSize = 1 << pageShift;
 	static constexpr u32 pageMask = pageSize - 1;
 	static constexpr u32 totalPageCount = 1 << (32 - pageShift);
-	
+
 	static constexpr u32 FCRAM_SIZE = u32(128_MB);
-	static constexpr u32 FCRAM_APPLICATION_SIZE = u32(80_MB);
+	static constexpr u32 FCRAM_APPLICATION_SIZE = u32(64_MB + 16_MB);
+	static constexpr u32 FCRAM_SYSTEM_SIZE = u32(44_MB - 16_MB);
+	static constexpr u32 FCRAM_BASE_SIZE = u32(20_MB);
+
 	static constexpr u32 FCRAM_PAGE_COUNT = FCRAM_SIZE / pageSize;
 	static constexpr u32 FCRAM_APPLICATION_PAGE_COUNT = FCRAM_APPLICATION_SIZE / pageSize;
 
@@ -140,18 +159,48 @@ public:
 	static constexpr u32 DSP_CODE_MEMORY_OFFSET = u32(0_KB);
 	static constexpr u32 DSP_DATA_MEMORY_OFFSET = u32(256_KB);
 
-private:
-	std::bitset<FCRAM_PAGE_COUNT> usedFCRAMPages;
-	std::optional<u32> findPaddr(u32 size);
+  private:
+	// We also use MMU-accelerated fastmem for fast memory emulation
+	// This means that we've got a 4GB memory arena which is organized the same way as the emulated 3DS' memory map
+	// And we can access this directly instead of calling the memory read/write functions, which would be slower
+	// Regions that are not mapped or can't be accelerated this way will segfault, and the caller (eg dynarmic), will
+	// handle this segfault and call the Slower memory read/write functions
+	bool useFastmem = false;
+	static constexpr size_t FASTMEM_FCRAM_OFFSET = 0;                                    // Offset of FCRAM in the fastmem arena
+	static constexpr size_t FASTMEM_DSP_RAM_OFFSET = FASTMEM_FCRAM_OFFSET + FCRAM_SIZE;  // Offset of DSP RAM
+
+	static constexpr size_t FASTMEM_BACKING_SIZE = FCRAM_SIZE + DSP_RAM_SIZE;
+	// Total size of the virtual address space we will occupy (4GB)
+	static constexpr size_t FASTMEM_VIRTUAL_SIZE = 4_GB;
+
+	Common::HostMemory* arena;
+
+	void addFastmemView(u32 guestVaddr, size_t arenaOffset, size_t size, bool w, bool x = false) {
+		// No-op unless fastmem is enabled
+		if (!useFastmem) {
+			return;
+		}
+
+		// Views always get read access; write access is added on request
+		auto viewPerms = Common::MemoryPermission::Read;
+		if (w) {
+			viewPerms |= Common::MemoryPermission::Write;
+		}
+		(void)x;  // Execute access is not applied yet: viewPerms |= Common::MemoryPermission::Execute;
+		// If anything is mapped at the place we're trying to map, unmap it. Then, create our mapping.
+		arena->Unmap(guestVaddr, size, false);
+		arena->Map(guestVaddr, arenaOffset, size, viewPerms, false);
+	}
+
 	u64 timeSince3DSEpoch();
 
 	// https://www.3dbrew.org/wiki/Configuration_Memory#ENVINFO
 	// Report a retail unit without JTAG
 	static constexpr u32 envInfo = 1;
 
-	// Stored in Configuration Memory starting @ 0x1FF80060 
+	// Stored in Configuration Memory starting @ 0x1FF80060
 	struct FirmwareInfo {
-		u8 unk; // Usually 0 according to 3DBrew
+		u8 unk;  // Usually 0 according to 3DBrew
 		u8 revision;
 		u8 minor;
 		u8 major;
@@ -167,12 +216,15 @@ private:
 
 	static constexpr std::array<u8, 6> MACAddress = {0x40, 0xF4, 0x07, 0xFF, 0xFF, 0xEE};
 
+	void changeMemoryState(u32 vaddr, s32 pages, const Operation& op);
+	void queryPhysicalBlocks(std::list<FcramBlock>& outList, u32 vaddr, s32 pages);
+	void mapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages, bool r, bool w, bool x);
+	void unmapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages);
+
   public:
 	u16 kernelVersion = 0;
-	u32 usedUserMemory = u32(0_MB); // How much of the APPLICATION FCRAM range is used (allocated to the appcore)
-	u32 usedSystemMemory = u32(0_MB); // Similar for the SYSTEM range (reserved for the syscore)
 
-	Memory(u64& cpuTicks, const EmulatorConfig& config);
+	Memory(KFcram& fcramManager, const EmulatorConfig& config);
 	void reset();
 	void* getReadPointer(u32 address);
 	void* getWritePointer(u32 address);
@@ -198,22 +250,6 @@ private:
 	u32 getLinearHeapVaddr();
 	u8* getFCRAM() { return fcram; }
 
-	// Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader)
-	u32 totalSysFCRAM() {
-		return FCRAM_SIZE - FCRAM_APPLICATION_SIZE;
-	}
-
-	// Amount of OS-only FCRAM currently available
-	u32 remainingSysFCRAM() {
-		return totalSysFCRAM() - usedSystemMemory;
-	}
-
-	// Physical FCRAM index to the start of OS FCRAM
-	// We allocate the first part of physical FCRAM for the application, and the rest to the OS. So the index for the OS = application ram size
-	u32 sysFCRAMIndex() {
-		return FCRAM_APPLICATION_SIZE;
-	}
-
 	enum class BatteryLevel {
 		Empty = 0,
 		AlmostEmpty,
@@ -224,9 +260,9 @@ private:
 	};
 
 	u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) {
-		u8 value = static_cast<u8>(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5
-		if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected
-		if (charging) value |= 1 << 1; // Bit 1 shows if we're charging
+		u8 value = static_cast<u8>(batteryLevel) << 2;  // Bits 2:4 are the battery level from 0 to 5
+		if (adapterConnected) value |= 1 << 0;          // Bit 0 shows if the charger is connected
+		if (charging) value |= 1 << 1;                  // Bit 1 shows if we're charging
 
 		return value;
 	}
@@ -248,27 +284,20 @@ private:
 	}
 
 	// Returns whether "addr" is aligned to a page (4096 byte) boundary
-	static constexpr bool isAligned(u32 addr) {
-		return (addr & pageMask) == 0;
-	}
+	static constexpr bool isAligned(u32 addr) { return (addr & pageMask) == 0; }
 
-	// Allocate "size" bytes of RAM starting from FCRAM index "paddr" (We pick it ourself if paddr == 0)
-	// And map them to virtual address "vaddr" (We also pick it ourself if vaddr == 0).
-	// If the "linear" flag is on, the paddr pages must be adjacent in FCRAM
-	// This function is for interacting with the *user* portion of FCRAM mainly. For OS RAM, we use other internal functions below
-	// r, w, x: Permissions for the allocated memory
-	// adjustAddrs: If it's true paddr == 0 or vaddr == 0 tell the allocator to pick its own addresses. Used for eg svc ControlMemory
-	// isMap: Shows whether this is a reserve operation, that allocates memory and maps it to the addr space, or if it's a map operation,
-	// which just maps memory from paddr to vaddr without hassle. The latter is useful for shared memory mapping, the "map" ControlMemory, op, etc
-	// Returns the vaddr the FCRAM was mapped to or nullopt if allocation failed
-	std::optional<u32> allocateMemory(u32 vaddr, u32 paddr, u32 size, bool linear, bool r = true, bool w = true, bool x = true,
-		bool adjustsAddrs = false, bool isMap = false);
-	KernelMemoryTypes::MemoryInfo queryMemory(u32 vaddr);
+	bool allocMemory(u32 vaddr, s32 pages, FcramRegion region, bool r, bool w, bool x, KernelMemoryTypes::MemoryState state);
+	bool allocMemoryLinear(u32& outVaddr, u32 inVaddr, s32 pages, FcramRegion region, bool r, bool w, bool x);
+	bool mapVirtualMemory(
+		u32 dstVaddr, u32 srcVaddr, s32 pages, bool r, bool w, bool x, KernelMemoryTypes::MemoryState oldDstState,
+		KernelMemoryTypes::MemoryState oldSrcState, KernelMemoryTypes::MemoryState newDstState, KernelMemoryTypes::MemoryState newSrcState,
+		bool unmapPages = true
+	);
+	void changePermissions(u32 vaddr, s32 pages, bool r, bool w, bool x);
+	Result::HorizonResult queryMemory(KernelMemoryTypes::MemoryInfo& out, u32 vaddr);
+	Result::HorizonResult testMemoryState(u32 vaddr, s32 pages, KernelMemoryTypes::MemoryState desiredState);
 
-	// For internal use
-	// Allocates a "size"-sized chunk of system FCRAM and returns the index of physical FCRAM used for the allocation
-	// Used for allocating things like shared memory and the like
-	u32 allocateSysMemory(u32 size);
+	void copyToVaddr(u32 dstVaddr, const u8* srcHost, s32 size);
 
 	// Map a shared memory block to virtual address vaddr with permissions "myPerms"
 	// The kernel has a second permission parameter in MapMemoryBlock but not sure what's used for
@@ -276,10 +305,6 @@ private:
 	// Returns a pointer to the FCRAM block used for the memory if allocation succeeded
 	u8* mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerms);
 
-	// Mirrors the page mapping for "size" bytes starting from sourceAddress, to "size" bytes in destAddress
-	// All of the above must be page-aligned.
-	void mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size);
-
 	// Backup of the game's CXI partition info, if any
 	std::optional<NCCH> loadedCXI = std::nullopt;
 	std::optional<HB3DSX> loaded3DSX = std::nullopt;
@@ -291,12 +316,15 @@ private:
 	u8* getDSPMem() { return dspRam; }
 	u8* getDSPDataMem() { return &dspRam[DSP_DATA_MEMORY_OFFSET]; }
 	u8* getDSPCodeMem() { return &dspRam[DSP_CODE_MEMORY_OFFSET]; }
-	u32 getUsedUserMem() { return usedUserMemory; }
 
 	void setVRAM(u8* pointer) { vram = pointer; }
 	void setDSPMem(u8* pointer) { dspRam = pointer; }
+	void setCPUTicks(const u64& ticks) { cpuTicks = &ticks; }  // Keeps the address of "ticks", not a copy: the referenced u64 must stay alive for as long as cpuTicks is used, so never pass a temporary
 
 	bool allocateMainThreadStack(u32 size);
 	Regions getConsoleRegion();
 	void copySharedFont(u8* ptr, u32 vaddr);
+
+	bool isFastmemEnabled() { return useFastmem; }
+	u8* getFastmemArenaBase() { return arena->VirtualBasePointer(); }
 };
diff --git a/include/panda_qt/elided_label.hpp b/include/panda_qt/elided_label.hpp
index 9d937f9b..de31a439 100644
--- a/include/panda_qt/elided_label.hpp
+++ b/include/panda_qt/elided_label.hpp
@@ -6,6 +6,7 @@
 
 class ElidedLabel : public QLabel {
 	Q_OBJECT
+
   public:
 	explicit ElidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr);
 	explicit ElidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr);
diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp
index 7bdf6b96..b259a1bc 100644
--- a/include/panda_qt/main_window.hpp
+++ b/include/panda_qt/main_window.hpp
@@ -20,7 +20,7 @@
 #include "panda_qt/cpu_debugger.hpp"
 #include "panda_qt/dsp_debugger.hpp"
 #include "panda_qt/patch_window.hpp"
-#include "panda_qt/screen.hpp"
+#include "panda_qt/screen/screen.hpp"
 #include "panda_qt/shader_editor.hpp"
 #include "panda_qt/text_editor.hpp"
 #include "panda_qt/thread_debugger.hpp"
@@ -136,7 +136,7 @@ class MainWindow : public QMainWindow {
 	void loadKeybindings();
 	void saveKeybindings();
 
-	// Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer
+	// Tracks what graphics API is backing our renderer
 	bool usingGL = false;
 	bool usingVk = false;
 	bool usingMtl = false;
diff --git a/include/panda_qt/screen.hpp b/include/panda_qt/screen/screen.hpp
similarity index 54%
rename from include/panda_qt/screen.hpp
rename to include/panda_qt/screen/screen.hpp
index 270bf10f..4908b096 100644
--- a/include/panda_qt/screen.hpp
+++ b/include/panda_qt/screen/screen.hpp
@@ -1,25 +1,27 @@
 #pragma once
 #include <QWidget>
 #include <functional>
-#include <memory>
 
 #include "gl/context.h"
 #include "screen_layout.hpp"
 #include "window_info.h"
 
-// OpenGL widget for drawing the 3DS screen
+// Abstract screen widget for drawing the 3DS screen. We've got a child class for each graphics API (ScreenWidgetGL, ScreenWidgetMTL, ...)
 class ScreenWidget : public QWidget {
 	Q_OBJECT
 
   public:
 	using ResizeCallback = std::function<void(u32, u32)>;
 
-	ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr);
-	void resizeEvent(QResizeEvent* event) override;
-	// Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context
-	void resizeSurface(u32 width, u32 height);
+	enum class API { OpenGL, Metal, Vulkan };
 
-	GL::Context* getGLContext() { return glContext.get(); }
+	ScreenWidget(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr);
+	virtual ~ScreenWidget() {}
+
+	void resizeEvent(QResizeEvent* event) override;
+
+	virtual GL::Context* getGLContext() { return nullptr; }
+	virtual void* getMTKLayer() { return nullptr; }
 
 	// Dimensions of our output surface
 	u32 surfaceWidth = 0;
@@ -30,8 +32,9 @@ class ScreenWidget : public QWidget {
 	u32 previousWidth = 0;
 	u32 previousHeight = 0;
 
-	// Coordinates (x/y/width/height) for the two screens in window space, used for properly handling touchscreen regardless
-	// of layout or resizing
+	API api = API::OpenGL;
+
+	// Coordinates (x/y/width/height) for the two screens in window space, used for properly handling touchscreen
 	ScreenLayout::WindowCoordinates screenCoordinates;
 	// Screen layouts and sizes
 	ScreenLayout::Layout screenLayout = ScreenLayout::Layout::Default;
@@ -39,16 +42,23 @@ class ScreenWidget : public QWidget {
 
 	void reloadScreenLayout(ScreenLayout::Layout newLayout, float newTopScreenSize);
 
-  private:
-	std::unique_ptr<GL::Context> glContext = nullptr;
+	// Creates a screen widget depending on the graphics API we're using
+	static ScreenWidget* getWidget(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr);
+
+	// Called by the emulator thread on OpenGL for resizing the actual GL surface, since the emulator thread owns the GL context
+	virtual void resizeSurface(u32 width, u32 height) {};
+
+  protected:
 	ResizeCallback resizeCallback;
 
-	bool createGLContext();
+	virtual bool createContext() = 0;
+	virtual void resizeDisplay() = 0;
+	std::optional<WindowInfo> getWindowInfo();
 
+  private:
 	qreal devicePixelRatioFromScreen() const;
 	int scaledWindowWidth() const;
 	int scaledWindowHeight() const;
-	std::optional<WindowInfo> getWindowInfo();
 
 	void reloadScreenCoordinates();
 };
diff --git a/include/panda_qt/screen/screen_gl.hpp b/include/panda_qt/screen/screen_gl.hpp
new file mode 100644
index 00000000..04a2c11b
--- /dev/null
+++ b/include/panda_qt/screen/screen_gl.hpp
@@ -0,0 +1,18 @@
+#pragma once
+#include <memory>
+
+#include "gl/context.h"
+#include "panda_qt/screen/screen.hpp"
+
+class ScreenWidgetGL : public ScreenWidget {
+	// Owning pointer to the GL context, default-initialised to null. Presumably what getGLContext() hands out (defined outside this header)
+	std::unique_ptr<GL::Context> glContext{};
+
+  public:
+	ScreenWidgetGL(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr);
+	// ScreenWidget overrides; "override" already implies "virtual", so the keyword is not repeated
+	GL::Context* getGLContext() override;
+	bool createContext() override;
+	void resizeDisplay() override;
+	void resizeSurface(u32 width, u32 height) override;
+};
\ No newline at end of file
diff --git a/include/panda_qt/screen/screen_mtl.hpp b/include/panda_qt/screen/screen_mtl.hpp
new file mode 100644
index 00000000..cdd240e6
--- /dev/null
+++ b/include/panda_qt/screen/screen_mtl.hpp
@@ -0,0 +1,18 @@
+#pragma once
+#include "panda_qt/screen/screen.hpp"
+
+class ScreenWidgetMTL : public ScreenWidget {
+	// Type-erased layer pointer, null by default. Presumably what getMTKLayer() hands out (defined outside this header)
+	void* mtkLayer{};
+	// Helpers implemented in Objective-C++ that deal with the Metal context
+	bool createMetalContext();
+	void resizeMetalView();
+
+  public:
+	ScreenWidgetMTL(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr);
+	~ScreenWidgetMTL() override;
+	// ScreenWidget overrides; "override" already implies "virtual", so the keyword is not repeated
+	void* getMTKLayer() override;
+	bool createContext() override;
+	void resizeDisplay() override;
+};
\ No newline at end of file
diff --git a/include/renderer.hpp b/include/renderer.hpp
index 40f244db..27d6a437 100644
--- a/include/renderer.hpp
+++ b/include/renderer.hpp
@@ -9,10 +9,6 @@
 #include "PICA/regs.hpp"
 #include "helpers.hpp"
 
-#ifdef PANDA3DS_FRONTEND_QT
-#include "gl/context.h"
-#endif
-
 enum class RendererType : s8 {
 	// Todo: Auto = -1,
 	Null = 0,
@@ -23,7 +19,6 @@ enum class RendererType : s8 {
 };
 
 struct EmulatorConfig;
-struct SDL_Window;
 
 class GPU;
 class ShaderUnit;
@@ -69,7 +64,7 @@ class Renderer {
 
 	virtual void reset() = 0;
 	virtual void display() = 0;                                                              // Display the 3DS screen contents to the window
-	virtual void initGraphicsContext(SDL_Window* window) = 0;                                // Initialize graphics context
+	virtual void initGraphicsContext(void* context) = 0;                                     // Initialize graphics context
 	virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0;  // Clear a GPU buffer in VRAM
 	virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0;  // Perform display transfer
 	virtual void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) = 0;
@@ -91,9 +86,9 @@ class Renderer {
 	// Called to notify the core to use OpenGL ES and not desktop GL
 	virtual void setupGLES() {}
 
-	// Only relevant for Metal renderer on iOS
-	// Passes a SwiftUI MTKView's layer (CAMetalLayer) to the renderer
-	virtual void setMTKLayer(void* layer) {};
+	// Used for Metal renderer on Qt and iOS
+	// Passes an NSView's backing layer (CAMetalLayer) to the renderer
+	virtual void setMTKLayer(void* layer) { Helpers::panic("Renderer doesn't support MTK Layer"); };
 
 	// This function is called on every draw call before parsing vertex data.
 	// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
@@ -101,11 +96,6 @@ class Renderer {
 	// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU
 	virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; }
 
-	// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
-#ifdef PANDA3DS_FRONTEND_QT
-	virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }
-#endif
-
 	void setFBSize(u32 width, u32 height) {
 		fbSize[0] = width;
 		fbSize[1] = height;
diff --git a/include/renderer_gl/gl_driver.hpp b/include/renderer_gl/gl_driver.hpp
index 4a0b3727..dfb78ffe 100644
--- a/include/renderer_gl/gl_driver.hpp
+++ b/include/renderer_gl/gl_driver.hpp
@@ -1,4 +1,5 @@
 #pragma once
+#include "opengl.hpp"
 
 // Information about our OpenGL/OpenGL ES driver that we should keep track of
 // Stuff like whether specific extensions are supported, and potentially things like OpenGL context information
@@ -8,6 +9,9 @@ namespace OpenGL {
 		bool supportsExtFbFetch = false;
 		bool supportsArmFbFetch = false;
 
+		// Minimum alignment for UBO offsets. Fetched by the OpenGL renderer using glGetIntegerv.
+		GLuint uboAlignment = 16;
+
 		bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; }
 	};
 }  // namespace OpenGL
\ No newline at end of file
diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp
index b105f3e9..2023966f 100644
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@@ -92,14 +92,18 @@ class RendererGL final : public Renderer {
 	// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
 	// We can compile this once and then link it with all other generated fragment shaders
 	OpenGL::Shader defaultShadergenVs;
-	GLuint shadergenFragmentUBO;
-	// UBO for uploading the PICA uniforms when using hw shaders
-	GLuint hwShaderUniformUBO;
 
 	using StreamBuffer = OpenGLStreamBuffer;
+
+	std::unique_ptr<StreamBuffer> shadergenFragmentUBO;
+	// UBO for uploading the PICA uniforms when using hw shaders
+	std::unique_ptr<StreamBuffer> hwShaderUniformUBO;
 	std::unique_ptr<StreamBuffer> hwVertexBuffer;
 	std::unique_ptr<StreamBuffer> hwIndexBuffer;
 
+	// Current offset for our hw shader uniform UBO
+	u32 hwShaderUniformUBOOffset = 0;
+
 	// Cache of fixed attribute values so that we don't do any duplicate updates
 	std::array<std::array<float, 4>, 16> fixedAttrValues;
 
@@ -187,7 +191,7 @@ class RendererGL final : public Renderer {
 
 	void reset() override;
 	void display() override;                                                              // Display the 3DS screen contents to the window
-	void initGraphicsContext(SDL_Window* window) override;                                // Initialize graphics context
+	void initGraphicsContext(void* context) override;                                     // Initialize graphics context
 	void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;  // Clear a GPU buffer in VRAM
 	void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;  // Perform display transfer
 	void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
@@ -207,10 +211,6 @@ class RendererGL final : public Renderer {
 	void resetStateManager() { gl.reset(); }
 	void initUbershader(OpenGL::Program& program);
 
-#ifdef PANDA3DS_FRONTEND_QT
-	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
-#endif
-
 	// Take a screenshot of the screen and store it in a file
 	void screenshot(const std::string& name) override;
 };
\ No newline at end of file
diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp
index 562e6b79..ce758e19 100644
--- a/include/renderer_mtl/mtl_command_encoder.hpp
+++ b/include/renderer_mtl/mtl_command_encoder.hpp
@@ -2,6 +2,8 @@
 
 #include <Metal/Metal.hpp>
 
+#include "helpers.hpp"
+
 namespace Metal {
 	struct RenderState {
 		MTL::RenderPipelineState* renderPipelineState = nullptr;
diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp
index 8f7256a9..b902346d 100644
--- a/include/renderer_mtl/mtl_depth_stencil_cache.hpp
+++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp
@@ -2,6 +2,7 @@
 
 #include <map>
 
+#include "helpers.hpp"
 #include "pica_to_mtl.hpp"
 
 using namespace PICA;
@@ -17,7 +18,6 @@ namespace Metal {
 	class DepthStencilCache {
 	  public:
 		DepthStencilCache() = default;
-
 		~DepthStencilCache() { reset(); }
 
 		void set(MTL::Device* dev) { device = dev; }
diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp
index 7178785e..17bead9a 100644
--- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp
+++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp
@@ -2,6 +2,7 @@
 
 #include <map>
 
+#include "helpers.hpp"
 #include "objc_helper.hpp"
 #include "pica_to_mtl.hpp"
 
diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp
index 531dc73c..e2f67b6b 100644
--- a/include/renderer_mtl/mtl_lut_texture.hpp
+++ b/include/renderer_mtl/mtl_lut_texture.hpp
@@ -2,19 +2,22 @@
 
 #include <Metal/Metal.hpp>
 
+#include "helpers.hpp"
+
 namespace Metal {
 
-class LutTexture {
-public:
-    LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
-    ~LutTexture();
-    u32 getNextIndex();
+	class LutTexture {
+	  public:
+		LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
+		~LutTexture();
+		u32 getNextIndex();
 
-    MTL::Texture* getTexture() { return texture; }
-    u32 getCurrentIndex() { return currentIndex; }
-private:
-    MTL::Texture* texture;
-    u32 currentIndex = 0;
-};
+		MTL::Texture* getTexture() { return texture; }
+		u32 getCurrentIndex() { return currentIndex; }
 
-} // namespace Metal
+	  private:
+		MTL::Texture* texture;
+		u32 currentIndex = 0;
+	};
+
+}  // namespace Metal
diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp
index b392389c..d50ea336 100644
--- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp
+++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp
@@ -5,7 +5,6 @@
 #include "helpers.hpp"
 #include "pica_to_mtl.hpp"
 
-
 using namespace PICA;
 
 namespace Metal {
diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp
index d4c6dc7c..94474dad 100644
--- a/include/renderer_mtl/pica_to_mtl.hpp
+++ b/include/renderer_mtl/pica_to_mtl.hpp
@@ -3,6 +3,7 @@
 #include <Metal/Metal.hpp>
 
 #include "PICA/regs.hpp"
+#include "helpers.hpp"
 // TODO: remove dependency on OpenGL
 #include "opengl.hpp"
 
@@ -14,10 +15,10 @@ namespace PICA {
 
 		bool needsSwizzle = false;
 		MTL::TextureSwizzleChannels swizzle{
-			.red = MTL::TextureSwizzleRed,
-			.green = MTL::TextureSwizzleGreen,
-			.blue = MTL::TextureSwizzleBlue,
-			.alpha = MTL::TextureSwizzleAlpha,
+			MTL::TextureSwizzleRed,
+			MTL::TextureSwizzleGreen,
+			MTL::TextureSwizzleBlue,
+			MTL::TextureSwizzleAlpha,
 		};
 	};
 
@@ -33,7 +34,7 @@ namespace PICA {
 			case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm;  // TODO: use MTL::PixelFormatBGR5A1Unorm?
 			case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm;    // TODO: use MTL::PixelFormatB5G6R5Unorm?
 #ifdef PANDA3DS_IOS
-			case ColorFmt::RGBA4: return MTL::PixelFormatRGBA8Unorm; // IOS + Metal doesn't support AGBR4 properly, at least on simulator
+			case ColorFmt::RGBA4: return MTL::PixelFormatRGBA8Unorm;  // IOS + Metal doesn't support AGBR4 properly, at least on simulator
 #else
 			case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
 #endif
@@ -130,8 +131,7 @@ namespace PICA {
 			case PrimType::TriangleFan:
 				Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
 				return MTL::PrimitiveTypeTriangle;
-			case PrimType::GeometryPrimitive:
-				return MTL::PrimitiveTypeTriangle;
+			case PrimType::GeometryPrimitive: return MTL::PrimitiveTypeTriangle;
 		}
 	}
 
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index c9b4b9b6..29eb02d4 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -13,7 +13,6 @@
 #include "mtl_vertex_buffer_cache.hpp"
 #include "renderer.hpp"
 
-
 // HACK: use the OpenGL cache
 #include "../renderer_gl/surface_cache.hpp"
 
@@ -30,7 +29,7 @@ class RendererMTL final : public Renderer {
 
 	void reset() override;
 	void display() override;
-	void initGraphicsContext(SDL_Window* window) override;
+	void initGraphicsContext(void* context) override;
 	void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
 	void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
 	void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
@@ -38,10 +37,6 @@ class RendererMTL final : public Renderer {
 	void screenshot(const std::string& name) override;
 	void deinitGraphicsContext() override;
 
-#ifdef PANDA3DS_FRONTEND_QT
-	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
-#endif
-
 	virtual void setMTKLayer(void* layer) override;
 
   private:
diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp
index 50a724d8..28ab41af 100644
--- a/include/renderer_null/renderer_null.hpp
+++ b/include/renderer_null/renderer_null.hpp
@@ -9,7 +9,7 @@ class RendererNull final : public Renderer {
 
 	void reset() override;
 	void display() override;
-	void initGraphicsContext(SDL_Window* window) override;
+	void initGraphicsContext(void* context) override;
 	void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
 	void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
 	void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
@@ -20,8 +20,4 @@ class RendererNull final : public Renderer {
 	// Tell the GPU core that we'll handle vertex fetch & shader execution in the renderer in order to speed up execution.
 	// Of course, we don't do this and geometry is never actually processed, since this is the null renderer.
 	virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override { return true; };
-
-#ifdef PANDA3DS_FRONTEND_QT
-	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
-#endif
 };
diff --git a/include/renderer_sw/renderer_sw.hpp b/include/renderer_sw/renderer_sw.hpp
index dd12bf0a..e2dd90cb 100644
--- a/include/renderer_sw/renderer_sw.hpp
+++ b/include/renderer_sw/renderer_sw.hpp
@@ -9,15 +9,11 @@ class RendererSw final : public Renderer {
 
 	void reset() override;
 	void display() override;
-	void initGraphicsContext(SDL_Window* window) override;
+	void initGraphicsContext(void* context) override;
 	void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
 	void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
 	void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
 	void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
 	void screenshot(const std::string& name) override;
 	void deinitGraphicsContext() override;
-
-#ifdef PANDA3DS_FRONTEND_QT
-	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
-#endif
 };
diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp
index 25cc26f8..c90b1d4d 100644
--- a/include/renderer_vk/renderer_vk.hpp
+++ b/include/renderer_vk/renderer_vk.hpp
@@ -9,6 +9,7 @@
 #include "vk_sampler_cache.hpp"
 
 class GPU;
+struct SDL_Window;
 
 class RendererVK final : public Renderer {
 	SDL_Window* targetWindow;
@@ -113,7 +114,7 @@ class RendererVK final : public Renderer {
 
 	void reset() override;
 	void display() override;
-	void initGraphicsContext(SDL_Window* window) override;
+	void initGraphicsContext(void* context) override;
 	void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
 	void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
 	void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
diff --git a/include/scheduler.hpp b/include/scheduler.hpp
index 9a51f893..6083aeb2 100644
--- a/include/scheduler.hpp
+++ b/include/scheduler.hpp
@@ -4,7 +4,6 @@
 #include <limits>
 
 #include "helpers.hpp"
-#include "logger.hpp"
 
 struct Scheduler {
 	enum class EventType {
diff --git a/include/services/dsp_firmware_db.hpp b/include/services/dsp_firmware_db.hpp
index bac11d73..b6b594d4 100644
--- a/include/services/dsp_firmware_db.hpp
+++ b/include/services/dsp_firmware_db.hpp
@@ -18,7 +18,7 @@ namespace DSP {
 			: hash(hash), size(size), supportsAAC(supportsAAC), notes(notes) {}
 	};
 
-	static constexpr std::array<FirmwareInfo, 9> firmwareDB = {
+	static constexpr std::array<FirmwareInfo, 10> firmwareDB = {
 		FirmwareInfo(
 			{0x47, 0xD6, 0x6C, 0xD2, 0x13, 0x1, 0xFF, 0x62, 0xAD, 0x16, 0x98, 0x2,  0x46, 0x67, 0xF3, 0x9,
 			 0xDA, 0x7,  0x20, 0x9E, 0xFB, 0xB, 0x6A, 0x81, 0x98, 0xFF, 0x9B, 0xE0, 0x51, 0x67, 0xC9, 0xA6},
@@ -72,5 +72,11 @@ namespace DSP {
 			 0x3A, 0x29, 0x1,  0x70, 0xEA, 0x3B, 0x6C, 0x14, 0x57, 0x49, 0xAD, 0x93, 0x58, 0x67, 0x2C, 0x97},
 			49716, false, "Spotted in PMD: GTI"
 		),
+
+		FirmwareInfo(
+			{0x96, 0xF3, 0x96, 0x28, 0x38, 0xEB, 0xE9, 0x2A, 0x9E, 0x99, 0xD0, 0xB0, 0x78, 0xAD, 0xE3, 0x67,
+			 0x3B, 0x9B, 0x2F, 0x24, 0x3E, 0xBE, 0xC0, 0x47, 0x4D, 0x3E, 0x49, 0xA9, 0x2B, 0x65, 0x5B, 0x85},
+			49772, false, "Spotted in Luigi's Mansion"
+		),
 	};
 }  // namespace DSP
\ No newline at end of file
diff --git a/include/services/frd.hpp b/include/services/frd.hpp
index 914d9251..a5aee738 100644
--- a/include/services/frd.hpp
+++ b/include/services/frd.hpp
@@ -40,6 +40,7 @@ class FRDService {
 	void hasLoggedIn(u32 messagePointer);
 	void isOnline(u32 messagePointer);
 	void logout(u32 messagePointer);
+	void saveLocalAccountData(u32 messagePointer);
 	void setClientSDKVersion(u32 messagePointer);
 	void setNotificationMask(u32 messagePointer);
 	void updateGameModeDescription(u32 messagePointer);
diff --git a/src/config.cpp b/src/config.cpp
index f2cddaba..4d489c95 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -48,6 +48,7 @@ void EmulatorConfig::load() {
 
 			printAppVersion = toml::find_or<toml::boolean>(general, "PrintAppVersion", true);
 			circlePadProEnabled = toml::find_or<toml::boolean>(general, "EnableCirclePadPro", true);
+			fastmemEnabled = toml::find_or<toml::boolean>(general, "EnableFastmem", enableFastmemDefault);
 			systemLanguage = languageCodeFromString(toml::find_or<std::string>(general, "SystemLanguage", "en"));
 		}
 	}
@@ -180,6 +181,7 @@ void EmulatorConfig::save() {
 	data["General"]["PrintAppVersion"] = printAppVersion;
 	data["General"]["SystemLanguage"] = languageCodeToString(systemLanguage);
 	data["General"]["EnableCirclePadPro"] = circlePadProEnabled;
+	data["General"]["EnableFastmem"] = fastmemEnabled;
 
 	data["Window"]["AppVersionOnWindow"] = windowSettings.showAppVersion;
 	data["Window"]["RememberWindowPosition"] = windowSettings.rememberPosition;
diff --git a/src/core/CPU/cpu_dynarmic.cpp b/src/core/CPU/cpu_dynarmic.cpp
index 85dc70d9..124647d8 100644
--- a/src/core/CPU/cpu_dynarmic.cpp
+++ b/src/core/CPU/cpu_dynarmic.cpp
@@ -6,6 +6,7 @@
 
 CPU::CPU(Memory& mem, Kernel& kernel, Emulator& emu) : mem(mem), emu(emu), scheduler(emu.getScheduler()), env(mem, kernel, emu.getScheduler()) {
 	cp15 = std::make_shared<CP15>();
+	mem.setCPUTicks(getTicksRef());
 
 	Dynarmic::A32::UserConfig config;
 	config.arch_version = Dynarmic::A32::ArchVersion::v6K;
@@ -15,6 +16,12 @@ CPU::CPU(Memory& mem, Kernel& kernel, Emulator& emu) : mem(mem), emu(emu), sched
 	config.global_monitor = &exclusiveMonitor;
 	config.processor_id = 0;
 
+	if (mem.isFastmemEnabled()) {
+		config.fastmem_pointer = u64(mem.getFastmemArenaBase());
+	} else {
+		config.fastmem_pointer = std::nullopt;
+	}
+
 	jit = std::make_unique<Dynarmic::A32::Jit>(config);
 }
 
diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp
index f5d26784..e38f4416 100644
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@@ -138,7 +138,7 @@ void GPU::drawArrays(bool indexed) {
 
 	if (config.accelerateShaders) {
 		// If we are potentially going to use hw shaders, gather necessary to do vertex fetch, index buffering, etc on the GPU
-		// This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on 
+		// This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on
 		getAcceleratedDrawInfo(accel, indexed);
 	}
 
@@ -182,6 +182,7 @@ void GPU::drawArrays() {
 
 	// We can have up to 16 attributes, each one consisting of 4 floats
 	constexpr u32 maxAttrSizeInFloats = 16 * 4;
+	setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]);
 
 	// Base address for vertex attributes
 	// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp
index a85c7464..9be382b3 100644
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@@ -124,16 +124,24 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) {
 	switch (index) {
 		// No offset applied
 		case 0: [[likely]] return u8(source);
-		// Address register
+
+		// An address register (if index == 1 or 2) or the loop counter (if index == 3) is used as the offset
+		// There are several edge cases to handle, which have been verified with our shader tests and on a real 3DS
 		case 1:
-		case 2: {
-			const s32 offset = addrRegister[index - 1];
+		case 2:
+		case 3: {
+			s32 offset = (index == 3) ? s32(loopCounter) : addrRegister[index - 1];
 			if (offset < -128 || offset > 127) [[unlikely]] {
-				return u8(source);
+				offset = 0;
 			}
-			return u8(source + offset);
+
+			// Subtract 0x20 to get the index of the float uniform. Add the offset to it, then mask the sum with 0x7F like the PICA does
+			// After that, add 0x20 again to undo the initial subtraction
+			u8 floatUniformIndex = u8(((source - 0x20) + offset) & 0x7F);
+			floatUniformIndex += 0x20;
+
+			return floatUniformIndex;
 		}
-		case 3: return u8(source + loopCounter);
 	}
 
 	Helpers::panic("Reached unreachable path in PICAShader::getIndexedSource");
diff --git a/src/core/applets/software_keyboard.cpp b/src/core/applets/software_keyboard.cpp
index fc58a3ec..379134a6 100644
--- a/src/core/applets/software_keyboard.cpp
+++ b/src/core/applets/software_keyboard.cpp
@@ -55,7 +55,7 @@ Result::HorizonResult SoftwareKeyboardApplet::start(const MemoryBlock* sharedMem
 	}
 	mem.write16(textAddress, 0);  // Write UTF-16 null terminator
 
-	// Temporarily hardcode the pressed button to be the firs tone
+	// Temporarily hardcode the pressed button to be the first one
 	switch (config.numButtonsM1) {
 		case SoftwareKeyboardButtonConfig::SingleButton: config.returnCode = SoftwareKeyboardResult::D0Click; break;
 		case SoftwareKeyboardButtonConfig::DualButton: config.returnCode = SoftwareKeyboardResult::D1Click1; break;
diff --git a/src/core/audio/aac_decoder.cpp b/src/core/audio/aac_decoder.cpp
index af88485c..58cebda6 100644
--- a/src/core/audio/aac_decoder.cpp
+++ b/src/core/audio/aac_decoder.cpp
@@ -45,8 +45,6 @@ void AAC::Decoder::decode(AAC::Message& response, const AAC::Message& request, A
 	std::array<s16, frameSize> frame;
 	std::array<std::vector<s16>, 2> audioStreams;
 
-	bool queriedStreamInfo = false;
-
 	while (bytesValid != 0) {
 		if (aacDecoder_Fill(decoderHandle, &input, &bufferSize, &bytesValid) != AAC_DEC_OK) {
 			Helpers::warn("Failed to fill AAC decoder with samples");
@@ -143,4 +141,4 @@ AAC::Decoder::~Decoder() {
 		aacDecoder_Close(decoderHandle);
 		decoderHandle = nullptr;
 	}
-}
\ No newline at end of file
+}
diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp
index 97a6211e..059318c8 100644
--- a/src/core/audio/hle_core.cpp
+++ b/src/core/audio/hle_core.cpp
@@ -396,7 +396,28 @@ namespace Audio {
 
 		if (config.partialEmbeddedBufferDirty) {
 			config.partialEmbeddedBufferDirty = 0;
-			printf("Partial embedded buffer dirty for voice %d\n", source.index);
+
+			const u8* data = getPointerPhys<u8>(source.currentBufferPaddr & ~0x3);
+
+			if (data != nullptr) {
+				switch (source.sampleFormat) {
+					case SampleFormat::PCM8: source.currentSamples = decodePCM8(data, config.length, source); break;
+					case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, config.length, source); break;
+					case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, config.length, source); break;
+
+					default:
+						Helpers::warn("Invalid DSP sample format");
+						source.currentSamples = {};
+						break;
+				}
+
+				// We're skipping the first samplePosition samples, so remove them from the buffer so as not to consume them later
+				if (source.samplePosition > 0) {
+					auto start = source.currentSamples.begin();
+					auto end = std::next(start, source.samplePosition);
+					source.currentSamples.erase(start, end);
+				}
+			}
 		}
 
 		if (config.bufferQueueDirty) {
@@ -478,6 +499,7 @@ namespace Audio {
 			return;
 		}
 
+		source.currentBufferPaddr = buffer.paddr;
 		source.currentBufferID = buffer.bufferID;
 		source.previousBufferID = 0;
 		// For looping buffers, this is only set for the first time we play it. Loops do not set the dirty bit.
@@ -766,6 +788,7 @@ namespace Audio {
 		interpolationMode = InterpolationMode::Linear;
 
 		samplePosition = 0;
+		currentBufferPaddr = 0;
 		previousBufferID = 0;
 		currentBufferID = 0;
 		syncCount = 0;
diff --git a/src/core/kernel/fcram.cpp b/src/core/kernel/fcram.cpp
new file mode 100644
index 00000000..ebb58c34
--- /dev/null
+++ b/src/core/kernel/fcram.cpp
@@ -0,0 +1,102 @@
+#include "fcram.hpp"
+
+#include "memory.hpp"
+
+void KFcram::Region::reset(u32 start, size_t size) {
+	// Record where the region begins and how many 4 KB pages it spans; all of them start out free
+	this->start = start;
+	pages = size >> 12;
+	freePages = pages;
+	// Discard any previous bookkeeping and describe the whole region as one block at page offset 0
+	blocks.clear();
+	blocks.push_back(Block(pages, 0));
+}
+
+void KFcram::Region::alloc(std::list<FcramBlock>& out, s32 allocPages, bool linear) {
+	for (auto cur = blocks.begin(); cur != blocks.end(); ++cur) {
+		// Ignore blocks that are taken, and for linear requests any block too small to hold everything on its own
+		if (cur->used || (linear && cur->pages < allocPages)) continue;
+
+		// Oversized block: shrink it to the requested size and hand the leftover pages to a new block
+		if (cur->pages > allocPages) {
+			const Block remainder(cur->pages - allocPages, cur->pageOffset + allocPages);
+			cur->pages = allocPages;
+			blocks.insert(cur, remainder);
+		}
+
+		// Claim the block, then update the outstanding request and the region's free-page counter
+		const auto claimedPages = cur->pages;
+		cur->used = true;
+		allocPages -= claimedPages;
+		freePages -= claimedPages;
+
+		// Tell the caller which physical range was claimed (page offset -> byte address via << 12)
+		const u32 paddr = start + (cur->pageOffset << 12);
+		out.push_back(FcramBlock(paddr, claimedPages));
+
+		// Nothing left to satisfy: the allocation succeeded
+		if (allocPages < 1) {
+			return;
+		}
+	}
+
+	// Official kernel panics here
+	Helpers::panic("Failed to allocate FCRAM, not enough guest memory");
+}
+
+u32 KFcram::Region::getUsedCount() { return pages - freePages; }  // Allocated pages = total pages - free pages
+u32 KFcram::Region::getFreeCount() { return freePages; }  // Pages of this region that are not allocated
+
+KFcram::KFcram(Memory& mem) : mem(mem) {}  // Only binds the Memory object; regions and refcounts are set up by reset()
+
+void KFcram::reset(size_t ramSize, size_t appSize, size_t sysSize, size_t baseSize) {
+	// NOTE(review): refs owns a new[] array through unique_ptr<u32>, which frees with delete instead of delete[] - consider unique_ptr<u32[]>
+	fcram = mem.getFCRAM();
+	refs = std::unique_ptr<u32>(new u32[ramSize >> 12]());  // One reference counter per 4 KB page, value-initialised to zero
+	// App, Sys and Base regions sit back to back, in that order, starting at offset 0
+	appRegion.reset(0, appSize);
+	sysRegion.reset(appSize, sysSize);
+	baseRegion.reset(appSize + sysSize, baseSize);
+}
+
+void KFcram::alloc(FcramBlockList& out, s32 pages, FcramRegion region, bool linear) {
+	// Hand the request to the allocator of the chosen region; an unrecognised region is a fatal error
+	if (region == FcramRegion::App) appRegion.alloc(out, pages, linear);
+	else if (region == FcramRegion::Sys) sysRegion.alloc(out, pages, linear);
+	else if (region == FcramRegion::Base) baseRegion.alloc(out, pages, linear);
+	else Helpers::panic("Invalid FCRAM region chosen for allocation!");
+
+	// incRef walks the entire list, so blocks already in `out` before this call are counted as well
+	incRef(out);
+}
+
+void KFcram::incRef(FcramBlockList& list) {
+	// Add one reference to every 4 KB page covered by the blocks in the list
+	for (auto& block : list) {
+		for (int page = 0; page < block.pages; page++) {
+			refs.get()[u32((block.paddr >> 12) + page)]++;
+		}
+	}
+}
+
+void KFcram::decRef(FcramBlockList& list) {
+	// Drop one reference from every 4 KB page covered by the blocks in the list
+	for (auto& block : list) {
+		for (int page = 0; page < block.pages; page++) {
+			auto& refCount = refs.get()[u32((block.paddr >> 12) + page)];
+			refCount--;
+
+			// Releasing pages that nobody references any more is not implemented yet
+			if (refCount == 0) Helpers::panic("TODO: Freeing FCRAM");
+		}
+	}
+}
+
+u32 KFcram::getUsedCount(FcramRegion region) {
+	// Report how many pages are currently allocated in the requested region
+	if (region == FcramRegion::App) return appRegion.getUsedCount();
+	if (region == FcramRegion::Sys) return sysRegion.getUsedCount();
+	if (region == FcramRegion::Base) return baseRegion.getUsedCount();
+
+	Helpers::panic("Invalid FCRAM region in getUsedCount!");  // NOTE(review): no return follows; assumes panic never returns
+}
\ No newline at end of file
diff --git a/src/core/kernel/idle_thread.cpp b/src/core/kernel/idle_thread.cpp
index d6f79360..cfc71d2d 100644
--- a/src/core/kernel/idle_thread.cpp
+++ b/src/core/kernel/idle_thread.cpp
@@ -17,6 +17,8 @@ idle_thread_main:
 	b idle_thread_main
 */
 
+using namespace KernelMemoryTypes;
+
 static constexpr u8 idleThreadCode[] = {
 	0x00, 0x00, 0xA0, 0xE3,  // mov r0, #0
 	0x00, 0x10, 0xA0, 0xE3,  // mov r1, #0
@@ -27,18 +29,16 @@ static constexpr u8 idleThreadCode[] = {
 // Set up an idle thread to run when no thread is able to run
 void Kernel::setupIdleThread() {
 	Thread& t = threads[idleThreadIndex];
-	constexpr u32 codeAddress = 0xBFC00000;
 
-	// Reserve some memory for the idle thread's code. We map this memory to vaddr BFC00000 which is not userland-accessible
+	// Reserve some memory for the idle thread's code. We map this memory to vaddr 3FC00000 which shouldn't be accessed by applications
 	// We only allocate 4KB (1 page) because our idle code is pretty small
-	const u32 fcramIndex = mem.allocateSysMemory(Memory::pageSize);
-	auto vaddr = mem.allocateMemory(codeAddress, fcramIndex, Memory::pageSize, true, true, false, true, false, true);
-	if (!vaddr.has_value() || vaddr.value() != codeAddress) {
+	constexpr u32 codeAddress = 0x3FC00000;
+	if (!mem.allocMemory(codeAddress, 1, FcramRegion::Base, true, true, false, MemoryState::Locked)) {
 		Helpers::panic("Failed to setup idle thread");
 	}
 
 	// Copy idle thread code to the allocated FCRAM
-	std::memcpy(&mem.getFCRAM()[fcramIndex], idleThreadCode, sizeof(idleThreadCode));
+	mem.copyToVaddr(codeAddress, idleThreadCode, sizeof(idleThreadCode));
 
 	t.entrypoint = codeAddress;
 	t.initialSP = 0;
diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp
index 824017d0..7a0aacc0 100644
--- a/src/core/kernel/kernel.cpp
+++ b/src/core/kernel/kernel.cpp
@@ -7,7 +7,7 @@
 #include "kernel_types.hpp"
 
 Kernel::Kernel(CPU& cpu, Memory& mem, GPU& gpu, const EmulatorConfig& config, LuaManager& lua)
-	: cpu(cpu), regs(cpu.regs()), mem(mem), handleCounter(0), serviceManager(regs, mem, gpu, currentProcess, *this, config, lua) {
+	: cpu(cpu), regs(cpu.regs()), mem(mem), handleCounter(0), serviceManager(regs, mem, gpu, currentProcess, *this, config, lua), fcramManager(mem) {
 	objects.reserve(512);  // Make room for a few objects to avoid further memory allocs later
 	mutexHandles.reserve(8);
 	portHandles.reserve(32);
@@ -271,7 +271,7 @@ void Kernel::getProcessInfo() {
 		// According to 3DBrew: Amount of private (code, data, heap) memory used by the process + total supervisor-mode
 		// stack size + page-rounded size of the external handle table
 		case 2:
-			regs[1] = mem.getUsedUserMem();
+			regs[1] = fcramManager.getUsedCount(FcramRegion::App) * Memory::pageSize;
 			regs[2] = 0;
 			break;
 
@@ -364,7 +364,7 @@ void Kernel::getSystemInfo() {
 			switch (subtype) {
 				// Total used memory size in the APPLICATION memory region
 				case 1:
-					regs[1] = mem.getUsedUserMem();
+					regs[1] = fcramManager.getUsedCount(FcramRegion::App) * Memory::pageSize;
 					regs[2] = 0;
 					break;
 
diff --git a/src/core/kernel/memory_management.cpp b/src/core/kernel/memory_management.cpp
index 58a46c4d..9199a32c 100644
--- a/src/core/kernel/memory_management.cpp
+++ b/src/core/kernel/memory_management.cpp
@@ -30,10 +30,10 @@ namespace MemoryPermissions {
 	};
 }
 
+using namespace KernelMemoryTypes;
+
 // Returns whether "value" is aligned to a page boundary (Ie a boundary of 4096 bytes)
-static constexpr bool isAligned(u32 value) {
-	return (value & 0xFFF) == 0;
-}
+static constexpr bool isAligned(u32 value) { return (value & 0xFFF) == 0; }
 
 // Result ControlMemory(u32* outaddr, u32 addr0, u32 addr1, u32 size,
 //						MemoryOperation operation, MemoryPermission permissions)
@@ -44,6 +44,7 @@ void Kernel::controlMemory() {
 	u32 addr0 = regs[1];
 	u32 addr1 = regs[2];
 	u32 size = regs[3];
+	u32 pages = size >> 12;  // Official kernel truncates nonaligned sizes
 	u32 perms = regs[4];
 
 	if (perms == MemoryPermissions::DontCare) {
@@ -61,7 +62,7 @@ void Kernel::controlMemory() {
 		Helpers::panic("ControlMemory: attempted to allocate executable memory");
 	}
 
-	if (!isAligned(addr0) || !isAligned(addr1) || !isAligned(size)) {
+	if (!isAligned(addr0) || !isAligned(addr1)) {
 		Helpers::panic("ControlMemory: Unaligned parameters\nAddr0: %08X\nAddr1: %08X\nSize: %08X", addr0, addr1, size);
 	}
 
@@ -72,22 +73,54 @@ void Kernel::controlMemory() {
 
 	switch (operation & 0xFF) {
 		case Operation::Commit: {
-			std::optional<u32> address = mem.allocateMemory(addr0, 0, size, linear, r, w, x, true);
-			if (!address.has_value()) {
-				Helpers::panic("ControlMemory: Failed to allocate memory");
+			// TODO: base this from the exheader
+			auto region = FcramRegion::App;
+
+			u32 outAddr = 0;
+			if (linear) {
+				if (!mem.allocMemoryLinear(outAddr, addr0, pages, region, r, w, false)) {
+					Helpers::panic("ControlMemory: Failed to allocate linear memory");
+				}
+			} else {
+				if (!mem.allocMemory(addr0, pages, region, r, w, false, MemoryState::Private)) {
+					Helpers::panic("ControlMemory: Failed to allocate memory");
+				}
+
+				outAddr = addr0;
 			}
 
-			regs[1] = address.value();
+			regs[1] = outAddr;
 			break;
 		}
 
-		case Operation::Map: mem.mirrorMapping(addr0, addr1, size); break;
+		case Operation::Map:
+			// Official kernel only allows Private regions to be mapped to Free regions. An Alias or Aliased region cannot be mapped again
+			if (!mem.mapVirtualMemory(
+					addr0, addr1, pages, r, w, false, MemoryState::Free, MemoryState::Private, MemoryState::Alias, MemoryState::Aliased
+				))
+				Helpers::panic("ControlMemory: Failed to map memory");
+			break;
+
+		case Operation::Unmap:
+			// The same as a Map operation, except in reverse
+			if (!mem.mapVirtualMemory(
+					addr0, addr1, pages, false, false, false, MemoryState::Alias, MemoryState::Aliased, MemoryState::Free, MemoryState::Private
+				)) {
+				Helpers::panic("ControlMemory: Failed to unmap memory");
+			}
+			break;
 
 		case Operation::Protect:
-			Helpers::warn(
-				"Ignoring mprotect! Hope nothing goes wrong but if the game accesses invalid memory or crashes then we prolly need to implement "
-				"this\n"
-			);
+			// Official kernel has an internal state bit to indicate that the region's permissions may be changed
+			// But this should account for all cases
+			if (!mem.testMemoryState(addr0, pages, MemoryState::Private) && !mem.testMemoryState(addr0, pages, MemoryState::Alias) &&
+				!mem.testMemoryState(addr0, pages, MemoryState::Aliased) && !mem.testMemoryState(addr0, pages, MemoryState::AliasCode)) {
+				Helpers::warn("Tried to mprotect invalid region!");
+				return;
+			}
+
+			mem.changePermissions(addr0, pages, r, w, false);
+			regs[1] = addr0;
 			break;
 
 		default: Helpers::warn("ControlMemory: unknown operation %X\n", operation); break;
@@ -104,10 +137,11 @@ void Kernel::queryMemory() {
 
 	logSVC("QueryMemory(mem info pointer = %08X, page info pointer = %08X, addr = %08X)\n", memInfo, pageInfo, addr);
 
-	const auto info = mem.queryMemory(addr);
-	regs[0] = Result::Success;
+	KernelMemoryTypes::MemoryInfo info;
+	const auto result = mem.queryMemory(info, addr);
+	regs[0] = result;
 	regs[1] = info.baseAddr;
-	regs[2] = info.size;
+	regs[2] = info.pages * Memory::pageSize;
 	regs[3] = info.perms;
 	regs[4] = info.state;
 	regs[5] = 0;  // page flags
diff --git a/src/core/kernel/resource_limits.cpp b/src/core/kernel/resource_limits.cpp
index 28fbeea8..65556c01 100644
--- a/src/core/kernel/resource_limits.cpp
+++ b/src/core/kernel/resource_limits.cpp
@@ -82,7 +82,9 @@ void Kernel::getResourceLimitCurrentValues() {
 s32 Kernel::getCurrentResourceValue(const KernelObject* limit, u32 resourceName) {
 	const auto data = static_cast<ResourceLimits*>(limit->data);
 	switch (resourceName) {
-		case ResourceType::Commit: return mem.usedUserMemory;
+		// TODO: needs to use the current amount of memory allocated by the process
+		case ResourceType::Commit: return fcramManager.getUsedCount(FcramRegion::App) * Memory::pageSize;
+
 		case ResourceType::Thread: return threadIndices.size();
 		default: Helpers::panic("Attempted to get current value of unknown kernel resource: %d\n", resourceName);
 	}
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index ca6bdd19..2790c8f8 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -6,6 +6,8 @@
 
 #include "memory.hpp"
 
+using namespace KernelMemoryTypes;
+
 namespace {
 	struct LoadInfo {
 		u32 codeSegSizeAligned;
@@ -53,12 +55,6 @@ bool Memory::map3DSX(HB3DSX& hb3dsx, const HB3DSX::Header& header) {
 	// suum of aligned values is always aligned, have an extra RW page for libctru
 	const u32 totalSize = hbInfo.codeSegSizeAligned + hbInfo.rodataSegSizeAligned + hbInfo.dataSegSizeAligned + 4_KB;
 
-	const auto opt = findPaddr(totalSize);
-	if (!opt.has_value()) {
-		Helpers::panic("Failed to find paddr to map 3DSX file's code to");
-		return false;
-	}
-
 	// Map the ROM on the kernel side
 	const u32 textOffset = 0;
 	const u32 rodataOffset = textOffset + hbInfo.codeSegSizeAligned;
@@ -213,7 +209,8 @@ bool Memory::map3DSX(HB3DSX& hb3dsx, const HB3DSX::Header& header) {
 		{
 			pst->heapSize = u32(48_MB);
 			pst->linearHeapSize = u32(64_MB);
-		} else */ {
+		} else */
+		{
 			pst.heapSize = u32(24_MB);
 			pst.linearHeapSize = u32(32_MB);
 		}
@@ -221,12 +218,17 @@ bool Memory::map3DSX(HB3DSX& hb3dsx, const HB3DSX::Header& header) {
 		std::memcpy(&code[4], &pst, sizeof(pst));
 	}
 
-	const auto paddr = opt.value();
-	std::memcpy(&fcram[paddr], &code[0], totalSize);  // Copy the 3 segments + BSS to FCRAM
+	// Text is R-X
+	allocMemory(textSegAddr, hbInfo.codeSegSizeAligned / Memory::pageSize, FcramRegion::App, true, false, true, MemoryState::Code);
+	copyToVaddr(textSegAddr, &code[textOffset], hbInfo.codeSegSizeAligned);
 
-	allocateMemory(textSegAddr, paddr + textOffset, hbInfo.codeSegSizeAligned, true, true, false, true);           // Text is R-X
-	allocateMemory(rodataSegAddr, paddr + rodataOffset, hbInfo.rodataSegSizeAligned, true, true, false, false);    // Rodata is R--
-	allocateMemory(dataSegAddr, paddr + dataOffset, hbInfo.dataSegSizeAligned + 0x1000, true, true, true, false);  // Data+BSS+Extra is RW-
+	// Rodata is R--
+	allocMemory(rodataSegAddr, hbInfo.rodataSegSizeAligned / Memory::pageSize, FcramRegion::App, true, false, false, MemoryState::Code);
+	copyToVaddr(rodataSegAddr, &code[rodataOffset], hbInfo.rodataSegSizeAligned);
+
+	// Data + BSS + Extra is RW-. We allocate 1 extra page (4KB) which is not initialized to anything.
+	allocMemory(dataSegAddr, (hbInfo.dataSegSizeAligned + 4_KB) / Memory::pageSize, FcramRegion::App, true, true, false, MemoryState::Private);
+	copyToVaddr(dataSegAddr, &code[dataOffset], hbInfo.dataSegSizeAligned);
 
 	return true;
 }
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 8b2a7807..205b16ec 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -4,6 +4,7 @@
 #include "memory.hpp"
 
 using namespace ELFIO;
+using namespace KernelMemoryTypes;
 
 std::optional<u32> Memory::loadELF(std::ifstream& file) {
 	loadedCXI = std::nullopt;  // ELF files don't have a CXI, so set this to null
@@ -24,6 +25,7 @@ std::optional<u32> Memory::loadELF(std::ifstream& file) {
 	auto segNum = reader.segments.size();
 	printf("Number of segments: %d\n", segNum);
 	printf(" #  Perms       Vaddr           File Size       Mem Size\n");
+
 	for (int i = 0; i < segNum; ++i) {
 		const auto seg = reader.segments[i];
 		const auto flags = seg->get_flags();
@@ -55,12 +57,8 @@ std::optional<u32> Memory::loadELF(std::ifstream& file) {
 			Helpers::warn("Rounding ELF segment size to %08X\n", memorySize);
 		}
 
-		// This should also assert that findPaddr doesn't fail
-		u32 fcramAddr = findPaddr(memorySize).value();
-		std::memcpy(&fcram[fcramAddr], data, fileSize);
-
-		// Allocate the segment on the OS side
-		allocateMemory(vaddr, fcramAddr, memorySize, true, r, w, x);
+		allocMemory(vaddr, memorySize / Memory::pageSize, FcramRegion::App, r, w, x, MemoryState::Code);
+		copyToVaddr(vaddr, data, fileSize);
 	}
 
 	// ELF can't specify a region, make it default to USA
diff --git a/src/core/loader/ncsd.cpp b/src/core/loader/ncsd.cpp
index 13d68892..500b70ce 100644
--- a/src/core/loader/ncsd.cpp
+++ b/src/core/loader/ncsd.cpp
@@ -3,8 +3,11 @@
 #include <cstring>
 #include <optional>
 
+#include "kernel/fcram.hpp"
 #include "memory.hpp"
 
+using namespace KernelMemoryTypes;
+
 bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) {
 	printf("Text address = %08X, size = %08X\n", cxi.text.address, cxi.text.size);
 	printf("Rodata address = %08X, size = %08X\n", cxi.rodata.address, cxi.rodata.size);
@@ -24,12 +27,6 @@ bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) {
 	// Round up the size of the CXI stack size to a page (4KB) boundary, as the OS can only allocate memory this way
 	u32 stackSize = (cxi.stackSize + pageSize - 1) & -pageSize;
 
-	if (stackSize > 512_KB) {
-		// TODO: Figure out the actual max stack size
-		Helpers::warn("CXI stack size is %08X which seems way too big. Clamping to 512KB", stackSize);
-		stackSize = 512_KB;
-	}
-
 	// Allocate stack
 	if (!allocateMainThreadStack(stackSize)) {
 		// Should be unreachable
@@ -42,40 +39,41 @@ bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) {
 	u32 bssSize = (cxi.bssSize + 0xfff) & ~0xfff;  // Round BSS size up to a page boundary
 	// Total memory to allocate for loading
 	u32 totalSize = (cxi.text.pageCount + cxi.rodata.pageCount + cxi.data.pageCount) * pageSize + bssSize;
-	code.resize(code.size() + bssSize, 0);  // Pad the .code file with zeroes for the BSS segment
 
-	if (code.size() < totalSize) {
+	if (code.size() + bssSize < totalSize) {
 		Helpers::panic("Total code size as reported by the exheader is larger than the .code file");
 		return false;
 	}
 
-	const auto opt = findPaddr(totalSize);
-	if (!opt.has_value()) {
-		Helpers::panic("Failed to find paddr to map CXI file's code to");
-		return false;
-	}
-
-	const auto paddr = opt.value();
-	std::memcpy(&fcram[paddr], &code[0], totalSize);  // Copy the 3 segments + BSS to FCRAM
-
 	// Map the ROM on the kernel side
-	u32 textOffset = 0;
 	u32 textAddr = cxi.text.address;
 	u32 textSize = cxi.text.pageCount * pageSize;
 
-	u32 rodataOffset = textOffset + textSize;
 	u32 rodataAddr = cxi.rodata.address;
 	u32 rodataSize = cxi.rodata.pageCount * pageSize;
 
-	u32 dataOffset = rodataOffset + rodataSize;
 	u32 dataAddr = cxi.data.address;
 	u32 dataSize = cxi.data.pageCount * pageSize + bssSize;  // We're merging the data and BSS segments, as BSS is just pre-initted .data
 
-	allocateMemory(textAddr, paddr + textOffset, textSize, true, true, false, true);         // Text is R-X
-	allocateMemory(rodataAddr, paddr + rodataOffset, rodataSize, true, true, false, false);  // Rodata is R--
-	allocateMemory(dataAddr, paddr + dataOffset, dataSize, true, true, true, false);         // Data+BSS is RW-
+	// TODO: base this off the exheader
+	auto region = FcramRegion::App;
+	u32 bssAddr = dataAddr + (cxi.data.pageCount << 12);
 
-	ncsd.entrypoint = textAddr;
+	allocMemory(textAddr, cxi.text.pageCount, region, true, false, true, MemoryState::Code);
+	allocMemory(rodataAddr, cxi.rodata.pageCount, region, true, false, false, MemoryState::Code);
+	allocMemory(dataAddr, cxi.data.pageCount, region, true, true, false, MemoryState::Private);
+	allocMemory(bssAddr, bssSize >> 12, region, true, true, false, MemoryState::Private);
+
+	// Copy .code file to FCRAM
+	copyToVaddr(textAddr, code.data(), textSize);
+	copyToVaddr(rodataAddr, code.data() + textSize, rodataSize);
+	copyToVaddr(dataAddr, code.data() + textSize + rodataSize, cxi.data.pageCount << 12);
+
+	// Set BSS to zeroes
+	std::vector<u8> bss(bssSize, 0);
+	copyToVaddr(bssAddr, bss.data(), bssSize);
+
+	ncsd.entrypoint = cxi.text.address;
 
 	// Back the IOFile for accessing the ROM, as well as the ROM's CXI partition, in the memory class.
 	CXIFile = ncsd.file;
@@ -85,7 +83,9 @@ bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) {
 
 std::optional<NCSD> Memory::loadNCSD(Crypto::AESEngine& aesEngine, const std::filesystem::path& path) {
 	NCSD ncsd;
-	if (!ncsd.file.open(path, "rb")) return std::nullopt;
+	if (!ncsd.file.open(path, "rb")) {
+		return std::nullopt;
+	}
 
 	u8 magic[4];  // Must be "NCSD"
 	ncsd.file.seek(0x100);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 57eac8ca..650c8d0e 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -6,6 +6,7 @@
 #include <ctime>
 
 #include "config_mem.hpp"
+#include "kernel/fcram.hpp"
 #include "resource_limits.hpp"
 #include "services/fonts.hpp"
 #include "services/ptm.hpp"
@@ -14,38 +15,43 @@ CMRC_DECLARE(ConsoleFonts);
 
 using namespace KernelMemoryTypes;
 
-Memory::Memory(u64& cpuTicks, const EmulatorConfig& config) : cpuTicks(cpuTicks), config(config) {
-	fcram = new uint8_t[FCRAM_SIZE]();
+Memory::Memory(KFcram& fcramManager, const EmulatorConfig& config) : fcramManager(fcramManager), config(config) {
+	const bool fastmemEnabled = config.fastmemEnabled;
+	arena = new Common::HostMemory(FASTMEM_BACKING_SIZE, FASTMEM_VIRTUAL_SIZE, fastmemEnabled);  // NOTE(review): raw new - confirm arena is released elsewhere
 
 	readTable.resize(totalPageCount, 0);
 	writeTable.resize(totalPageCount, 0);
-	memoryInfo.reserve(32);  // Pre-allocate some room for memory allocation info to avoid dynamic allocs
+	paddrTable.resize(totalPageCount, 0);
+
+	fcram = arena->BackingBasePointer() + FASTMEM_FCRAM_OFFSET;  // FCRAM is a window into the arena's backing allocation
+	// arenaDSPRam = arena->BackingBasePointer() + FASTMEM_DSP_RAM_OFFSET;
+	useFastmem = fastmemEnabled && arena->VirtualBasePointer() != nullptr;  // Needs the config flag and a non-null virtual base
 }
 
 void Memory::reset() {
-	// Unallocate all memory
+	// Mark the entire process address space as free
+	constexpr static int MAX_USER_PAGES = 0x40000000 >> 12;
 	memoryInfo.clear();
-	usedFCRAMPages.reset();
-	usedUserMemory = u32(0_MB);
-	usedSystemMemory = u32(0_MB);
+	memoryInfo.push_back(MemoryInfo(0, MAX_USER_PAGES, 0, KernelMemoryTypes::Free));
+
+	// TODO: remove this, only needed to make the subsequent allocations work for now
+	fcramManager.reset(FCRAM_SIZE, FCRAM_APPLICATION_SIZE, FCRAM_SYSTEM_SIZE, FCRAM_BASE_SIZE);
+
+	if (useFastmem) {
+		// Unmap any mappings when resetting
+		arena->Unmap(0, 4_GB, false);
+	}
 
 	for (u32 i = 0; i < totalPageCount; i++) {
 		readTable[i] = 0;
 		writeTable[i] = 0;
+		paddrTable[i] = 0;
 	}
 
-	// Map (32 * 4) KB of FCRAM before the stack for the TLS of each thread
-	std::optional<u32> tlsBaseOpt = findPaddr(32 * 4_KB);
-	if (!tlsBaseOpt.has_value()) {  // Should be unreachable but still good to have
-		Helpers::panic("Failed to allocate memory for thread-local storage");
-	}
-
-	u32 basePaddrForTLS = tlsBaseOpt.value();
-	for (u32 i = 0; i < appResourceLimits.maxThreads; i++) {
-		u32 vaddr = VirtualAddrs::TLSBase + i * VirtualAddrs::TLSSize;
-		allocateMemory(vaddr, basePaddrForTLS, VirtualAddrs::TLSSize, true);
-		basePaddrForTLS += VirtualAddrs::TLSSize;
-	}
+	// Allocate 512 bytes of TLS for each thread. Since the smallest allocatable unit is 4 KB, that means allocating one page for every 8 threads
+	// Note that TLS is always allocated in the Base region
+	s32 tlsPages = (appResourceLimits.maxThreads + 7) >> 3;
+	allocMemory(VirtualAddrs::TLSBase, tlsPages, FcramRegion::Base, true, true, false, MemoryState::Locked);
 
 	// Initialize shared memory blocks and reserve memory for them
 	for (auto& e : sharedMemBlocks) {
@@ -56,19 +62,23 @@ void Memory::reset() {
 		}
 
 		e.mapped = false;
-		e.paddr = allocateSysMemory(e.size);
+		FcramBlockList memBlock;
+		fcramManager.alloc(memBlock, e.size >> 12, FcramRegion::Sys, false);
+		e.paddr = memBlock.begin()->paddr;
 	}
 
 	// Map DSP RAM as R/W at [0x1FF00000, 0x1FF7FFFF]
-	constexpr u32 dspRamPages = DSP_RAM_SIZE / pageSize;               // Number of DSP RAM pages
-	constexpr u32 initialPage = VirtualAddrs::DSPMemStart / pageSize;  // First page of DSP RAM in the virtual address space
+	constexpr u32 dspRamPages = DSP_RAM_SIZE / pageSize;  // Number of DSP RAM pages
 
-	for (u32 i = 0; i < dspRamPages; i++) {
-		auto pointer = uintptr_t(&dspRam[i * pageSize]);
+	u32 vaddr = VirtualAddrs::DSPMemStart;
+	u32 paddr = PhysicalAddrs::DSP_RAM;
 
-		readTable[i + initialPage] = pointer;
-		writeTable[i + initialPage] = pointer;
-	}
+	Operation op{.newState = MemoryState::Static, .r = true, .w = true, .changeState = true, .changePerms = true};
+	changeMemoryState(vaddr, dspRamPages, op);
+	mapPhysicalMemory(vaddr, paddr, dspRamPages, true, true, false);
+
+	// Allocate RW mapping for DSP RAM
+	// addFastmemView(VirtualAddrs::DSPMemStart, FASTMEM_DSP_RAM_OFFSET, DSP_RAM_SIZE, true, false);
 
 	// Later adjusted based on ROM header when possible
 	region = Regions::USA;
@@ -76,14 +86,9 @@ void Memory::reset() {
 
 bool Memory::allocateMainThreadStack(u32 size) {
 	// Map stack pages as R/W
-	std::optional<u32> basePaddr = findPaddr(size);
-	if (!basePaddr.has_value()) {  // Should also be unreachable but still good to have
-		return false;
-	}
-
+	// TODO: get the region from the exheader
 	const u32 stackBottom = VirtualAddrs::StackTop - size;
-	std::optional<u32> result = allocateMemory(stackBottom, basePaddr.value(), size, true);  // Should never be nullopt
-	return result.has_value();
+	return allocMemory(stackBottom, size >> 12, FcramRegion::App, true, true, false, MemoryState::Locked);  // Byte size -> 4 KB page count for allocMemory
 }
 
 u8 Memory::read8(u32 vaddr) {
@@ -120,7 +125,7 @@ u8 Memory::read8(u32 vaddr) {
 			case ConfigMem::FirmRevision: return firm.revision;
 			case ConfigMem::FirmVersionMinor: return firm.minor;
 			case ConfigMem::FirmVersionMajor: return firm.major;
-			case ConfigMem::WifiLevel: return 0; // No wifi :(
+			case ConfigMem::WifiLevel: return 0;  // No wifi :(
 
 			case ConfigMem::WifiMac:
 			case ConfigMem::WifiMac + 1:
@@ -163,8 +168,8 @@ u32 Memory::read32(u32 vaddr) {
 			case ConfigMem::Datetime0 + 4:
 				return u32(timeSince3DSEpoch() >> 32);  // top 32 bits
 			// Ticks since time was last updated. For now we return the current tick count
-			case ConfigMem::Datetime0 + 8: return u32(cpuTicks);
-			case ConfigMem::Datetime0 + 12: return u32(cpuTicks >> 32);
+			case ConfigMem::Datetime0 + 8: return u32(*cpuTicks);
+			case ConfigMem::Datetime0 + 12: return u32(*cpuTicks >> 32);
 			case ConfigMem::Datetime0 + 16: return 0xFFB0FF0;  // Unknown, set by PTM
 			case ConfigMem::Datetime0 + 20:
 			case ConfigMem::Datetime0 + 24:
@@ -172,11 +177,10 @@ u32 Memory::read32(u32 vaddr) {
 
 			case ConfigMem::AppMemAlloc: return appResourceLimits.maxCommit;
 			case ConfigMem::SyscoreVer: return 2;
-			case 0x1FF81000: return 0;                   // TODO: Figure out what this config mem address does
+			case 0x1FF81000:
+				return 0;  // TODO: Figure out what this config mem address does
 			// Wifi MAC: First 4 bytes of MAC Address
-			case ConfigMem::WifiMac:
-				return (u32(MACAddress[3]) << 24) | (u32(MACAddress[2]) << 16) | (u32(MACAddress[1]) << 8) |
-					   MACAddress[0];
+			case ConfigMem::WifiMac: return (u32(MACAddress[3]) << 24) | (u32(MACAddress[2]) << 16) | (u32(MACAddress[1]) << 8) | MACAddress[0];
 
 			// 3D slider. Float in range 0.0 = off, 1.0 = max.
 			case ConfigMem::SliderState3D: return Helpers::bit_cast<u32, float>(0.0f);
@@ -186,7 +190,7 @@ u32 Memory::read32(u32 vaddr) {
 			default:
 				if (vaddr >= VirtualAddrs::VramStart && vaddr < VirtualAddrs::VramStart + VirtualAddrs::VramSize) {
 					static int shutUpCounter = 0;
-					if (shutUpCounter < 5) { // Stop spamming about VRAM reads after the first 5
+					if (shutUpCounter < 5) {  // Stop spamming about VRAM reads after the first 5
 						shutUpCounter++;
 						Helpers::warn("VRAM read!\n");
 					}
@@ -296,149 +300,254 @@ std::string Memory::readString(u32 address, u32 maxSize) {
 // thanks to the New 3DS having more FCRAM
 u32 Memory::getLinearHeapVaddr() { return (kernelVersion < 0x22C) ? VirtualAddrs::LinearHeapStartOld : VirtualAddrs::LinearHeapStartNew; }
 
-std::optional<u32> Memory::allocateMemory(u32 vaddr, u32 paddr, u32 size, bool linear, bool r, bool w, bool x, bool adjustAddrs, bool isMap) {
-	// Kernel-allocated memory & size must always be aligned to a page boundary
-	// Additionally assert we don't OoM and that we don't try to allocate physical FCRAM past what's available to userland
-	// If we're mapping there's no fear of OoM, because we're not really allocating memory, just binding vaddrs to specific paddrs
-	assert(isAligned(vaddr) && isAligned(paddr) && isAligned(size));
-	assert(size <= FCRAM_APPLICATION_SIZE || isMap);
-	assert(usedUserMemory + size <= FCRAM_APPLICATION_SIZE || isMap);
-	assert(paddr + size <= FCRAM_APPLICATION_SIZE || isMap);
+void Memory::changeMemoryState(u32 vaddr, s32 pages, const Operation& op) {  // Applies op (new state and/or permissions) to `pages` pages starting at vaddr, splitting and re-merging memoryInfo blocks
+	assert(!(vaddr & 0xFFF));  // vaddr must be aligned to a 4 KiB page
 
-	// Amount of available user FCRAM pages and FCRAM pages to allocate respectively
-	const u32 availablePageCount = (FCRAM_APPLICATION_SIZE - usedUserMemory) / pageSize;
-	const u32 neededPageCount = size / pageSize;
+	if (!op.changePerms && !op.changeState) Helpers::panic("Invalid op passed to changeMemoryState!");  // An op has to change at least one of the two
 
-	assert(availablePageCount >= neededPageCount || isMap);
+	bool blockFound = false;
 
-	// If the paddr is 0, that means we need to select our own
-	// TODO: Fix. This method always tries to allocate blocks linearly.
-	// However, if the allocation is non-linear, the panic will trigger when it shouldn't.
-	// Non-linear allocation needs special handling
-	if (paddr == 0 && adjustAddrs) {
-		std::optional<u32> newPaddr = findPaddr(size);
-		if (!newPaddr.has_value()) {
-			Helpers::panic("Failed to find paddr");
+	for (auto it = memoryInfo.begin(); it != memoryInfo.end(); it++) {
+		// Find the block that the memory region is located in
+		u32 blockStart = it->baseAddr;
+		u32 blockEnd = it->end();
+
+		u32 reqStart = vaddr;
+		u32 reqEnd = vaddr + (pages << 12);  // Page count -> bytes (1 page = 1 << 12 bytes)
+
+		if (!(reqStart >= blockStart && reqEnd <= blockEnd)) continue;  // The request must fit entirely inside one existing block
+
+		// Now that the block has been found, fill it with the necessary info
+		auto oldState = it->state;  // Saved so the split-off remainders keep the previous state/perms
+		u32 oldPerms = it->perms;
+		it->baseAddr = reqStart;  // The found block is shrunk in place to exactly the requested range
+		it->pages = pages;
+		if (op.changePerms) it->perms = (op.r ? PERMISSION_R : 0) | (op.w ? PERMISSION_W : 0) | (op.x ? PERMISSION_X : 0);
+		if (op.changeState) it->state = op.newState;
+
+		// If the requested memory region is smaller than the block found, the block must be split
+		if (blockStart < reqStart) {
+			MemoryInfo startBlock(blockStart, (reqStart - blockStart) >> 12, oldPerms, oldState);
+			memoryInfo.insert(it, startBlock);  // NOTE(review): `it` is still used after this insert - presumably memoryInfo is a std::list (stable iterators); confirm
 		}
 
-		paddr = newPaddr.value();
-		assert(paddr + size <= FCRAM_APPLICATION_SIZE || isMap);
-	}
-
-	// If the vaddr is 0 that means we need to select our own
-	// Depending on whether our mapping should be linear or not we allocate from one of the 2 typical heap spaces
-	// We don't plan on implementing freeing any time soon, so we can pick added userUserMemory to the vaddr base to
-	// Get the full vaddr.
-	// TODO: Fix this
-	if (vaddr == 0 && adjustAddrs) {
-		// Linear memory needs to be allocated in a way where you can easily get the paddr by subtracting the linear heap base
-		// In order to be able to easily send data to hardware like the GPU
-		if (linear) {
-			vaddr = getLinearHeapVaddr() + paddr;
-		} else {
-			vaddr = usedUserMemory + VirtualAddrs::NormalHeapStart;
-		}
-	}
-
-	if (!isMap) {
-		usedUserMemory += size;
-	}
-
-	// Do linear mapping
-	u32 virtualPage = vaddr >> pageShift;
-	u32 physPage = paddr >> pageShift;  // TODO: Special handle when non-linear mapping is necessary
-	for (u32 i = 0; i < neededPageCount; i++) {
-		if (r) {
-			readTable[virtualPage] = uintptr_t(&fcram[physPage * pageSize]);
-		}
-		if (w) {
-			writeTable[virtualPage] = uintptr_t(&fcram[physPage * pageSize]);
+		if (reqEnd < blockEnd) {
+			auto itAfter = std::next(it);  // Insert position: directly after the resized block
+			MemoryInfo endBlock(reqEnd, (blockEnd - reqEnd) >> 12, oldPerms, oldState);
+			memoryInfo.insert(itAfter, endBlock);
 		}
 
-		// Mark FCRAM page as allocated and go on
-		usedFCRAMPages[physPage] = true;
-		virtualPage++;
-		physPage++;
+		blockFound = true;
+		break;
 	}
 
-	// Back up the info for this allocation in our memoryInfo vector
-	u32 perms = (r ? PERMISSION_R : 0) | (w ? PERMISSION_W : 0) | (x ? PERMISSION_X : 0);
-	memoryInfo.push_back(std::move(MemoryInfo(vaddr, size, perms, KernelMemoryTypes::Reserved)));
+	if (!blockFound) Helpers::panic("Unable to find block in changeMemoryState!");
 
-	return vaddr;
+	// Merge all blocks with the same state and permissions
+	for (auto it = memoryInfo.begin(); it != memoryInfo.end();) {
+		auto next = std::next(it);
+		if (next == memoryInfo.end()) break;
+
+		if (it->state != next->state || it->perms != next->perms) {
+			it++;
+			continue;
+		}
+
+		next->baseAddr = it->baseAddr;  // Fold `it` into `next`; NOTE(review): presumes consecutive entries are adjacent in address space - confirm
+		next->pages += it->pages;
+		it = memoryInfo.erase(it);  // Continue from the merged block so that runs of equal blocks keep collapsing
+	}
 }
 
-// Find a paddr which we can use for allocating "size" bytes
-std::optional<u32> Memory::findPaddr(u32 size) {
-	assert(isAligned(size));
-	const u32 neededPages = size / pageSize;
+void Memory::queryPhysicalBlocks(FcramBlockList& outList, u32 vaddr, s32 pages) {  // Appends to outList the physical blocks backing `pages` pages starting at vaddr; panics if the range cannot be fully covered
+	s32 srcPages = pages;  // Pages still left to resolve
+	for (auto& alloc : memoryInfo) {  // NOTE(review): a single forward pass presumes memoryInfo is ordered by address - confirm
+		u32 blockStart = alloc.baseAddr;
+		u32 blockEnd = alloc.end();
 
-	// The FCRAM page we're testing to see if it's appropriate to use
-	u32 candidatePage = 0;
-	// The number of linear available pages we could find starting from this candidate page.
-	// If this ends up >= than neededPages then the paddr is good (ie we can use the candidate page as a base address)
-	u32 counter = 0;
+		if (!(vaddr >= blockStart && vaddr < blockEnd)) continue;  // Skip blocks that do not contain the current vaddr
 
-	for (u32 i = 0; i < FCRAM_APPLICATION_PAGE_COUNT; i++) {
-		if (usedFCRAMPages[i]) {  // Page is occupied already, go to new candidate
-			candidatePage = i + 1;
-			counter = 0;
-		} else {  // The paddr we're testing has 1 more free page
-			counter++;
-			// Check if there's enough free memory to use this page
-			// We use == instead of >= because some software does 0-byte allocations
-			if (counter >= neededPages) {
-				return candidatePage * pageSize;
+		s32 blockPaddr = paddrTable[vaddr >> 12];  // Physical address recorded for the first requested page inside this block
+		s32 blockPages = alloc.pages - ((vaddr - blockStart) >> 12);  // Pages from vaddr to the end of this block
+		blockPages = std::min(srcPages, blockPages);  // Never take more than what is still requested
+		FcramBlock physicalBlock(blockPaddr, blockPages);  // NOTE(review): treats the run as physically contiguous from blockPaddr - confirm this holds once changeMemoryState has merged blocks
+		outList.push_back(physicalBlock);
+
+		vaddr += blockPages << 12;  // Move on to the first page after the part just recorded
+		srcPages -= blockPages;
+		if (srcPages == 0) break;
+	}
+
+	if (srcPages != 0) Helpers::panic("Unable to find virtual pages to map!");
+}
+
+void Memory::mapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages, bool r, bool w, bool x) {  // Points `pages` guest pages starting at vaddr at physical memory starting at paddr; x is not used in this body
+	assert(!(vaddr & 0xFFF));  // Both addresses must be 4 KiB aligned
+	assert(!(paddr & 0xFFF));
+
+	// TODO: make this a separate function
+	u8* hostPtr = nullptr;  // Stays nullptr when paddr lies in neither FCRAM nor DSP RAM
+	if (paddr < FCRAM_SIZE) {
+		hostPtr = fcram + paddr;  // FIXME: FCRAM doesn't actually start from physical address 0, but from 0x20000000
+
+		if (useFastmem) {
+			addFastmemView(vaddr, FASTMEM_FCRAM_OFFSET + paddr, usize(pages) * pageSize, w);  // Fastmem views are only created for FCRAM-backed mappings
+		}
+	} else if (paddr >= VirtualAddrs::DSPMemStart && paddr < VirtualAddrs::DSPMemStart + DSP_RAM_SIZE) {
+		hostPtr = dspRam + (paddr - VirtualAddrs::DSPMemStart);
+	}
+
+	for (int i = 0; i < pages; i++) {
+		u32 index = (vaddr >> 12) + i;  // Page-table index of the i-th page
+		paddrTable[index] = paddr + (i << 12);  // Record the physical address behind every mapped page
+		if (r)
+			readTable[index] = (uintptr_t)(hostPtr + (i << 12));
+		else
+			readTable[index] = 0;  // Entry is cleared when read permission is not granted
+
+		if (w)
+			writeTable[index] = (uintptr_t)(hostPtr + (i << 12));
+		else
+			writeTable[index] = 0;  // Entry is cleared when write permission is not granted
+	}
+}
+
+void Memory::unmapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages) {  // Clears the table entries of `pages` pages starting at vaddr; paddr is not used in this body
+	for (int i = 0; i < pages; i++) {
+		u32 index = (vaddr >> 12) + i;  // Page-table index of the i-th page
+		paddrTable[index] = 0;
+		readTable[index] = 0;
+		writeTable[index] = 0;
+	}
+
+	if (useFastmem) {
+		arena->Unmap(vaddr, pages * pageSize, false);  // Also drop the range from the fastmem arena
+	}
+}
+
+bool Memory::allocMemory(u32 vaddr, s32 pages, FcramRegion region, bool r, bool w, bool x, MemoryState state) {  // Allocates `pages` FCRAM pages from region and maps them at vaddr; returns false if the range is not Free
+	auto res = testMemoryState(vaddr, pages, MemoryState::Free);  // The whole destination range has to be Free
+	if (res.isFailure()) return false;
+
+	FcramBlockList memList;
+	fcramManager.alloc(memList, pages, region, false);  // NOTE(review): no failure indication from alloc is checked here; the last argument is true in allocMemoryLinear, so presumably it requests a linear allocation - confirm
+
+	for (auto it = memList.begin(); it != memList.end(); it++) {  // Each returned physical block is placed back-to-back in virtual memory
+		Operation op{.newState = state, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true};
+		changeMemoryState(vaddr, it->pages, op);
+		mapPhysicalMemory(vaddr, it->paddr, it->pages, r, w, x);
+		vaddr += it->pages << 12;  // Advance by the block's size in bytes
+	}
+
+	return true;
+}
+
+bool Memory::allocMemoryLinear(u32& outVaddr, u32 inVaddr, s32 pages, FcramRegion region, bool r, bool w, bool x) {  // Allocates `pages` pages and maps them at linear-heap base + paddr; the chosen address is written to outVaddr
+	if (inVaddr) Helpers::panic("inVaddr specified for linear allocation!");  // Caller-chosen addresses are not supported here
+
+	FcramBlockList memList;
+	fcramManager.alloc(memList, pages, region, true);
+
+	u32 paddr = memList.begin()->paddr;  // NOTE(review): assumes alloc produced at least one block and that the first block covers all `pages` - confirm
+	u32 vaddr = getLinearHeapVaddr() + paddr;  // The virtual address is derived directly from the physical address
+	auto res = testMemoryState(vaddr, pages, MemoryState::Free);
+	if (res.isFailure()) Helpers::panic("Unable to map linear allocation (vaddr:%08X pages:%08X)", vaddr, pages);
+
+	Operation op{.newState = MemoryState::Continuous, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true};
+	changeMemoryState(vaddr, pages, op);
+	mapPhysicalMemory(vaddr, paddr, pages, r, w, x);
+
+	outVaddr = vaddr;
+	return true;  // Every failure path above panics, so a normal return is always true
+}
+
+bool Memory::mapVirtualMemory(  // Maps the physical pages behind srcVaddr at dstVaddr (or optionally unmaps dstVaddr when newDstState is Free); returns false on a state mismatch
+	u32 dstVaddr, u32 srcVaddr, s32 pages, bool r, bool w, bool x, MemoryState oldDstState, MemoryState oldSrcState, MemoryState newDstState,
+	MemoryState newSrcState, bool unmapPages
+) {
+	// Check that the regions have the specified state
+	// TODO: check src perms
+	auto res = testMemoryState(srcVaddr, pages, oldSrcState);
+	if (res.isFailure()) return false;
+
+	res = testMemoryState(dstVaddr, pages, oldDstState);
+	if (res.isFailure()) return false;
+
+	// Change the virtual memory state for both regions
+	Operation srcOp{.newState = newSrcState, .changeState = true};  // Only the state of the source changes; its permissions are kept
+	changeMemoryState(srcVaddr, pages, srcOp);
+
+	Operation dstOp{.newState = newDstState, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true};
+	changeMemoryState(dstVaddr, pages, dstOp);
+
+	// Get a list of physical blocks in the source region
+	FcramBlockList physicalList;
+	queryPhysicalBlocks(physicalList, srcVaddr, pages);
+
+	// Map or unmap each physical block
+	for (auto& block : physicalList) {
+		if (newDstState == MemoryState::Free) {  // The destination is being released rather than mapped
+			// TODO: Games with CROs will unmap the CRO yet still continue accessing the address it was mapped to?
+			if (unmapPages) {
+				unmapPhysicalMemory(dstVaddr, block.paddr, block.pages);
 			}
+		} else {
+			mapPhysicalMemory(dstVaddr, block.paddr, block.pages, r, w, x);
 		}
+
+		dstVaddr += block.pages << 12;  // The next physical block goes right after this one in the destination range
 	}
 
-	// Couldn't find any page :(
-	return std::nullopt;
+	return true;
 }
 
-u32 Memory::allocateSysMemory(u32 size) {
-	// Should never be triggered, only here as a sanity check
-	if (!isAligned(size)) {
-		Helpers::panic("Memory::allocateSysMemory: Size is not page aligned (val = %08X)", size);
+void Memory::changePermissions(u32 vaddr, s32 pages, bool r, bool w, bool x) {  // Sets R/W/X on `pages` pages starting at vaddr, in memoryInfo and in the host page tables
+	Operation op{.r = r, .w = w, .x = x, .changePerms = true};  // Permissions only; the memory state is left as it is
+	changeMemoryState(vaddr, pages, op);
+
+	// Now that permissions have been changed, update the corresponding host tables
+	FcramBlockList physicalList;
+	queryPhysicalBlocks(physicalList, vaddr, pages);
+
+	for (auto& block : physicalList) {
+		mapPhysicalMemory(vaddr, block.paddr, block.pages, r, w, x);
+		vaddr += block.pages << 12;  // Advance by the block's size in bytes (4 KiB pages), not by its page count
 	}
-
-	// We use a pretty dumb allocator for OS memory since this is not really accessible to the app and is only used internally
-	// It works by just allocating memory linearly, starting from index 0 of OS memory and going up
-	// This should also be unreachable in practice and exists as a sanity check
-	if (size > remainingSysFCRAM()) {
-		Helpers::panic("Memory::allocateSysMemory: Overflowed OS FCRAM");
-	}
-
-	const u32 pageCount = size / pageSize;                      // Number of pages that will be used up
-	const u32 startIndex = sysFCRAMIndex() + usedSystemMemory;  // Starting FCRAM index
-	const u32 startingPage = startIndex / pageSize;
-
-	for (u32 i = 0; i < pageCount; i++) {
-		if (usedFCRAMPages[startingPage + i])  // Also a theoretically unreachable panic for safety
-			Helpers::panic("Memory::reserveMemory: Trying to reserve already reserved memory");
-		usedFCRAMPages[startingPage + i] = true;
-	}
-
-	usedSystemMemory += size;
-	return startIndex;
 }
 
-// The way I understand how the kernel's QueryMemory is supposed to work is that you give it a vaddr
-// And the kernel looks up the memory allocations it's performed, finds which one it belongs in and returns its info?
-// TODO: Verify this
-MemoryInfo Memory::queryMemory(u32 vaddr) {
+Result::HorizonResult Memory::queryMemory(MemoryInfo& out, u32 vaddr) {  // Copies the memoryInfo block containing vaddr into out; returns FailurePlaceholder (out untouched) if none contains it
 	// Check each allocation
 	for (auto& alloc : memoryInfo) {
 		// Check if the memory address belongs in this allocation and return the info if so
 		if (vaddr >= alloc.baseAddr && vaddr < alloc.end()) {
-			return alloc;
+			out = alloc;
+			return Result::Success;
 		}
 	}
 
-	// Otherwise, if this vaddr was never allocated
-	// TODO: I think this is meant to return how much memory starting here is free as the size?
-	return MemoryInfo(vaddr, pageSize, 0, KernelMemoryTypes::Free);
+	// Official kernel just returns an error here
+	Helpers::warn("Failed to find block in QueryMemory!");
+	return Result::FailurePlaceholder;
+}
+
+Result::HorizonResult Memory::testMemoryState(u32 vaddr, s32 pages, MemoryState desiredState) {  // Succeeds only if every block overlapping `pages` pages from vaddr is in desiredState
+	for (auto& alloc : memoryInfo) {  // NOTE(review): the early-success logic presumes memoryInfo is ordered by address with no gaps - confirm
+		// Don't bother checking if we're to the left of the requested region
+		if (vaddr >= alloc.end()) continue;
+		if (alloc.state != desiredState) return Result::FailurePlaceholder;  // TODO: error for state mismatch
+
+		// If the end of this block comes after the end of the requested range with no errors, it's a success
+		if (alloc.end() >= vaddr + (pages << 12)) return Result::Success;  // Otherwise keep going: the following block must match as well
+	}
+
+	// TODO: error for when address is outside of userland
+	return Result::FailurePlaceholder;
+}
+
+void Memory::copyToVaddr(u32 dstVaddr, const u8* srcHost, s32 size) {  // Copies `size` bytes from host memory to the guest address dstVaddr
+	// TODO: check for noncontiguous allocations
+	u8* dstHost = (u8*)readTable[dstVaddr >> 12] + (dstVaddr & 0xFFF);  // Host pointer is taken from readTable for the first page only; NOTE(review): a cleared (0) entry is not checked for - confirm callers guarantee a mapped page
+	memcpy(dstHost, srcHost, size);
 }
 
 u8* Memory::mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerms) {
@@ -459,13 +568,11 @@ u8* Memory::mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerm
 			bool w = myPerms & 0b010;
 			bool x = myPerms & 0b100;
 
-			const auto result = allocateMemory(vaddr, paddr, size, true, r, w, x, false, true);
-			e.mapped = true;
-			if (!result.has_value()) {
-				Helpers::panic("Memory::mapSharedMemory: Failed to map shared memory block");
-				return nullptr;
-			}
+			Operation op{.newState = MemoryState::Shared, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true};  // Record the same r/w/x bits that are passed to mapPhysicalMemory below
+			changeMemoryState(vaddr, size >> 12, op);  // size is in bytes; both calls take a page count
+			mapPhysicalMemory(vaddr, paddr, size >> 12, r, w, x);
 
+			e.mapped = true;
 			return &fcram[paddr];
 		}
 	}
@@ -475,24 +582,6 @@ u8* Memory::mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerm
 	return nullptr;
 }
 
-void Memory::mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size) {
-	// Should theoretically be unreachable, only here for safety purposes
-	assert(isAligned(destAddress) && isAligned(sourceAddress) && isAligned(size));
-
-	const u32 pageCount = size / pageSize;  // How many pages we need to mirror
-	for (u32 i = 0; i < pageCount; i++) {
-		// Redo the shift here to "properly" handle wrapping around the address space instead of reading OoB
-		const u32 sourcePage = sourceAddress / pageSize;
-		const u32 destPage = destAddress / pageSize;
-
-		readTable[destPage] = readTable[sourcePage];
-		writeTable[destPage] = writeTable[sourcePage];
-
-		sourceAddress += pageSize;
-		destAddress += pageSize;
-	}
-}
-
 // Get the number of ms since Jan 1 1900
 u64 Memory::timeSince3DSEpoch() {
 	using namespace std::chrono;
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index 57ccca18..659a1e1d 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -2,6 +2,7 @@
 
 #include <stb_image_write.h>
 
+#include <algorithm>
 #include <bit>
 #include <cmrc/cmrc.hpp>
 
@@ -11,10 +12,10 @@
 #include "PICA/pica_hash.hpp"
 #include "PICA/pica_simd.hpp"
 #include "PICA/regs.hpp"
-#include "screen_layout.hpp"
 #include "PICA/shader_decompiler.hpp"
 #include "config.hpp"
 #include "math_util.hpp"
+#include "screen_layout.hpp"
 
 CMRC_DECLARE(RendererGL);
 
@@ -71,19 +72,13 @@ void RendererGL::initGraphicsContextInternal() {
 	// Create stream buffers for vertex, index and uniform buffers
 	static constexpr usize hwIndexBufferSize = 2_MB;
 	static constexpr usize hwVertexBufferSize = 16_MB;
+	static constexpr usize hwShaderUniformUBOSize = 4_MB;
+	static constexpr usize shadergenFragmentUBOSize = 4_MB;
 
 	hwIndexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, hwIndexBufferSize);
 	hwVertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, hwVertexBufferSize);
-
-	// Allocate memory for the shadergen fragment uniform UBO
-	glGenBuffers(1, &shadergenFragmentUBO);
-	gl.bindUBO(shadergenFragmentUBO);
-	glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW);
-
-	// Allocate memory for the accelerated vertex shader uniform UBO
-	glGenBuffers(1, &hwShaderUniformUBO);
-	gl.bindUBO(hwShaderUniformUBO);
-	glBufferData(GL_UNIFORM_BUFFER, PICAShader::totalUniformSize(), nullptr, GL_DYNAMIC_DRAW);
+	hwShaderUniformUBO = StreamBuffer::Create(GL_UNIFORM_BUFFER, hwShaderUniformUBOSize);
+	shadergenFragmentUBO = StreamBuffer::Create(GL_UNIFORM_BUFFER, shadergenFragmentUBOSize);
 
 	vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize * 2, GL_STREAM_DRAW);
 	vbo.bind();
@@ -185,6 +180,10 @@ void RendererGL::initGraphicsContextInternal() {
 	driverInfo.supportsExtFbFetch = (GLAD_GL_EXT_shader_framebuffer_fetch != 0);
 	driverInfo.supportsArmFbFetch = (GLAD_GL_ARM_shader_framebuffer_fetch != 0);
 
+	// UBOs have an alignment requirement we have to respect
+	glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&driverInfo.uboAlignment));
+	driverInfo.uboAlignment = std::max<GLuint>(driverInfo.uboAlignment, 16);
+
 	// Initialize the default vertex shader used with shadergen
 	std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader();
 	defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
@@ -192,7 +191,7 @@ void RendererGL::initGraphicsContextInternal() {
 
 // The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
 // So we just call initGraphicsContextInternal for both
-void RendererGL::initGraphicsContext([[maybe_unused]] SDL_Window* window) { initGraphicsContextInternal(); }
+void RendererGL::initGraphicsContext([[maybe_unused]] void* context) { initGraphicsContextInternal(); }
 
 // Set up the OpenGL blending context to match the emulated PICA
 void RendererGL::setupBlending() {
@@ -936,10 +935,6 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
 			glUniformBlockBinding(program.handle(), vertexUBOIndex, vsUBOBlockBinding);
 		}
 	}
-	glBindBufferBase(GL_UNIFORM_BUFFER, fsUBOBlockBinding, shadergenFragmentUBO);
-	if (usingAcceleratedShader) {
-		glBindBufferBase(GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO);
-	}
 
 	// Upload uniform data to our shader's UBO
 	PICA::FragmentUniforms uniforms;
@@ -1023,8 +1018,22 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
 		}
 	}
 
-	gl.bindUBO(shadergenFragmentUBO);
-	glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms);
+	// Upload fragment uniforms to UBO
+	shadergenFragmentUBO->Bind();
+	auto uboRes = shadergenFragmentUBO->Map(driverInfo.uboAlignment, sizeof(PICA::FragmentUniforms));
+	std::memcpy(uboRes.pointer, &uniforms, sizeof(PICA::FragmentUniforms));
+	shadergenFragmentUBO->Unmap(sizeof(PICA::FragmentUniforms));
+
+	// Bind our UBOs
+	glBindBufferRange(
+		GL_UNIFORM_BUFFER, fsUBOBlockBinding, shadergenFragmentUBO->GetGLBufferId(), uboRes.buffer_offset, sizeof(PICA::FragmentUniforms)
+	);
+
+	if (usingAcceleratedShader) {
+		glBindBufferRange(
+			GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO->GetGLBufferId(), hwShaderUniformUBOOffset, PICAShader::totalUniformSize()
+		);
+	}
 
 	return program;
 }
@@ -1074,11 +1083,16 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration*
 			usingAcceleratedShader = false;
 		} else {
 			generatedVertexShader = &(*shader);
-			gl.bindUBO(hwShaderUniformUBO);
+			hwShaderUniformUBO->Bind();
 
+			// Upload shader uniforms to our UBO
 			if (shaderUnit.vs.uniformsDirty) {
 				shaderUnit.vs.uniformsDirty = false;
-				glBufferSubData(GL_UNIFORM_BUFFER, 0, PICAShader::totalUniformSize(), shaderUnit.vs.getUniformPointer());
+				auto uboRes = hwShaderUniformUBO->Map(driverInfo.uboAlignment, PICAShader::totalUniformSize());
+				std::memcpy(uboRes.pointer, shaderUnit.vs.getUniformPointer(), PICAShader::totalUniformSize());
+				hwShaderUniformUBO->Unmap(PICAShader::totalUniformSize());
+
+				hwShaderUniformUBOOffset = uboRes.buffer_offset;
 			}
 
 			performIndexedRender = accel->indexed;
diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp b/src/core/renderer_mtl/pica_to_mtl.cpp
index 973ad1bf..21b520f2 100644
--- a/src/core/renderer_mtl/pica_to_mtl.cpp
+++ b/src/core/renderer_mtl/pica_to_mtl.cpp
@@ -16,27 +16,27 @@ namespace PICA {
 		 decodeTexelAI8ToRG8,
 		 true,
 		 {
-			 .red = MTL::TextureSwizzleRed,
-			 .green = MTL::TextureSwizzleRed,
-			 .blue = MTL::TextureSwizzleRed,
-			 .alpha = MTL::TextureSwizzleGreen,
+			 MTL::TextureSwizzleRed,
+			 MTL::TextureSwizzleRed,
+			 MTL::TextureSwizzleRed,
+			 MTL::TextureSwizzleGreen,
 		 }},                                                 // IA8
 		{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8},  // RG8
 		{MTL::PixelFormatR8Unorm,
 		 1,
 		 decodeTexelI8ToR8,
 		 true,
-		 {.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}},  // I8
-		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},                                                                                      // A8
-		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4},                                                                               // IA4
+		 {MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleOne}},  // I8
+		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},                                                     // A8
+		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4},                                              // IA4
 		{MTL::PixelFormatR8Unorm,
 		 1,
 		 decodeTexelI4ToR8,
 		 true,
-		 {.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}},  // I4
-		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},                                                                                      // A4
-		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},                                                                              // ETC1
-		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8},  // ETC1A4
+		 {MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleOne}},  // I4
+		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},                                                     // A4
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},                                             // ETC1
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8},                                           // ETC1A4
 	};
 
 	void checkForMTLPixelFormatSupport(MTL::Device* device) {
@@ -57,10 +57,10 @@ namespace PICA {
 				decodeTexelAI4ToRG8,
 				true,
 				{
-					.red = MTL::TextureSwizzleRed,
-					.green = MTL::TextureSwizzleRed,
-					.blue = MTL::TextureSwizzleRed,
-					.alpha = MTL::TextureSwizzleGreen,
+					MTL::TextureSwizzleRed,
+					MTL::TextureSwizzleRed,
+					MTL::TextureSwizzleRed,
+					MTL::TextureSwizzleGreen,
 				}
 			};
 		}
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 71cdd616..adee8f3c 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -10,8 +10,8 @@
 
 #include "PICA/gpu.hpp"
 #include "PICA/pica_hash.hpp"
-#include "screen_layout.hpp"
 #include "SDL_metal.h"
+#include "screen_layout.hpp"
 
 using namespace PICA;
 
@@ -57,9 +57,7 @@ void RendererMTL::reset() {
 	colorRenderTargetCache.reset();
 }
 
-void RendererMTL::setMTKLayer(void* layer) {
-	metalLayer = (CA::MetalLayer*)layer;
-}
+void RendererMTL::setMTKLayer(void* layer) { metalLayer = static_cast<CA::MetalLayer*>(layer); }  // Keeps the opaque layer pointer as a CA::MetalLayer*
 
 void RendererMTL::display() {
 	CA::MetalDrawable* drawable = metalLayer->nextDrawable();
@@ -151,13 +149,13 @@ void RendererMTL::display() {
 	drawable->release();
 }
 
-void RendererMTL::initGraphicsContext(SDL_Window* window) {
-	// On iOS, the SwiftUI side handles the MetalLayer
-#ifdef PANDA3DS_IOS
+void RendererMTL::initGraphicsContext(void* window) {
+	// On Qt and iOS, the frontend handles the Metal layer
+#if defined(PANDA3DS_FRONTEND_QT) || defined(PANDA3DS_IOS)
 	device = MTL::CreateSystemDefaultDevice();
 #else
 	// TODO: what should be the type of the view?
-	void* view = SDL_Metal_CreateView(window);
+	void* view = SDL_Metal_CreateView((SDL_Window*)window);
 	metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view);
 	device = MTL::CreateSystemDefaultDevice();
 	metalLayer->setDevice(device);
diff --git a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp
index 4be9d089..4db2696f 100644
--- a/src/core/renderer_null/renderer_null.cpp
+++ b/src/core/renderer_null/renderer_null.cpp
@@ -6,7 +6,7 @@ RendererNull::~RendererNull() {}
 
 void RendererNull::reset() {}
 void RendererNull::display() {}
-void RendererNull::initGraphicsContext(SDL_Window* window) {}
+void RendererNull::initGraphicsContext(void* context) {}
 void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {}
 void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {}
 void RendererNull::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {}
diff --git a/src/core/renderer_sw/renderer_sw.cpp b/src/core/renderer_sw/renderer_sw.cpp
index 86b6032f..a117e373 100644
--- a/src/core/renderer_sw/renderer_sw.cpp
+++ b/src/core/renderer_sw/renderer_sw.cpp
@@ -7,7 +7,7 @@ RendererSw::~RendererSw() {}
 void RendererSw::reset() { printf("RendererSW: Unimplemented reset call\n"); }
 void RendererSw::display() { printf("RendererSW: Unimplemented display call\n"); }
 
-void RendererSw::initGraphicsContext(SDL_Window* window) { printf("RendererSW: Unimplemented initGraphicsContext call\n"); }
+void RendererSw::initGraphicsContext(void* context) { printf("RendererSW: Unimplemented initGraphicsContext call\n"); }
 void RendererSw::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { printf("RendererSW: Unimplemented clearBuffer call\n"); }
 
 void RendererSw::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp
index 57533bde..0bf9fae5 100644
--- a/src/core/renderer_vk/renderer_vk.cpp
+++ b/src/core/renderer_vk/renderer_vk.cpp
@@ -173,7 +173,8 @@ std::tuple<vk::UniquePipeline, vk::UniquePipelineLayout> createGraphicsPipeline(
 	vk::PipelineDynamicStateCreateInfo dynamicState = {};
 	static vk::DynamicState dynamicStates[] = {// The viewport and scissor of the framebuffer will be dynamic at
 											   // run-time
-											   vk::DynamicState::eViewport, vk::DynamicState::eScissor};
+											   vk::DynamicState::eViewport, vk::DynamicState::eScissor
+	};
 	dynamicState.dynamicStateCount = std::size(dynamicStates);
 	dynamicState.pDynamicStates = dynamicStates;
 
@@ -469,7 +470,8 @@ vk::RenderPass RendererVK::getRenderPass(vk::Format colorFormat, std::optional<v
 		vk::SubpassDependency(
 			0, VK_SUBPASS_EXTERNAL, vk::PipelineStageFlagBits::eAllGraphics, vk::PipelineStageFlagBits::eAllGraphics,
 			vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::DependencyFlagBits::eByRegion
-		)};
+		)
+	};
 
 	renderPassInfo.setDependencies(subpassDependencies);
 
@@ -892,8 +894,8 @@ using VulkanDynamicLoader = vk::detail::DynamicLoader;
 using VulkanDynamicLoader = vk::DynamicLoader;
 #endif
 
-void RendererVK::initGraphicsContext(SDL_Window* window) {
-	targetWindow = window;
+void RendererVK::initGraphicsContext(void* windowPointer) {
+	targetWindow = (SDL_Window*)windowPointer;
 	// Resolve all instance function pointers
 	static VulkanDynamicLoader dl;
 	VULKAN_HPP_DEFAULT_DISPATCHER.init(dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr"));
@@ -978,8 +980,8 @@ void RendererVK::initGraphicsContext(SDL_Window* window) {
 	}
 
 	// Create surface
-	if (window) {
-		if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) {
+	if (targetWindow) {
+		if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(targetWindow, instance.get(), &newSurface)) {
 			swapchainSurface = newSurface;
 		} else {
 			Helpers::warn("Error creating Vulkan surface");
@@ -1127,7 +1129,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) {
 		vk::Extent2D swapchainExtent;
 		{
 			int windowWidth, windowHeight;
-			SDL_Vulkan_GetDrawableSize(window, &windowWidth, &windowHeight);
+			SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight);
 			swapchainExtent.width = windowWidth;
 			swapchainExtent.height = windowHeight;
 		}
@@ -1275,7 +1277,8 @@ void RendererVK::initGraphicsContext(SDL_Window* window) {
 
 	static vk::DescriptorSetLayoutBinding displayShaderLayout[] = {
 		{// Just a singular texture slot
-		 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+		 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment
+		},
 	};
 
 	if (auto createResult = Vulkan::DescriptorUpdateBatch::create(device.get()); createResult.has_value()) {
@@ -1407,7 +1410,8 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co
 
 		static vk::ImageSubresourceRange depthStencilRanges[2] = {
 			vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1),
-			vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1)};
+			vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1)
+		};
 
 		// Clear RenderTarget
 		getCurrentCommandBuffer().clearDepthStencilImage(
diff --git a/src/core/screen_layout.cpp b/src/core/screen_layout.cpp
index 93407986..90dc2dd3 100644
--- a/src/core/screen_layout.cpp
+++ b/src/core/screen_layout.cpp
@@ -147,4 +147,3 @@ const char* ScreenLayout::layoutToString(Layout layout) {
 		default: return "invalid";
 	}
 }
-    
\ No newline at end of file
diff --git a/src/core/services/dsp.cpp b/src/core/services/dsp.cpp
index 3f976f6a..be750dc6 100644
--- a/src/core/services/dsp.cpp
+++ b/src/core/services/dsp.cpp
@@ -4,7 +4,6 @@
 #include <fmt/format.h>
 #include <fmt/ranges.h>
 
-#include <algorithm>
 #include <cstring>
 #include <fstream>
 
diff --git a/src/core/services/frd.cpp b/src/core/services/frd.cpp
index 63e951a6..d8bcd56f 100644
--- a/src/core/services/frd.cpp
+++ b/src/core/services/frd.cpp
@@ -27,6 +27,7 @@ namespace FRDCommands {
 		GetFriendAttributeFlags = 0x00170042,
 		UpdateGameModeDescription = 0x001D0002,
 
+		SaveLocalAccountData = 0x04050000,
 		UpdateMii = 0x040C0800,
 	};
 }
@@ -61,6 +62,7 @@ void FRDService::handleSyncRequest(u32 messagePointer, FRDService::Type type) {
 			if (type == Type::A) {
 				switch (command) {
 					case FRDCommands::UpdateMii: updateMii(messagePointer); break;
+					case FRDCommands::SaveLocalAccountData: saveLocalAccountData(messagePointer); break;
 					default: Helpers::panic("FRD:A service requested. Command: %08X\n", command); break;
 				}
 			} else {
@@ -265,6 +267,15 @@ void FRDService::logout(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }
 
+// Stub for FRD:A command 0x04050000 (SaveLocalAccountData): no account data is actually saved.
+// Only logs the call, then replies with the response header for command 0x405 followed by Result::Success.
+void FRDService::saveLocalAccountData(u32 messagePointer) {
+	log("FRD::SaveLocalAccountData (stubbed)\n");
+
+	mem.write32(messagePointer, IPC::responseHeader(0x405, 1, 0));
+	mem.write32(messagePointer + 4, Result::Success);
+}
+
 void FRDService::updateMii(u32 messagePointer) {
 	log("FRD::UpdateMii (stubbed)\n");
 
diff --git a/src/core/services/fs.cpp b/src/core/services/fs.cpp
index 18dd0b21..5d6cb44f 100644
--- a/src/core/services/fs.cpp
+++ b/src/core/services/fs.cpp
@@ -194,7 +194,11 @@ void FSService::handleSyncRequest(u32 messagePointer) {
 		case FSCommands::SetThisSaveDataSecureValue: setThisSaveDataSecureValue(messagePointer); break;
 		case FSCommands::AbnegateAccessRight: abnegateAccessRight(messagePointer); break;
 		case FSCommands::TheGameboyVCFunction: theGameboyVCFunction(messagePointer); break;
-		default: Helpers::panic("FS service requested. Command: %08X\n", command);
+
+		default:
+			Helpers::warn("Unimplemented FS service requested. Command: %08X\n", command);
+			mem.write32(messagePointer + 4, Result::Success);
+			break;
 	}
 }
 
diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp
index 695e0329..c149a2a3 100644
--- a/src/core/services/hid.cpp
+++ b/src/core/services/hid.cpp
@@ -118,7 +118,6 @@ void HIDService::getGyroscopeCoefficient(u32 messagePointer) {
 
 // The volume here is in the range [0, 0x3F]
 // It is read directly from I2C Device 3 register 0x09
-// Since we currently do not have audio, set the volume a bit below max (0x30)
 void HIDService::getSoundVolume(u32 messagePointer) {
 	log("HID::GetSoundVolume\n");
 	constexpr u8 volume = 0x30;
@@ -237,7 +236,7 @@ void HIDService::updateInputs(u64 currentTick) {
 
 	// For some reason, the original developers decided to signal the HID events each time the OS rescanned inputs
 	// Rather than once every time the state of a key, or the accelerometer state, etc is updated
-	// This means that the OS will signal the events even if literally nothing happened
+	// This means that the OS will signal the events even if nothing happened
 	// Some games such as Majora's Mask rely on this behaviour.
 	if (eventsInitialized) {
 		for (auto& e : events) {
diff --git a/src/core/services/ldr_ro.cpp b/src/core/services/ldr_ro.cpp
index e1c5bde6..561bdf3e 100644
--- a/src/core/services/ldr_ro.cpp
+++ b/src/core/services/ldr_ro.cpp
@@ -23,7 +23,8 @@ namespace CROHeader {
 		NameOffset = 0x084,
 		NextCRO = 0x088,
 		PrevCRO = 0x08C,
-		FixedSize = 0x98,
+		FileSize = 0x090,
+		FixedSize = 0x098,
 		OnUnresolved = 0x0AC,
 		CodeOffset = 0x0B0,
 		DataOffset = 0x0B8,
@@ -146,8 +147,10 @@ static const std::string CRO_MAGIC("CRO0");
 static const std::string CRO_MAGIC_FIXED("FIXD");
 static const std::string CRR_MAGIC("CRR0");
 
+using namespace KernelMemoryTypes;
+
 class CRO {
-	Memory &mem;
+	Memory& mem;
 
 	u32 croPointer;  // Origin address of CRO in RAM
 	u32 oldDataSegmentOffset;
@@ -155,7 +158,7 @@ class CRO {
 	bool isCRO;  // False if CRS
 
   public:
-	CRO(Memory &mem, u32 croPointer, bool isCRO) : mem(mem), croPointer(croPointer), oldDataSegmentOffset(0), isCRO(isCRO) {}
+	CRO(Memory& mem, u32 croPointer, bool isCRO) : mem(mem), croPointer(croPointer), oldDataSegmentOffset(0), isCRO(isCRO) {}
 	~CRO() = default;
 
 	std::string getModuleName() {
@@ -164,25 +167,15 @@ class CRO {
 		return mem.readString(moduleName.offset, moduleName.size);
 	}
 
-	u32 getNextCRO() {
-		return mem.read32(croPointer + CROHeader::NextCRO);
-	}
-	
-	u32 getPrevCRO() {
-		return mem.read32(croPointer + CROHeader::PrevCRO);
-	}
+	u32 getNextCRO() { return mem.read32(croPointer + CROHeader::NextCRO); }
 
-	u32 getFixedSize() {
-		return mem.read32(croPointer + CROHeader::FixedSize);
-	}
+	u32 getPrevCRO() { return mem.read32(croPointer + CROHeader::PrevCRO); }
 
-	void setNextCRO(u32 nextCRO) {
-		mem.write32(croPointer + CROHeader::NextCRO, nextCRO);
-	}
+	u32 getFixedSize() { return mem.read32(croPointer + CROHeader::FixedSize); }
 
-	void setPrevCRO(u32 prevCRO) {
-		mem.write32(croPointer + CROHeader::PrevCRO, prevCRO);
-	}
+	void setNextCRO(u32 nextCRO) { mem.write32(croPointer + CROHeader::NextCRO, nextCRO); }
+	void setPrevCRO(u32 prevCRO) { mem.write32(croPointer + CROHeader::PrevCRO, prevCRO); }
+	u32 getSize() { return mem.read32(croPointer + CROHeader::FileSize); }
 
 	void write32(u32 addr, u32 value) {
 		// Note: some games export symbols to the static module, which doesn't contain any segments.
@@ -228,21 +221,17 @@ class CRO {
 		return entryOffset + offset;
 	}
 
-	u32 getOnUnresolvedAddr() {
-		return getSegmentAddr(mem.read32(croPointer + CROHeader::OnUnresolved));
-	}
+	u32 getOnUnresolvedAddr() { return getSegmentAddr(mem.read32(croPointer + CROHeader::OnUnresolved)); }
 
 	u32 getNamedExportSymbolAddr(const std::string& symbolName) {
 		// Note: The CRO contains a trie for fast symbol lookup. For simplicity,
 		// we won't use it and instead look up the symbol in the named export symbol table
 
 		const u32 exportStringSize = mem.read32(croPointer + CROHeader::ExportStringSize);
-
 		const CROHeaderEntry namedExportTable = getHeaderEntry(CROHeader::NamedExportTableOffset);
 
 		for (u32 namedExport = 0; namedExport < namedExportTable.size; namedExport++) {
 			const u32 nameOffset = mem.read32(namedExportTable.offset + 8 * namedExport + NamedExportTable::NameOffset);
-
 			const std::string exportSymbolName = mem.readString(nameOffset, exportStringSize);
 
 			if (symbolName.compare(exportSymbolName) == 0) {
@@ -437,7 +426,7 @@ class CRO {
 		return true;
 	}
 
-	bool rebaseSegmentTable(u32 dataVaddr, u32 bssVaddr, u32 *oldDataVaddr) {
+	bool rebaseSegmentTable(u32 dataVaddr, u32 bssVaddr, u32* oldDataVaddr) {
 		const CROHeaderEntry segmentTable = getHeaderEntry(CROHeader::SegmentTableOffset);
 
 		for (u32 segment = 0; segment < segmentTable.size; segment++) {
@@ -446,13 +435,16 @@ class CRO {
 			const u32 segmentID = mem.read32(segmentTable.offset + 12 * segment + SegmentTable::ID);
 			switch (segmentID) {
 				case SegmentTable::SegmentID::DATA:
-					*oldDataVaddr = segmentOffset + croPointer; oldDataSegmentOffset = segmentOffset; segmentOffset = dataVaddr; break;
+					*oldDataVaddr = segmentOffset + croPointer;
+					oldDataSegmentOffset = segmentOffset;
+					segmentOffset = dataVaddr;
+					break;
 				case SegmentTable::SegmentID::BSS: segmentOffset = bssVaddr; break;
 				case SegmentTable::SegmentID::TEXT:
 				case SegmentTable::SegmentID::RODATA:
-					if (segmentOffset != 0) segmentOffset += croPointer; break;
-				default:
-					Helpers::panic("Unknown segment ID = %u", segmentID);
+					if (segmentOffset != 0) segmentOffset += croPointer;
+					break;
+				default: Helpers::panic("Unknown segment ID = %u", segmentID);
 			}
 
 			mem.write32(segmentTable.offset + 12 * segment + SegmentTable::Offset, segmentOffset);
@@ -473,9 +465,9 @@ class CRO {
 				case SegmentTable::SegmentID::BSS: segmentOffset = 0; break;
 				case SegmentTable::SegmentID::TEXT:
 				case SegmentTable::SegmentID::RODATA:
-					if (segmentOffset != 0) segmentOffset -= croPointer; break;
-				default:
-					Helpers::panic("Unknown segment ID = %u", segmentID);
+					if (segmentOffset != 0) segmentOffset -= croPointer;
+					break;
+				default: Helpers::panic("Unknown segment ID = %u", segmentID);
 			}
 
 			mem.write32(segmentTable.offset + 12 * segment + SegmentTable::Offset, segmentOffset);
@@ -639,7 +631,9 @@ class CRO {
 			u32 relocationOffset = mem.read32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset);
 
 			if (relocationOffset != 0) {
-				mem.write32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset + croPointer);
+				mem.write32(
+					anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset + croPointer
+				);
 			}
 		}
 
@@ -653,7 +647,9 @@ class CRO {
 			u32 relocationOffset = mem.read32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset);
 
 			if (relocationOffset != 0) {
-				mem.write32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset - croPointer);
+				mem.write32(
+					anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset - croPointer
+				);
 			}
 		}
 
@@ -673,7 +669,6 @@ class CRO {
 			const u32 addend = mem.read32(relocationPatchTable.offset + 12 * relocationPatch + RelocationPatch::Addend);
 
 			const u32 segmentAddr = getSegmentAddr(segmentOffset);
-
 			const u32 entryID = mem.read32(segmentTable.offset + 12 * (segmentOffset & 0xF) + SegmentTable::ID);
 
 			u32 relocationTarget = segmentAddr;
@@ -1198,9 +1193,7 @@ class CRO {
 	}
 };
 
-void LDRService::reset() {
-	loadedCRS = 0;
-}
+void LDRService::reset() { loadedCRS = 0; }
 
 void LDRService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
@@ -1245,7 +1238,14 @@ void LDRService::initialize(u32 messagePointer) {
 	}
 
 	// Map CRO to output address
-	mem.mirrorMapping(mapVaddr, crsPointer, size);
+	// TODO: how to handle permissions?
+	bool succeeded = mem.mapVirtualMemory(
+		mapVaddr, crsPointer, size >> 12, true, true, true, MemoryState::Free, MemoryState::Private, MemoryState::Locked, MemoryState::AliasCode
+	);
+
+	if (!succeeded) {
+		Helpers::panic("Failed to map CRS");
+	}
 
 	CRO crs(mem, mapVaddr, false);
 
@@ -1312,7 +1312,9 @@ void LDRService::loadCRO(u32 messagePointer, bool isNew) {
 	const u32 fixLevel = mem.read32(messagePointer + 40);
 	const Handle process = mem.read32(messagePointer + 52);
 
-	log("LDR_RO::LoadCRO (isNew = %d, buffer = %08X, vaddr = %08X, size = %08X, .data vaddr = %08X, .data size = %08X, .bss vaddr = %08X, .bss size = %08X, auto link = %d, fix level = %X, process = %X)\n", isNew, croPointer, mapVaddr, size, dataVaddr, dataSize, bssVaddr, bssSize, autoLink, fixLevel, process);
+	log("LDR_RO::LoadCRO (isNew = %d, buffer = %08X, vaddr = %08X, size = %08X, .data vaddr = %08X, .data size = %08X, .bss vaddr = %08X, .bss size "
+		"= %08X, auto link = %d, fix level = %X, process = %X)\n",
+		isNew, croPointer, mapVaddr, size, dataVaddr, dataSize, bssVaddr, bssSize, autoLink, fixLevel, process);
 
 	// Sanity checks
 	if (size < CRO_HEADER_SIZE) {
@@ -1332,7 +1334,14 @@ void LDRService::loadCRO(u32 messagePointer, bool isNew) {
 	}
 
 	// Map CRO to output address
-	mem.mirrorMapping(mapVaddr, croPointer, size);
+	// TODO: how to handle permissions?
+	bool succeeded = mem.mapVirtualMemory(
+		mapVaddr, croPointer, size >> 12, true, true, true, MemoryState::Free, MemoryState::Private, MemoryState::Locked, MemoryState::AliasCode
+	);
+
+	if (!succeeded) {
+		Helpers::panic("Failed to map CRO");
+	}
 
 	CRO cro(mem, mapVaddr, true);
 
@@ -1392,7 +1401,18 @@ void LDRService::unloadCRO(u32 messagePointer) {
 		Helpers::panic("Failed to unrebase CRO");
 	}
 
+	u32 size = cro.getSize();
+	bool succeeded = mem.mapVirtualMemory(
+		mapVaddr, croPointer, size >> 12, false, false, false, MemoryState::Locked, MemoryState::AliasCode, MemoryState::Free, MemoryState::Private,
+		false
+	);
+
+	if (!succeeded) {
+		Helpers::panic("Failed to unmap CRO");
+	}
+
 	kernel.clearInstructionCacheRange(mapVaddr, cro.getFixedSize());
+
 	mem.write32(messagePointer, IPC::responseHeader(0x5, 1, 0));
 	mem.write32(messagePointer + 4, Result::Success);
 }
\ No newline at end of file
diff --git a/src/core/services/soc.cpp b/src/core/services/soc.cpp
index 4ad546b0..4e74c5c3 100644
--- a/src/core/services/soc.cpp
+++ b/src/core/services/soc.cpp
@@ -15,7 +15,11 @@ void SOCService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
 		case SOCCommands::InitializeSockets: initializeSockets(messagePointer); break;
-		default: Helpers::panic("SOC service requested. Command: %08X\n", command);
+
+		default:
+			Helpers::warn("SOC service requested. Command: %08X\n", command);
+			mem.write32(messagePointer + 4, Result::Success);
+			break;
 	}
 }
 
@@ -30,4 +34,4 @@ void SOCService::initializeSockets(u32 messagePointer) {
 
 	mem.write32(messagePointer, IPC::responseHeader(0x01, 1, 0));
 	mem.write32(messagePointer + 4, Result::Success);
-}
\ No newline at end of file
+}
diff --git a/src/dynamic_library.cpp b/src/dynamic_library.cpp
new file mode 100644
index 00000000..833baef7
--- /dev/null
+++ b/src/dynamic_library.cpp
@@ -0,0 +1,117 @@
+// SPDX-FileCopyrightText: 2019 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "dynamic_library.hpp"
+
+#include <fmt/format.h>
+
+#include <cstring>
+#include <string>
+#include <utility>
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+namespace Common {
+	DynamicLibrary::DynamicLibrary() = default;
+
+	// Attempts to load `filename` immediately; the result of open() is deliberately discarded
+	DynamicLibrary::DynamicLibrary(const char* filename) { void(open(filename)); }
+
+	// Adopts an already-obtained native library handle
+	DynamicLibrary::DynamicLibrary(void* handle_) : handle{handle_} {}
+
+	// Takes over rhs's handle and leaves rhs holding nullptr
+	DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept : handle{std::exchange(rhs.handle, nullptr)} {}
+
+	DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept {
+		// Self-move must be a no-op: closing first would null our own handle and silently drop the library
+		if (this != &rhs) {
+			close();
+			handle = std::exchange(rhs.handle, nullptr);
+		}
+
+		return *this;
+	}
+
+	DynamicLibrary::~DynamicLibrary() { close(); }
+
+	// Appends the platform's shared library extension to `filename` without adding a "lib" prefix
+	std::string DynamicLibrary::getUnprefixedFilename(const char* filename) {
+#if defined(_WIN32)
+		return std::string(filename) + ".dll";
+#elif defined(__APPLE__)
+		return std::string(filename) + ".dylib";
+#else
+		return std::string(filename) + ".so";
+#endif
+	}
+
+	// Builds the platform-specific filename for `libname`. Version components are only included when non-negative,
+	// and `minor` is ignored when `major` is negative. Outside of Windows, "lib" is prepended unless already present.
+	std::string DynamicLibrary::getVersionedFilename(const char* libname, int major, int minor) {
+#if defined(_WIN32)
+		if (major >= 0 && minor >= 0)
+			return fmt::format("{}-{}-{}.dll", libname, major, minor);
+		else if (major >= 0)
+			return fmt::format("{}-{}.dll", libname, major);
+		else
+			return fmt::format("{}.dll", libname);
+#elif defined(__APPLE__)
+		const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
+		if (major >= 0 && minor >= 0)
+			return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor);
+		else if (major >= 0)
+			return fmt::format("{}{}.{}.dylib", prefix, libname, major);
+		else
+			return fmt::format("{}{}.dylib", prefix, libname);
+#else
+		const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
+		if (major >= 0 && minor >= 0)
+			return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor);
+		else if (major >= 0)
+			return fmt::format("{}{}.so.{}", prefix, libname, major);
+		else
+			return fmt::format("{}{}.so", prefix, libname);
+#endif
+	}
+
+	// Loads `filename` and stores the resulting handle. Returns true if the library was loaded.
+	// NOTE: the previously stored handle is overwritten without being closed; call close() first when reusing an object
+	bool DynamicLibrary::open(const char* filename) {
+#ifdef _WIN32
+		handle = reinterpret_cast<void*>(LoadLibraryA(filename));
+#else
+		handle = dlopen(filename, RTLD_NOW);
+#endif
+		return handle != nullptr;
+	}
+
+	// Unloads the library if one is open and resets the handle to nullptr
+	void DynamicLibrary::close() {
+		if (!isOpen()) return;
+
+#ifdef _WIN32
+		FreeLibrary(reinterpret_cast<HMODULE>(handle));
+#else
+		dlclose(handle);
+#endif
+		handle = nullptr;
+	}
+
+	// Looks up `name` in the loaded library. Returns nullptr if the symbol is not found or no library is loaded.
+	void* DynamicLibrary::getSymbolAddress(const char* name) const {
+		// Never pass a null handle to the OS: glibc's dlsym treats it as RTLD_DEFAULT and searches every loaded object
+		if (handle == nullptr) return nullptr;
+
+#ifdef _WIN32
+		return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name));
+#else
+		return dlsym(handle, name);
+#endif
+	}
+
+}  // namespace Common
\ No newline at end of file
diff --git a/src/emulator.cpp b/src/emulator.cpp
index 0f97208a..e5ba4e27 100644
--- a/src/emulator.cpp
+++ b/src/emulator.cpp
@@ -20,7 +20,7 @@ __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1;
 
 Emulator::Emulator()
 	: config(getConfigPath()), kernel(cpu, memory, gpu, config, lua), cpu(memory, kernel, *this), gpu(memory, config),
-	  memory(cpu.getTicksRef(), config), cheats(memory, kernel.getServiceManager().getHID()), audioDevice(config.audioDeviceConfig), lua(*this),
+	  memory(kernel.fcramManager, config), cheats(memory, kernel.getServiceManager().getHID()), audioDevice(config.audioDeviceConfig), lua(*this),
 	  running(false)
 #ifdef PANDA3DS_ENABLE_HTTP_SERVER
 	  ,
@@ -159,20 +159,21 @@ void Emulator::pollScheduler() {
 		scheduler.updateNextTimestamp();
 
 		switch (eventType) {
-			case Scheduler::EventType::VBlank: [[likely]] {
-				// Signal that we've reached the end of a frame
-				frameDone = true;
-				lua.signalEvent(LuaEvent::Frame);
+			case Scheduler::EventType::VBlank:
+				[[likely]] {
+					// Signal that we've reached the end of a frame
+					frameDone = true;
+					lua.signalEvent(LuaEvent::Frame);
 
-				// Send VBlank interrupts
-				ServiceManager& srv = kernel.getServiceManager();
-				srv.sendGPUInterrupt(GPUInterrupt::VBlank0);
-				srv.sendGPUInterrupt(GPUInterrupt::VBlank1);
+					// Send VBlank interrupts
+					ServiceManager& srv = kernel.getServiceManager();
+					srv.sendGPUInterrupt(GPUInterrupt::VBlank0);
+					srv.sendGPUInterrupt(GPUInterrupt::VBlank1);
 
-				// Queue next VBlank event
-				scheduler.addEvent(Scheduler::EventType::VBlank, time + CPU::ticksPerSec / 60);
-				break;
-			}
+					// Queue next VBlank event
+					scheduler.addEvent(Scheduler::EventType::VBlank, time + CPU::ticksPerSec / 60);
+					break;
+				}
 
 			case Scheduler::EventType::ThreadWakeup: kernel.pollThreadWakeups(); break;
 			case Scheduler::EventType::UpdateTimers: kernel.pollTimers(); break;
@@ -353,8 +354,7 @@ bool Emulator::loadELF(std::ifstream& file) {
 std::span<u8> Emulator::getSMDH() {
 	switch (romType) {
 		case ROMType::NCSD:
-		case ROMType::CXI:
-			return memory.getCXI()->smdh;
+		case ROMType::CXI: return memory.getCXI()->smdh;
 		default: {
 			return std::span<u8>();
 		}
@@ -386,7 +386,7 @@ static void dumpRomFSNode(const RomFS::RomFSNode& node, const char* romFSBase, c
 
 	for (auto& directory : node.directories) {
 		const auto newPath = path / directory->name;
-		
+
 		// Create the directory for the new folder
 		std::error_code ec;
 		std::filesystem::create_directories(newPath, ec);
@@ -465,7 +465,7 @@ void Emulator::reloadSettings() {
 		loadRenderdoc();
 	}
 
-    gpu.getRenderer()->setHashTextures(config.hashTextures);
+	gpu.getRenderer()->setHashTextures(config.hashTextures);
 
 #ifdef PANDA3DS_ENABLE_DISCORD_RPC
 	// Reload RPC setting if we're compiling with RPC support
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index cb98b269..87bd057a 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -15,13 +15,11 @@ extern "C" {
 #define IOS_EXPORT extern "C" __attribute__((visibility("default")))
 
 std::unique_ptr<Emulator> emulator = nullptr;
-HIDService* hidService = nullptr;
 
 IOS_EXPORT void iosCreateEmulator() {
 	printf("Creating emulator\n");
 
 	emulator = std::make_unique<Emulator>();
-	hidService = &emulator->getServiceManager().getHID();
 	emulator->initGraphicsContext(nullptr);
 }
 
diff --git a/src/jni_driver.cpp b/src/jni_driver.cpp
index 6a156360..7274c9c4 100644
--- a/src/jni_driver.cpp
+++ b/src/jni_driver.cpp
@@ -10,11 +10,11 @@
 #include "android_utils.hpp"
 #include "sdl_sensors.hpp"
 
-std::unique_ptr<Emulator> emulator = nullptr;
-HIDService* hidService = nullptr;
-RendererGL* renderer = nullptr;
-bool romLoaded = false;
-JavaVM* jvm = nullptr;
+static std::unique_ptr<Emulator> emulator = nullptr;
+static HIDService* hidService = nullptr;
+static RendererGL* renderer = nullptr;
+static bool romLoaded = false;
+static JavaVM* jvm = nullptr;
 
 jclass alberClass;
 jmethodID alberClassOpenDocument;
diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp
index 4786b317..9ad3b62a 100644
--- a/src/libretro_core.cpp
+++ b/src/libretro_core.cpp
@@ -1,12 +1,11 @@
-#include <stdexcept>
+#include <libretro.h>
+
 #include <cstdio>
 #include <regex>
 
-#include <libretro.h>
-
-#include <version.hpp>
-#include <emulator.hpp>
-#include <renderer_gl/renderer_gl.hpp>
+#include "emulator.hpp"
+#include "renderer_gl/renderer_gl.hpp"
+#include "version.hpp"
 
 static retro_environment_t envCallback;
 static retro_video_refresh_t videoCallback;
@@ -20,20 +19,14 @@ static std::filesystem::path savePath;
 static bool screenTouched = false;
 static bool usingGLES = false;
 
-std::unique_ptr<Emulator> emulator;
-RendererGL* renderer;
+static std::unique_ptr<Emulator> emulator;
+static RendererGL* renderer;
 
-std::filesystem::path Emulator::getConfigPath() {
-	return std::filesystem::path(savePath / "config.toml");
-}
+std::filesystem::path Emulator::getConfigPath() { return std::filesystem::path(savePath / "config.toml"); }
+std::filesystem::path Emulator::getAppDataRoot() { return std::filesystem::path(savePath / "Emulator Files"); }
 
-std::filesystem::path Emulator::getAppDataRoot() {
-	return std::filesystem::path(savePath / "Emulator Files");
-}
-
-static void* getGLProcAddress(const char* name) {
-	return (void*)hwRender.get_proc_address(name);
-}
+static void* getGLProcAddress(const char* name) { return (void*)hwRender.get_proc_address(name); }
+static void videoDestroyContext() { emulator->deinitGraphicsContext(); }
 
 static void videoResetContext() {
 	if (usingGLES) {
@@ -53,10 +46,6 @@ static void videoResetContext() {
 	emulator->initGraphicsContext(nullptr);
 }
 
-static void videoDestroyContext() {
-	emulator->deinitGraphicsContext();
-}
-
 static bool setHWRender(retro_hw_context_type type) {
 	hwRender.context_type = type;
 	hwRender.context_reset = videoResetContext;
@@ -159,16 +148,12 @@ static int fetchVariableInt(std::string key, int def) {
 	return 0;
 }
 
-static bool fetchVariableBool(std::string key, bool def) {
-	return fetchVariable(key, def ? "enabled" : "disabled") == "enabled";
-}
-
-static int fetchVariableRange(std::string key, int min, int max) {
-	return std::clamp(fetchVariableInt(key, min), min, max);
-}
+static bool fetchVariableBool(std::string key, bool def) { return fetchVariable(key, def ? "enabled" : "disabled") == "enabled"; }
+static int fetchVariableRange(std::string key, int min, int max) { return std::clamp(fetchVariableInt(key, min), min, max); }
 
 static void configInit() {
 	static const retro_variable values[] = {
+		{"panda3ds_use_fastmem", EmulatorConfig::enableFastmemDefault ? "Enable fastmem; enabled|disabled" : "Enable fastmem; disabled|enabled"},
 		{"panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault ? "Enable shader JIT; enabled|disabled" : "Enable shader JIT; disabled|enabled"},
 		{"panda3ds_accelerate_shaders",
 		 EmulatorConfig::accelerateShadersDefault ? "Run 3DS shaders on the GPU; enabled|disabled" : "Run 3DS shaders on the GPU; disabled|enabled"},
@@ -204,9 +189,10 @@ static void configUpdate() {
 	config.rendererType = RendererType::OpenGL;
 	config.vsyncEnabled = fetchVariableBool("panda3ds_use_vsync", true);
 	config.shaderJitEnabled = fetchVariableBool("panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault);
+	config.fastmemEnabled = fetchVariableBool("panda3ds_use_fastmem", EmulatorConfig::enableFastmemDefault);
+	config.systemLanguage = EmulatorConfig::languageCodeFromString(fetchVariable("panda3ds_system_language", "en"));
 	config.chargerPlugged = fetchVariableBool("panda3ds_use_charger", true);
 	config.batteryPercentage = fetchVariableRange("panda3ds_battery_level", 5, 100);
-	config.systemLanguage = EmulatorConfig::languageCodeFromString(fetchVariable("panda3ds_system_language", "en"));
 
 	config.dspType = Audio::DSPCore::typeFromString(fetchVariable("panda3ds_dsp_emulation", "null"));
 	config.audioEnabled = fetchVariableBool("panda3ds_use_audio", false);
@@ -259,27 +245,13 @@ void retro_get_system_av_info(retro_system_av_info* info) {
 	info->timing.sample_rate = 32768;
 }
 
-void retro_set_environment(retro_environment_t cb) {
-	envCallback = cb;
-}
-
-void retro_set_video_refresh(retro_video_refresh_t cb) {
-	videoCallback = cb;
-}
-
-void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) {
-	audioBatchCallback = cb;
-}
+void retro_set_environment(retro_environment_t cb) { envCallback = cb; }
+void retro_set_video_refresh(retro_video_refresh_t cb) { videoCallback = cb; }
+void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { audioBatchCallback = cb; }
 
 void retro_set_audio_sample(retro_audio_sample_t cb) {}
-
-void retro_set_input_poll(retro_input_poll_t cb) {
-	inputPollCallback = cb;
-}
-
-void retro_set_input_state(retro_input_state_t cb) {
-	inputStateCallback = cb;
-}
+void retro_set_input_poll(retro_input_poll_t cb) { inputPollCallback = cb; }
+void retro_set_input_state(retro_input_state_t cb) { inputStateCallback = cb; }
 
 void retro_init() {
 	enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888;
@@ -297,9 +269,7 @@ void retro_init() {
 	emulator = std::make_unique<Emulator>();
 }
 
-void retro_deinit() {
-	emulator = nullptr;
-}
+void retro_deinit() { emulator = nullptr; }
 
 bool retro_load_game(const retro_game_info* game) {
 	configInit();
@@ -325,9 +295,8 @@ void retro_unload_game() {
 	renderer = nullptr;
 }
 
-void retro_reset() {
-	emulator->reset(Emulator::ReloadOption::Reload);
-}
+void retro_reset() { emulator->reset(Emulator::ReloadOption::Reload); }
+void retro_cheat_reset() { emulator->getCheats().reset(); }
 
 void retro_run() {
 	configCheckVariables();
@@ -345,13 +314,16 @@ void retro_run() {
 	hid.setKey(HID::Keys::Y, getButtonState(RETRO_DEVICE_ID_JOYPAD_Y));
 	hid.setKey(HID::Keys::L, getButtonState(RETRO_DEVICE_ID_JOYPAD_L));
 	hid.setKey(HID::Keys::R, getButtonState(RETRO_DEVICE_ID_JOYPAD_R));
+	hid.setKey(HID::Keys::ZL, getButtonState(RETRO_DEVICE_ID_JOYPAD_L2));
+	hid.setKey(HID::Keys::ZR, getButtonState(RETRO_DEVICE_ID_JOYPAD_R2));
+
 	hid.setKey(HID::Keys::Start, getButtonState(RETRO_DEVICE_ID_JOYPAD_START));
 	hid.setKey(HID::Keys::Select, getButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT));
 	hid.setKey(HID::Keys::Up, getButtonState(RETRO_DEVICE_ID_JOYPAD_UP));
 	hid.setKey(HID::Keys::Down, getButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN));
 	hid.setKey(HID::Keys::Left, getButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT));
 	hid.setKey(HID::Keys::Right, getButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT));
-	// TODO: N3DS buttons
+	// TODO: C-Stick
 
 	// Get analog values for the left analog stick (Right analog stick is N3DS-only and unimplemented)
 	float xLeft = getAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X);
@@ -443,8 +415,4 @@ void retro_cheat_set(uint index, bool enabled, const char* code) {
 	} else {
 		emulator->getCheats().disableCheat(id);
 	}
-}
-
-void retro_cheat_reset() {
-	emulator->getCheats().reset();
-}
+}
\ No newline at end of file
diff --git a/src/panda_qt/cheats_window.cpp b/src/panda_qt/cheats_window.cpp
index 2485c677..3627406a 100644
--- a/src/panda_qt/cheats_window.cpp
+++ b/src/panda_qt/cheats_window.cpp
@@ -70,8 +70,6 @@ void CheatEntryWidget::editClicked() {
 
 CheatEditDialog::CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry) : QDialog(), emu(emu), cheatEntry(cheatEntry) {
 	setWindowTitle(tr("Edit Cheat"));
-
-	setAttribute(Qt::WA_DeleteOnClose);
 	setModal(true);
 
 	QVBoxLayout* layout = new QVBoxLayout;
@@ -147,6 +145,9 @@ void CheatEditDialog::accepted() {
 				cheatEntry.setMetadata(metadata);
 				cheatEntry.Update();
 			}
+
+			// Delete the CheatEditDialog when the main thread is done using it
+			QObject::deleteLater();
 		});
 	});
 }
@@ -157,6 +158,9 @@ void CheatEditDialog::rejected() {
 		// Was adding a cheat but user pressed cancel
 		cheatEntry.Remove();
 	}
+
+	// We have to manually memory-manage the CheatEditDialog object since it's accessed via multiple threads
+	QObject::deleteLater();
 }
 
 CheatsWindow::CheatsWindow(Emulator* emu, const std::filesystem::path& cheatPath, QWidget* parent)
diff --git a/src/panda_qt/config_window.cpp b/src/panda_qt/config_window.cpp
index 14b33156..1d0c83d4 100644
--- a/src/panda_qt/config_window.cpp
+++ b/src/panda_qt/config_window.cpp
@@ -172,6 +172,10 @@ ConfigWindow::ConfigWindow(ConfigCallback configCallback, MainWindowCallback win
 	connectCheckbox(circlePadProEnabled, config.circlePadProEnabled);
 	genLayout->addRow(circlePadProEnabled);
 
+	QCheckBox* fastmemEnabled = new QCheckBox(tr("Enable Fastmem"));
+	connectCheckbox(fastmemEnabled, config.fastmemEnabled);
+	genLayout->addRow(fastmemEnabled);
+
 	QCheckBox* discordRpcEnabled = new QCheckBox(tr("Enable Discord RPC"));
 	connectCheckbox(discordRpcEnabled, config.discordRpcEnabled);
 	genLayout->addRow(discordRpcEnabled);
diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp
index b592226b..f74f2061 100644
--- a/src/panda_qt/main_window.cpp
+++ b/src/panda_qt/main_window.cpp
@@ -10,6 +10,7 @@
 #include "cheats.hpp"
 #include "input_mappings.hpp"
 #include "panda_qt/dsp_debugger.hpp"
+#include "panda_qt/screen/screen.hpp"
 #include "sdl_sensors.hpp"
 #include "services/dsp.hpp"
 #include "version.hpp"
@@ -25,8 +26,19 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent)
 	resize(800, 240 * 4);
 	show();
 
+	const RendererType rendererType = emu->getConfig().rendererType;
+	usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null);
+	usingVk = (rendererType == RendererType::Vulkan);
+	usingMtl = (rendererType == RendererType::Metal);
+
+	ScreenWidget::API api = ScreenWidget::API::OpenGL;
+	if (usingVk)
+		api = ScreenWidget::API::Vulkan;
+	else if (usingMtl)
+		api = ScreenWidget::API::Metal;
+
 	// We pass a callback to the screen widget that will be triggered every time we resize the screen
-	screen = new ScreenWidget([this](u32 width, u32 height) { handleScreenResize(width, height); }, this);
+	screen = ScreenWidget::getWidget(api, [this](u32 width, u32 height) { handleScreenResize(width, height); }, this);
 	setCentralWidget(screen);
 
 	appRunning = true;
@@ -149,28 +161,29 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent)
 
 	// The emulator graphics context for the thread should be initialized in the emulator thread due to how GL contexts work
 	emuThread = std::thread([this]() {
-		const RendererType rendererType = emu->getConfig().rendererType;
-		usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null);
-		usingVk = (rendererType == RendererType::Vulkan);
-		usingMtl = (rendererType == RendererType::Metal);
+		switch (screen->api) {
+			case ScreenWidget::API::OpenGL: {
+				// Make the GL context current for this thread and apply the configured VSync setting
+				GL::Context* glContext = screen->getGLContext();
+				glContext->MakeCurrent();
+				glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0);
 
-		if (usingGL) {
-			// Make GL context current for this thread, enable VSync
-			GL::Context* glContext = screen->getGLContext();
-			glContext->MakeCurrent();
-			glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0);
+				if (glContext->IsGLES()) {
+					emu->getRenderer()->setupGLES();
+				}
 
-			if (glContext->IsGLES()) {
-				emu->getRenderer()->setupGLES();
+				emu->initGraphicsContext(glContext);
+				break;
 			}
 
-			emu->initGraphicsContext(glContext);
-		} else if (usingVk) {
-			Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!");
-		} else if (usingMtl) {
-			Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!");
-		} else {
-			Helpers::panic("Unsupported graphics backend for Qt frontend!");
+			case ScreenWidget::API::Metal: {
+				emu->initGraphicsContext(nullptr);
+				emu->getRenderer()->setMTKLayer(screen->getMTKLayer());
+				break;
+			}
+
+			case ScreenWidget::API::Vulkan: Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); break;
+			default: Helpers::panic("Unsupported graphics backend for Qt frontend!"); break;
 		}
 
 		// We have to initialize controllers on the same thread they'll be polled in
@@ -213,6 +226,8 @@ void MainWindow::emuThreadMainLoop() {
 void MainWindow::swapEmuBuffer() {
 	if (usingGL) {
 		screen->getGLContext()->SwapBuffers();
+	} else if (usingMtl) {
+		// The renderer itself calls presentDrawable to swap buffers on Metal
 	} else {
 		Helpers::panic("[Qt] Don't know how to swap buffers for the current rendering backend :(");
 	}
@@ -290,6 +305,7 @@ MainWindow::~MainWindow() {
 	delete aboutWindow;
 	delete configWindow;
 	delete cheatsEditor;
+	delete screen;
 	delete luaEditor;
 }
 
diff --git a/src/panda_qt/screen/metal_context.mm b/src/panda_qt/screen/metal_context.mm
new file mode 100644
index 00000000..e7a508e4
--- /dev/null
+++ b/src/panda_qt/screen/metal_context.mm
@@ -0,0 +1,71 @@
+#import <AppKit/AppKit.h>
+#import <Metal/Metal.h>
+#import <QuartzCore/CAMetalLayer.h>
+#import <Metal/Metal.hpp>
+#import <QWindow>
+#import <QuartzCore/QuartzCore.hpp>
+
+#import "panda_qt/screen/screen_mtl.hpp"
+
+id<MTLDevice> metalDevice = nil;
+
+// Sets up a retained CAMetalLayer on this widget's native view and stores it in mtkLayer. Returns false on failure.
+bool ScreenWidgetMTL::createMetalContext() {
+	NSView* nativeView = (NSView*)this->winId();
+	// Retain the layer so that we can manually memory manage it.
+	CAMetalLayer* metalLayer = [[CAMetalLayer layer] retain];
+	if (!metalLayer) {
+		return false;
+	}
+
+	metalDevice = MTLCreateSystemDefaultDevice();
+	if (!metalDevice) {
+		NSLog(@"Failed to create metal device");
+		// Balance the retain above so the layer is not leaked on this failure path
+		[metalLayer release];
+		return false;
+	}
+
+	metalLayer.device = metalDevice;
+	metalLayer.framebufferOnly = NO;
+	metalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
+
+	CGFloat scale = [nativeView window].backingScaleFactor;
+	CGSize pointSize = nativeView.bounds.size;
+	metalLayer.contentsScale = scale;
+	metalLayer.drawableSize = CGSizeMake(pointSize.width * scale, pointSize.height * scale);
+
+	[nativeView setLayer:metalLayer];
+	[nativeView setWantsLayer:YES];
+
+	CA::MetalLayer* cppLayer = (CA::MetalLayer*)metalLayer;
+	mtkLayer = static_cast<void*>(cppLayer);
+
+	return true;
+}
+
+void ScreenWidgetMTL::resizeMetalView() {
+	NSView* view = (NSView*)this->windowHandle()->winId();
+	CAMetalLayer* metalLayer = (CAMetalLayer*)[view layer];
+	// Only touch the drawable when the view currently has a layer; it is set to surfaceWidth x surfaceHeight
+	if (metalLayer) {
+		metalLayer.drawableSize = CGSizeMake(surfaceWidth, surfaceHeight);
+	}
+}
+
+// Detaches the Metal layer from the native view, then releases the layer and the device set up by createMetalContext
+ScreenWidgetMTL::~ScreenWidgetMTL() {
+	if (mtkLayer) {
+		CAMetalLayer* metalLayer = (__bridge CAMetalLayer*)static_cast<CA::MetalLayer*>(mtkLayer);
+		NSView* view = (NSView*)this->winId();
+		[view setLayer:nil];
+		[view setWantsLayer:NO];
+
+		// Release Metal device and layer, clearing the global so it does not keep pointing at the released device
+		metalLayer.device = nil;
+		[metalLayer release];
+		[metalDevice release];
+		metalDevice = nil;
+		mtkLayer = nullptr;
+	}
+}
\ No newline at end of file
diff --git a/src/panda_qt/screen.cpp b/src/panda_qt/screen/screen.cpp
similarity index 66%
rename from src/panda_qt/screen.cpp
rename to src/panda_qt/screen/screen.cpp
index 0876bb71..94835284 100644
--- a/src/panda_qt/screen.cpp
+++ b/src/panda_qt/screen/screen.cpp
@@ -1,10 +1,12 @@
+#ifdef PANDA3DS_ENABLE_OPENGL
 #include "opengl.hpp"
+#endif
 // opengl.hpp must be included at the very top. This comment exists to make clang-format not reorder it :p
+
 #include <QGuiApplication>
 #include <QScreen>
 #include <QWindow>
 #include <algorithm>
-#include <array>
 #include <cmath>
 #include <optional>
 
@@ -12,16 +14,16 @@
 #include <qpa/qplatformnativeinterface.h>
 #endif
 
-#include "panda_qt/screen.hpp"
+#include "panda_qt/screen/screen.hpp"
+#include "panda_qt/screen/screen_gl.hpp"
+#include "panda_qt/screen/screen_mtl.hpp"
 
-// OpenGL screen widget, based on https://github.com/stenzek/duckstation/blob/master/src/duckstation-qt/displaywidget.cpp
+// Screen widget, based on https://github.com/stenzek/duckstation/blob/master/src/duckstation-qt/displaywidget.cpp
 // and https://github.com/melonDS-emu/melonDS/blob/master/src/frontend/qt_sdl/main.cpp
 
 #ifdef PANDA3DS_ENABLE_OPENGL
-ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWidget(parent), resizeCallback(resizeCallback) {
+ScreenWidget::ScreenWidget(API api, ResizeCallback resizeCallback, QWidget* parent) : api(api), QWidget(parent), resizeCallback(resizeCallback) {
 	// Create a native window for use with our graphics API of choice
-	resize(800, 240 * 4);
-
 	setAutoFillBackground(false);
 	setAttribute(Qt::WA_NativeWindow, true);
 	setAttribute(Qt::WA_NoSystemBackground, true);
@@ -29,11 +31,8 @@ ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWi
 	setAttribute(Qt::WA_KeyCompression, false);
 	setFocusPolicy(Qt::StrongFocus);
 	setMouseTracking(true);
-	show();
 
-	if (!createGLContext()) {
-		Helpers::panic("Failed to create GL context for display");
-	}
+	// The graphics context, as well as resizing and showing the widget, is handled by the screen backend
 }
 
 void ScreenWidget::resizeEvent(QResizeEvent* event) {
@@ -48,18 +47,7 @@ void ScreenWidget::resizeEvent(QResizeEvent* event) {
 	}
 
 	reloadScreenCoordinates();
-
-	// This will call take care of calling resizeSurface from the emulator thread
-	resizeCallback(surfaceWidth, surfaceHeight);
-}
-
-// Note: This will run on the emulator thread, we don't want any Qt calls happening there.
-void ScreenWidget::resizeSurface(u32 width, u32 height) {
-	if (previousWidth != width || previousHeight != height) {
-		if (glContext) {
-			glContext->ResizeSurface(width, height);
-		}
-	}
+	resizeDisplay();
 }
 
 void ScreenWidget::reloadScreenCoordinates() {
@@ -73,30 +61,6 @@ void ScreenWidget::reloadScreenLayout(ScreenLayout::Layout newLayout, float newT
 	reloadScreenCoordinates();
 }
 
-bool ScreenWidget::createGLContext() {
-	// List of GL context versions we will try. Anything 4.1+ is good for desktop OpenGL, and 3.1+ for OpenGL ES
-	static constexpr std::array<GL::Context::Version, 8> versionsToTry = {
-		GL::Context::Version{GL::Context::Profile::Core, 4, 6}, GL::Context::Version{GL::Context::Profile::Core, 4, 5},
-		GL::Context::Version{GL::Context::Profile::Core, 4, 4}, GL::Context::Version{GL::Context::Profile::Core, 4, 3},
-		GL::Context::Version{GL::Context::Profile::Core, 4, 2}, GL::Context::Version{GL::Context::Profile::Core, 4, 1},
-		GL::Context::Version{GL::Context::Profile::ES, 3, 2},   GL::Context::Version{GL::Context::Profile::ES, 3, 1},
-	};
-
-	std::optional<WindowInfo> windowInfo = getWindowInfo();
-	if (windowInfo.has_value()) {
-		this->windowInfo = *windowInfo;
-
-		glContext = GL::Context::Create(*getWindowInfo(), versionsToTry);
-		if (glContext == nullptr) {
-			return false;
-		}
-
-		glContext->DoneCurrent();
-	}
-
-	return glContext != nullptr;
-}
-
 qreal ScreenWidget::devicePixelRatioFromScreen() const {
 	const QScreen* screenForRatio = windowHandle()->screen();
 	if (!screenForRatio) {
@@ -156,3 +120,15 @@ std::optional<WindowInfo> ScreenWidget::getWindowInfo() {
 	return wi;
 }
 #endif
+
+// Factory for the screen widget backend matching the requested graphics API.
+// Unsupported or unimplemented APIs are reported through Helpers::panic.
+ScreenWidget* ScreenWidget::getWidget(API api, ResizeCallback resizeCallback, QWidget* parent) {
+	switch (api) {
+		case API::OpenGL: return new ScreenWidgetGL(api, resizeCallback, parent);
+		case API::Metal: return new ScreenWidgetMTL(api, resizeCallback, parent);
+
+		case API::Vulkan: Helpers::panic("Vulkan is not yet supported on Panda3DS-Qt. Try SDL instead"); break;
+		default: Helpers::panic("ScreenWidget::getWidget: Unimplemented graphics API"); break;
+	}
+}
\ No newline at end of file
diff --git a/src/panda_qt/screen/screen_gl.cpp b/src/panda_qt/screen/screen_gl.cpp
new file mode 100644
index 00000000..87cb2738
--- /dev/null
+++ b/src/panda_qt/screen/screen_gl.cpp
@@ -0,0 +1,64 @@
+#include "panda_qt/screen/screen_gl.hpp"
+
+#include <array>
+
+#ifdef PANDA3DS_ENABLE_OPENGL
+ScreenWidgetGL::ScreenWidgetGL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) {
+	// Size and show the widget first: on Wayland + OpenGL, the window has to be shown before a graphics context can be created.
+	resize(800, 240 * 4);
+	show();
+	const bool contextCreated = createContext();
+	if (!contextCreated) {
+		Helpers::panic("Failed to create GL context for display");
+	}
+}
+
+// Creates the GL context for this widget's window from the candidate version list. Returns true if a context exists afterwards.
+bool ScreenWidgetGL::createContext() {
+	// List of GL context versions we will try. Anything 4.1+ is good for desktop OpenGL, and 3.1+ for OpenGL ES
+	static constexpr std::array<GL::Context::Version, 8> versionsToTry = {
+		GL::Context::Version{GL::Context::Profile::Core, 4, 6}, GL::Context::Version{GL::Context::Profile::Core, 4, 5},
+		GL::Context::Version{GL::Context::Profile::Core, 4, 4}, GL::Context::Version{GL::Context::Profile::Core, 4, 3},
+		GL::Context::Version{GL::Context::Profile::Core, 4, 2}, GL::Context::Version{GL::Context::Profile::Core, 4, 1},
+		GL::Context::Version{GL::Context::Profile::ES, 3, 2},   GL::Context::Version{GL::Context::Profile::ES, 3, 1},
+	};
+
+	std::optional<WindowInfo> windowInfo = getWindowInfo();
+	if (windowInfo.has_value()) {
+		this->windowInfo = *windowInfo;
+		// Reuse the info fetched above instead of querying (and blindly dereferencing) getWindowInfo() a second time
+		glContext = GL::Context::Create(*windowInfo, versionsToTry);
+		if (glContext == nullptr) {
+			return false;
+		}
+		glContext->DoneCurrent();
+	}
+
+	return glContext != nullptr;
+}
+
+void ScreenWidgetGL::resizeDisplay() {
+	// This will take care of calling resizeSurface from the emulator thread, as the GL renderer must resize from the emu thread
+	resizeCallback(surfaceWidth, surfaceHeight);
+}
+
+// Note: This will run on the emulator thread, we don't want any Qt calls happening there.
+void ScreenWidgetGL::resizeSurface(u32 width, u32 height) {
+	const bool sizeChanged = previousWidth != width || previousHeight != height;
+	if (!sizeChanged || !glContext) {
+		return;
+	}
+	glContext->ResizeSurface(width, height);
+}
+
+GL::Context* ScreenWidgetGL::getGLContext() { return glContext.get(); }
+#else
+ScreenWidgetGL::ScreenWidgetGL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) {
+	Helpers::panic("OpenGL renderer not supported. Make sure you've compiled with OpenGL support and that you're on a compatible platform");
+}
+
+GL::Context* ScreenWidgetGL::getGLContext() { return nullptr; }
+bool ScreenWidgetGL::createContext() { return false; }
+void ScreenWidgetGL::resizeDisplay() {}
+void ScreenWidgetGL::resizeSurface(u32 width, u32 height) {}
+#endif
diff --git a/src/panda_qt/screen/screen_mtl.cpp b/src/panda_qt/screen/screen_mtl.cpp
new file mode 100644
index 00000000..472b166b
--- /dev/null
+++ b/src/panda_qt/screen/screen_mtl.cpp
@@ -0,0 +1,33 @@
+#include "panda_qt/screen/screen_mtl.hpp"
+
+#ifdef PANDA3DS_ENABLE_METAL
+ScreenWidgetMTL::ScreenWidgetMTL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) {
+	if (!createContext()) {
+		Helpers::panic("Failed to create Metal context for display");
+	}
+	// NOTE(review): unlike the GL backend, the widget is only sized and shown after its context exists — confirm this order is intended
+	resize(800, 240 * 4);
+	show();
+}
+
+void ScreenWidgetMTL::resizeDisplay() {
+	resizeMetalView();
+	resizeCallback(surfaceWidth, surfaceHeight);
+}
+
+bool ScreenWidgetMTL::createContext() { return createMetalContext(); }
+void* ScreenWidgetMTL::getMTKLayer() { return mtkLayer; }
+
+#else
+ScreenWidgetMTL::ScreenWidgetMTL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) {
+	Helpers::panic("Metal renderer not supported. Make sure you've compiled with Metal support and that you're on a compatible platform");
+}
+
+ScreenWidgetMTL::~ScreenWidgetMTL() {}
+bool ScreenWidgetMTL::createContext() { return false; }
+bool ScreenWidgetMTL::createMetalContext() { return false; }
+void* ScreenWidgetMTL::getMTKLayer() { return nullptr; }
+
+void ScreenWidgetMTL::resizeDisplay() {}
+void ScreenWidgetMTL::resizeMetalView() {}
+#endif
\ No newline at end of file
diff --git a/tests/DetectEmulator/source/main.c b/tests/DetectEmulator/source/main.c
index a66edd69..6e1e644d 100644
--- a/tests/DetectEmulator/source/main.c
+++ b/tests/DetectEmulator/source/main.c
@@ -6,16 +6,16 @@
 #define CLEAR_COLOR 0x68B0D8FF
 
 #define DISPLAY_TRANSFER_FLAGS \
-	(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
-	GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
-	GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+    (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+    GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+    GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
 
 typedef struct { float x, y, z; } vertex;
 
 static const vertex vertex_list[] = {
-	{ 200.0f, 200.0f, 0.5f },
-	{ 100.0f, 40.0f, 0.5f },
-	{ 300.0f, 40.0f, 0.5f },
+    { 200.0f, 200.0f, 0.5f },
+    { 100.0f, 40.0f, 0.5f },
+    { 300.0f, 40.0f, 0.5f },
 };
 
 typedef enum {
@@ -69,22 +69,22 @@ static C3D_Mtx projection;
 static void* vbo_data;
 
 static void sceneInit(void) {
-	// Load the vertex shader, create a shader program and bind it
-	vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
-	shaderProgramInit(&program);
-	shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
-	C3D_BindProgram(&program);
+    // Load the vertex shader, create a shader program and bind it
+    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+    shaderProgramInit(&program);
+    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+    C3D_BindProgram(&program);
 
-	// Get the location of the uniforms
-	uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
+    // Get the location of the uniforms
+    uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
 
-	// Configure attributes for use with the vertex shader
-	C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
-	AttrInfo_Init(attrInfo);
-	AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
-	AttrInfo_AddFixed(attrInfo, 1); // v1=color
+    // Configure attributes for use with the vertex shader
+    C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
+    AttrInfo_Init(attrInfo);
+    AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
+    AttrInfo_AddFixed(attrInfo, 1); // v1=color
 
-	// Set the fixed attribute (color) to a colour depending on the emulator
+    // Set the fixed attribute (color) to a colour depending on the emulator
 
     Platform platform = getPlatform();
     switch (platform) {
@@ -104,78 +104,78 @@ static void sceneInit(void) {
             break;
     }
 
-	// Compute the projection matrix
-	Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
+    // Compute the projection matrix
+    Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
 
-	// Create the VBO (vertex buffer object)
-	vbo_data = linearAlloc(sizeof(vertex_list));
-	memcpy(vbo_data, vertex_list, sizeof(vertex_list));
+    // Create the VBO (vertex buffer object)
+    vbo_data = linearAlloc(sizeof(vertex_list));
+    memcpy(vbo_data, vertex_list, sizeof(vertex_list));
 
-	// Configure buffers
-	C3D_BufInfo* bufInfo = C3D_GetBufInfo();
-	BufInfo_Init(bufInfo);
-	BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
+    // Configure buffers
+    C3D_BufInfo* bufInfo = C3D_GetBufInfo();
+    BufInfo_Init(bufInfo);
+    BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
 
-	// Configure the first fragment shading substage to just pass through the vertex color
-	// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
-	C3D_TexEnv* env = C3D_GetTexEnv(0);
-	C3D_TexEnvInit(env);
-	C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
-	C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
+    // Configure the first fragment shading substage to just pass through the vertex color
+    // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+    C3D_TexEnv* env = C3D_GetTexEnv(0);
+    C3D_TexEnvInit(env);
+    C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
+    C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
 }
 
 static void sceneRender(void) {
-	// Update the uniforms
-	C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+    // Update the uniforms
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
 
-	// Draw the VBO
-	C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count);
+    // Draw the VBO
+    C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count);
 }
 
 static void sceneExit(void) {
-	// Free the VBO
-	linearFree(vbo_data);
+    // Free the VBO
+    linearFree(vbo_data);
 
-	// Free the shader program
-	shaderProgramFree(&program);
-	DVLB_Free(vshader_dvlb);
+    // Free the shader program
+    shaderProgramFree(&program);
+    DVLB_Free(vshader_dvlb);
 }
 
 int main() {
     emuPrint("Entering main\n");
-	// Initialize graphics
-	gfxInitDefault();
-	C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
+    // Initialize graphics
+    gfxInitDefault();
+    C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
 
-	// Initialize the render target
-	C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
-	C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+    // Initialize the render target
+    C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+    C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
 
-	// Initialize the scene
-	sceneInit();
+    // Initialize the scene
+    sceneInit();
 
-	// Main loop
-	while (true)
-	{
-		// Render the scene
+    // Main loop
+    while (true)
+    {
+        // Render the scene
         emuPrint("Entering C3D_FrameBegin");
-		C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+        C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
             emuPrint("Clearing render target");
-			C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+            C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
             emuPrint("Calling C3D_FrameDrawOn");
-			C3D_FrameDrawOn(target);
+            C3D_FrameDrawOn(target);
             emuPrint("Calling sceneRender");
-			sceneRender();
+            sceneRender();
             emuPrint("Entering C3D_FrameEnd");
-		C3D_FrameEnd(0);
+        C3D_FrameEnd(0);
         emuPrint("Exited C3D_FrameEnd");
-	}
+    }
 
-	// Deinitialize the scene
-	sceneExit();
+    // Deinitialize the scene
+    sceneExit();
 
-	// Deinitialize graphics
-	C3D_Fini();
-	gfxExit();
-	return 0;
+    // Deinitialize graphics
+    C3D_Fini();
+    gfxExit();
+    return 0;
 }
diff --git a/tests/HelloWorldSVC/source/main.c b/tests/HelloWorldSVC/source/main.c
index 7c2a0972..073c14a1 100644
--- a/tests/HelloWorldSVC/source/main.c
+++ b/tests/HelloWorldSVC/source/main.c
@@ -1,5 +1,4 @@
 #include <3ds.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -16,5 +15,5 @@ int main(int argc, char** argv) {
         __asm__ volatile ("" ::: "memory");
     }
 
-	return 0;
+    return 0;
 }
diff --git a/tests/ImmediateModeTriangles/source/main.c b/tests/ImmediateModeTriangles/source/main.c
index 4eda609f..57fd15be 100644
--- a/tests/ImmediateModeTriangles/source/main.c
+++ b/tests/ImmediateModeTriangles/source/main.c
@@ -6,9 +6,9 @@
 #define CLEAR_COLOR 0x68B0D8FF
 
 #define DISPLAY_TRANSFER_FLAGS \
-	(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
-	GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
-	GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+    (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+    GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+    GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
 
 static DVLB_s* vshader_dvlb;
 static shaderProgram_s program;
@@ -17,105 +17,105 @@ static C3D_Mtx projection;
 
 static void sceneInit(void)
 {
-	// Load the vertex shader, create a shader program and bind it
-	vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
-	shaderProgramInit(&program);
-	shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
-	C3D_BindProgram(&program);
+    // Load the vertex shader, create a shader program and bind it
+    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+    shaderProgramInit(&program);
+    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+    C3D_BindProgram(&program);
 
-	// Get the location of the uniforms
-	uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
+    // Get the location of the uniforms
+    uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
 
-	// Configure attributes for use with the vertex shader
-	// Attribute format and element count are ignored in immediate mode
-	C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
-	AttrInfo_Init(attrInfo);
-	AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
-	AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color
+    // Configure attributes for use with the vertex shader
+    // Attribute format and element count are ignored in immediate mode
+    C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
+    AttrInfo_Init(attrInfo);
+    AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
+    AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color
 
-	// Compute the projection matrix
-	Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
+    // Compute the projection matrix
+    Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
 
-	// Configure the first fragment shading substage to just pass through the vertex color
-	// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
-	C3D_TexEnv* env = C3D_GetTexEnv(0);
-	C3D_TexEnvInit(env);
-	C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
-	C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
+    // Configure the first fragment shading substage to just pass through the vertex color
+    // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+    C3D_TexEnv* env = C3D_GetTexEnv(0);
+    C3D_TexEnvInit(env);
+    C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
+    C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
 }
 
 static void sceneRender(void)
 {
-	// Update the uniforms
-	C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+    // Update the uniforms
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
 
-	// Draw the triangle directly
-	C3D_ImmDrawBegin(GPU_TRIANGLES);
+    // Draw the triangle directly
+    C3D_ImmDrawBegin(GPU_TRIANGLES);
         // Triangle 1
-		C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position
-		C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);     // v1=color
+        C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position
+        C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);     // v1=color
 
-		C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f);
-		C3D_ImmSendAttrib(0.0f, 1.0f, 0.0f, 1.0f);
+        C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f);
+        C3D_ImmSendAttrib(0.0f, 1.0f, 0.0f, 1.0f);
 
-		C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f);
-		C3D_ImmSendAttrib(0.0f, 0.0f, 1.0f, 1.0f);
+        C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f);
+        C3D_ImmSendAttrib(0.0f, 0.0f, 1.0f, 1.0f);
 
         // Triangle 2
         C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f);
-		C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);
+        C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);
 
         C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f);
-		C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);
+        C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);
         
         C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f);
-		C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);
-	C3D_ImmDrawEnd();
+        C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);
+    C3D_ImmDrawEnd();
 }
 
 static void sceneExit(void)
 {
-	// Free the shader program
-	shaderProgramFree(&program);
-	DVLB_Free(vshader_dvlb);
+    // Free the shader program
+    shaderProgramFree(&program);
+    DVLB_Free(vshader_dvlb);
 }
 
 int main()
 {
-	// Initialize graphics
-	gfxInitDefault();
-	C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
+    // Initialize graphics
+    gfxInitDefault();
+    C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
 
-	// Initialize the render target
-	C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
-	C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+    // Initialize the render target
+    C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+    C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
 
-	// Initialize the scene
-	sceneInit();
+    // Initialize the scene
+    sceneInit();
 
-	// Main loop
-	while (aptMainLoop())
-	{
-		hidScanInput();
+    // Main loop
+    while (aptMainLoop())
+    {
+        hidScanInput();
 
-		// Respond to user input
-		u32 kDown = hidKeysDown();
-		if (kDown & KEY_START)
-			break; // break in order to return to hbmenu
+        // Respond to user input
+        u32 kDown = hidKeysDown();
+        if (kDown & KEY_START)
+            break; // break in order to return to hbmenu
 
-		// Render the scene
-		C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
-			C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
-			C3D_FrameDrawOn(target);
-			sceneRender();
-		C3D_FrameEnd(0);
-	}
+        // Render the scene
+        C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+            C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+            C3D_FrameDrawOn(target);
+            sceneRender();
+        C3D_FrameEnd(0);
+    }
 
-	// Deinitialize the scene
-	sceneExit();
+    // Deinitialize the scene
+    sceneExit();
 
-	// Deinitialize graphics
-	C3D_Fini();
-	gfxExit();
-	return 0;
+    // Deinitialize graphics
+    C3D_Fini();
+    gfxExit();
+    return 0;
 }
diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c
index 9bcab5b9..ea5b112f 100644
--- a/tests/PICA_LITP/source/main.c
+++ b/tests/PICA_LITP/source/main.c
@@ -4,12 +4,11 @@
 
 #include "vshader_shbin.h"
 
-
 #define CLEAR_COLOR 0x68B0D8FF
 
 #define DISPLAY_TRANSFER_FLAGS                                                                                                      \
-	(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \
-	 GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+    (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \
+     GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
 
 static DVLB_s* vshader_dvlb;
 static shaderProgram_s program;
@@ -17,107 +16,107 @@ static int uLoc_projection;
 static C3D_Mtx projection;
 
 static void sceneInit(void) {
-	// Load the vertex shader, create a shader program and bind it
-	vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
-	shaderProgramInit(&program);
-	shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
-	C3D_BindProgram(&program);
+    // Load the vertex shader, create a shader program and bind it
+    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+    shaderProgramInit(&program);
+    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+    C3D_BindProgram(&program);
 
-	// Get the location of the uniforms
-	uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
+    // Get the location of the uniforms
+    uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
 
-	// Configure attributes for use with the vertex shader
-	// Attribute format and element count are ignored in immediate mode
-	C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
-	AttrInfo_Init(attrInfo);
-	AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3);  // v0=position
-	AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3);  // v1=color
+    // Configure attributes for use with the vertex shader
+    // Attribute format and element count are ignored in immediate mode
+    C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
+    AttrInfo_Init(attrInfo);
+    AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3);  // v0=position
+    AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3);  // v1=color
 
-	// Compute the projection matrix
-	Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
+    // Compute the projection matrix
+    Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
 
-	// Configure the first fragment shading substage to just pass through the vertex color
-	// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
-	C3D_TexEnv* env = C3D_GetTexEnv(0);
-	C3D_TexEnvInit(env);
-	C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
-	C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
+    // Configure the first fragment shading substage to just pass through the vertex color
+    // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+    C3D_TexEnv* env = C3D_GetTexEnv(0);
+    C3D_TexEnvInit(env);
+    C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
+    C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
 }
 
 static void sceneRender(void) {
-	// Update the uniforms
-	C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+    // Update the uniforms
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
 
-	// Draw the triangle directly
-	C3D_ImmDrawBegin(GPU_TRIANGLES);
-	// Triangle 1
-	// This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1)
-	C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f);  // v0=position
-	C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);      // v1=color
+    // Draw the triangle directly
+    C3D_ImmDrawBegin(GPU_TRIANGLES);
+    // Triangle 1
+    // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1)
+    C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f);  // v0=position
+    C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f);      // v1=color
 
-	// This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1)
-	C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f);
-	C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f);
+    // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1)
+    C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f);
+    C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f);
 
-	// This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0)
-	C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f);
-	C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f);
+    // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0)
+    C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f);
+    C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f);
 
-	// Triangle 2
-	// The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp  with alpha set to 1 (cmp.x = cmp.y = 0)
-	C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f);
-	// Output g component should be 64 / 128  = 0.5
-	C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f);
+    // Triangle 2
+    // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp  with alpha set to 1 (cmp.x = cmp.y = 0)
+    C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f);
+    // Output g component should be 64 / 128  = 0.5
+    C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f);
 
-	C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f);
-	// Output g component should be 128 / 128 = 1.0
-	C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f);
+    C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f);
+    // Output g component should be 128 / 128 = 1.0
+    C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f);
 
-	C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f);
-	// Output g component should be 0 / 128 = 0
-	C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f);
-	C3D_ImmDrawEnd();
+    C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f);
+    // Output g component should be 0 / 128 = 0
+    C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f);
+    C3D_ImmDrawEnd();
 }
 
 static void sceneExit(void) {
-	// Free the shader program
-	shaderProgramFree(&program);
-	DVLB_Free(vshader_dvlb);
+    // Free the shader program
+    shaderProgramFree(&program);
+    DVLB_Free(vshader_dvlb);
 }
 
 int main() {
-	// Initialize graphics
-	gfxInitDefault();
-	C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
+    // Initialize graphics
+    gfxInitDefault();
+    C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
 
-	// Initialize the render target
-	C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
-	C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+    // Initialize the render target
+    C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+    C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
 
-	// Initialize the scene
-	sceneInit();
+    // Initialize the scene
+    sceneInit();
 
-	// Main loop
-	while (aptMainLoop()) {
-		hidScanInput();
+    // Main loop
+    while (aptMainLoop()) {
+        hidScanInput();
 
-		// Respond to user input
-		u32 kDown = hidKeysDown();
-		if (kDown & KEY_START) break;  // break in order to return to hbmenu
+        // Respond to user input
+        u32 kDown = hidKeysDown();
+        if (kDown & KEY_START) break;  // break in order to return to hbmenu
 
-		// Render the scene
-		C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
-		C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
-		C3D_FrameDrawOn(target);
-		sceneRender();
-		C3D_FrameEnd(0);
-	}
+        // Render the scene
+        C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+        C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+        C3D_FrameDrawOn(target);
+        sceneRender();
+        C3D_FrameEnd(0);
+    }
 
-	// Deinitialize the scene
-	sceneExit();
+    // Deinitialize the scene
+    sceneExit();
 
-	// Deinitialize graphics
-	C3D_Fini();
-	gfxExit();
-	return 0;
+    // Deinitialize graphics
+    C3D_Fini();
+    gfxExit();
+    return 0;
 }
\ No newline at end of file
diff --git a/tests/SimplerTri/source/main.c b/tests/SimplerTri/source/main.c
index 0ebd936d..e8fdc4fb 100644
--- a/tests/SimplerTri/source/main.c
+++ b/tests/SimplerTri/source/main.c
@@ -6,17 +6,17 @@
 #define CLEAR_COLOR 0x68B0D8FF
 
 #define DISPLAY_TRANSFER_FLAGS \
-	(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
-	GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
-	GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+    (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+    GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+    GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
 
 typedef struct { float x, y, z; } vertex;
 
 static const vertex vertex_list[] =
 {
-	{ 200.0f, 200.0f, 0.5f },
-	{ 100.0f, 40.0f, 0.5f },
-	{ 300.0f, 40.0f, 0.5f },
+    { 200.0f, 200.0f, 0.5f },
+    { 100.0f, 40.0f, 0.5f },
+    { 300.0f, 40.0f, 0.5f },
 };
 
 #define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
@@ -30,61 +30,61 @@ static void* vbo_data;
 
 static void sceneInit(void)
 {
-	// Load the vertex shader, create a shader program and bind it
-	vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
-	shaderProgramInit(&program);
-	shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
-	C3D_BindProgram(&program);
+    // Load the vertex shader, create a shader program and bind it
+    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+    shaderProgramInit(&program);
+    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+    C3D_BindProgram(&program);
 
-	// Get the location of the uniforms
-	uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
+    // Get the location of the uniforms
+    uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
 
-	// Configure attributes for use with the vertex shader
-	C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
-	AttrInfo_Init(attrInfo);
-	AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
-	AttrInfo_AddFixed(attrInfo, 1); // v1=color
+    // Configure attributes for use with the vertex shader
+    C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
+    AttrInfo_Init(attrInfo);
+    AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
+    AttrInfo_AddFixed(attrInfo, 1); // v1=color
 
-	// Set the fixed attribute (color) to orange
-	C3D_FixedAttribSet(1, 1.0, 0.5, 0.2, 1.0);
+    // Set the fixed attribute (color) to orange
+    C3D_FixedAttribSet(1, 1.0, 0.5, 0.2, 1.0);
 
-	// Compute the projection matrix
-	Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
+    // Compute the projection matrix
+    Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
 
-	// Create the VBO (vertex buffer object)
-	vbo_data = linearAlloc(sizeof(vertex_list));
-	memcpy(vbo_data, vertex_list, sizeof(vertex_list));
+    // Create the VBO (vertex buffer object)
+    vbo_data = linearAlloc(sizeof(vertex_list));
+    memcpy(vbo_data, vertex_list, sizeof(vertex_list));
 
-	// Configure buffers
-	C3D_BufInfo* bufInfo = C3D_GetBufInfo();
-	BufInfo_Init(bufInfo);
-	BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
+    // Configure buffers
+    C3D_BufInfo* bufInfo = C3D_GetBufInfo();
+    BufInfo_Init(bufInfo);
+    BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
 
-	// Configure the first fragment shading substage to just pass through the vertex color
-	// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
-	C3D_TexEnv* env = C3D_GetTexEnv(0);
-	C3D_TexEnvInit(env);
-	C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
-	C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
+    // Configure the first fragment shading substage to just pass through the vertex color
+    // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+    C3D_TexEnv* env = C3D_GetTexEnv(0);
+    C3D_TexEnvInit(env);
+    C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
+    C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
 }
 
 static void sceneRender(void)
 {
-	// Update the uniforms
-	C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+    // Update the uniforms
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
 
-	// Draw the VBO
-	C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count);
+    // Draw the VBO
+    C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count);
 }
 
 static void sceneExit(void)
 {
-	// Free the VBO
-	linearFree(vbo_data);
+    // Free the VBO
+    linearFree(vbo_data);
 
-	// Free the shader program
-	shaderProgramFree(&program);
-	DVLB_Free(vshader_dvlb);
+    // Free the shader program
+    shaderProgramFree(&program);
+    DVLB_Free(vshader_dvlb);
 }
 
 // Print string in emulator terminal
@@ -96,39 +96,39 @@ static void emuPrint(const char* str)
 int main()
 {
     emuPrint("Entering main\n");
-	// Initialize graphics
-	gfxInitDefault();
-	C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
+    // Initialize graphics
+    gfxInitDefault();
+    C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
 
-	// Initialize the render target
-	C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
-	C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+    // Initialize the render target
+    C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+    C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
 
-	// Initialize the scene
-	sceneInit();
+    // Initialize the scene
+    sceneInit();
 
-	// Main loop
-	while (true)
-	{
-		// Render the scene
+    // Main loop
+    while (true)
+    {
+        // Render the scene
         emuPrint("Entering C3D_FrameBegin");
-		C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+        C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
             emuPrint("Clearing render target");
-			C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+            C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
             emuPrint("Calling C3D_FrameDrawOn");
-			C3D_FrameDrawOn(target);
+            C3D_FrameDrawOn(target);
             emuPrint("Calling sceneRender");
-			sceneRender();
+            sceneRender();
             emuPrint("Entering C3D_FrameEnd");
-		C3D_FrameEnd(0);
+        C3D_FrameEnd(0);
         emuPrint("Exited C3D_FrameEnd");
-	}
+    }
 
-	// Deinitialize the scene
-	sceneExit();
+    // Deinitialize the scene
+    sceneExit();
 
-	// Deinitialize graphics
-	C3D_Fini();
-	gfxExit();
-	return 0;
+    // Deinitialize graphics
+    C3D_Fini();
+    gfxExit();
+    return 0;
 }
diff --git a/tests/shader.cpp b/tests/shader.cpp
index f5e70d87..12929c55 100644
--- a/tests/shader.cpp
+++ b/tests/shader.cpp
@@ -1,3 +1,4 @@
+#include <fmt/format.h>
 #include <nihstro/inline_assembly.h>
 
 #include <PICA/dynapica/shader_rec.hpp>
@@ -106,7 +107,7 @@ namespace Catch {
 	template <>
 	struct StringMaker<std::array<Floats::f24, 4>> {
 		static std::string convert(std::array<Floats::f24, 4> value) {
-			return std::format("({}, {}, {}, {})", value[0].toFloat32(), value[1].toFloat32(), value[2].toFloat32(), value[3].toFloat32());
+			return fmt::format("({}, {}, {}, {})", value[0].toFloat32(), value[1].toFloat32(), value[2].toFloat32(), value[3].toFloat32());
 		}
 	};
 }  // namespace Catch
@@ -292,8 +293,8 @@ SHADER_TEST_CASE("FLR", "[shader][vertex]") {
 SHADER_TEST_CASE("Uniform Read", "[shader][vertex][uniform]") {
 	const auto constant0 = nihstro::SourceRegister::MakeFloat(0);
 	auto shader = TestType::assembleTest({
-		{nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1
-		},
+		{nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "",
+		 nihstro::InlineAsm::RelativeAddress::A1},
 		{nihstro::OpCode::Id::MOV, output0, "xyzw", constant0, "xyzw", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1},
 		{nihstro::OpCode::Id::END},
 	});
@@ -322,8 +323,8 @@ SHADER_TEST_CASE("Address Register Offset", "[video_core][shader][shader_jit]")
 	const auto constant40 = nihstro::SourceRegister::MakeFloat(40);
 	auto shader = TestType::assembleTest({
 		// mova a0.x, sh_input.x
-		{nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1
-		},
+		{nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "",
+		 nihstro::InlineAsm::RelativeAddress::A1},
 		// mov sh_output.xyzw, c40[a0.x].xyzw
 		{nihstro::OpCode::Id::MOV, output0, "xyzw", constant40, "xyzw", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1},
 		{nihstro::OpCode::Id::END},
diff --git a/third_party/boost b/third_party/boost
index 4532ae23..ecfc47f5 160000
--- a/third_party/boost
+++ b/third_party/boost
@@ -1 +1 @@
-Subproject commit 4532ae239c4d0b88a547d28e19348c3b05bfd4d6
+Subproject commit ecfc47f58e73fa353456068a7245dc933ebe4472
diff --git a/third_party/host_memory/LICENSE.txt b/third_party/host_memory/LICENSE.txt
new file mode 100644
index 00000000..f288702d
--- /dev/null
+++ b/third_party/host_memory/LICENSE.txt
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/third_party/host_memory/host_memory.cpp b/third_party/host_memory/host_memory.cpp
new file mode 100644
index 00000000..bbd15ebe
--- /dev/null
+++ b/third_party/host_memory/host_memory.cpp
@@ -0,0 +1,753 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// Copyright 2008 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#if defined(_M_ARM64) || defined(__aarch64__)  // Either compiler-provided macro marks a 64-bit ARM target
+#define ARCHITECTURE_arm64  // Tested further down to select the arm64-only code paths of this file
+#endif
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+#include <boost/icl/separate_interval_set.hpp>
+#include <iterator>
+#include <unordered_map>
+
+#include "dynamic_library.hpp"
+
+#elif defined(__linux__) || defined(__FreeBSD__)  // ^^^ Windows ^^^ vvv Linux vvv
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <host_memory/scope_exit.h>
+#include <sys/mman.h>
+#include <sys/random.h>
+#include <unistd.h>
+
+#include <boost/icl/interval_set.hpp>
+
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
+#endif
+
+// On Android, include ioctl for shared memory ioctls, dlfcn for loading libandroid and linux/ashmem for ashmem defines
+#ifdef __ANDROID__
+#include <dlfcn.h>
+#include <linux/ashmem.h>
+#include <sys/ioctl.h>
+#endif
+
+#endif  // ^^^ Linux ^^^
+
+#include <host_memory/free_region_manager.h>
+#include <host_memory/host_memory.h>
+
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <random>
+
+#include "align.hpp"
+
+#define ASSERT(...)  // Expands to nothing: the asserted expression is never evaluated
+#define UNIMPLEMENTED_MSG(...)  // Expands to nothing: an "unimplemented" path simply continues
+#define ASSERT_MSG(...)  // Expands to nothing: neither the condition nor the message arguments are evaluated
+
+namespace Common {
+	constexpr size_t PageAlignment = 0x1000;  ///< 0x1000 bytes (4 KiB)
+	constexpr size_t HugePageSize = 0x200000;  ///< 0x200000 bytes (2 MiB); alignment unit used by the arm64 ChooseVirtualBase
+
+#if defined(_WIN32) && defined(PANDA3DS_HARDWARE_FASTMEM)
+
+// Placeholder-related flag values, defined here only when the Windows headers in use lack them (MinGW compatibility)
+#ifndef MEM_RESERVE_PLACEHOLDER
+#define MEM_RESERVE_PLACEHOLDER 0x00040000  // Passed to VirtualAlloc2 together with MEM_RESERVE
+#endif
+#ifndef MEM_REPLACE_PLACEHOLDER
+#define MEM_REPLACE_PLACEHOLDER 0x00004000  // Passed to MapViewOfFile3
+#endif
+#ifndef MEM_COALESCE_PLACEHOLDERS
+#define MEM_COALESCE_PLACEHOLDERS 0x00000001  // Passed to VirtualFreeEx together with MEM_RELEASE
+#endif
+#ifndef MEM_PRESERVE_PLACEHOLDER
+#define MEM_PRESERVE_PLACEHOLDER 0x00000002  // Passed to UnmapViewOfFile2 and, with MEM_RELEASE, to VirtualFreeEx
+#endif
+	// Function-pointer types for the Kernelbase exports that HostMemory::Impl looks up by name through GetFuncAddress
+	using PFN_CreateFileMapping2 = _Ret_maybenull_ HANDLE(WINAPI*)(
+		_In_ HANDLE File, _In_opt_ SECURITY_ATTRIBUTES* SecurityAttributes, _In_ ULONG DesiredAccess, _In_ ULONG PageProtection,
+		_In_ ULONG AllocationAttributes, _In_ ULONG64 MaximumSize, _In_opt_ PCWSTR Name,
+		_Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, _In_ ULONG ParameterCount
+	);
+
+	using PFN_VirtualAlloc2 = _Ret_maybenull_ PVOID(WINAPI*)(
+		_In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ SIZE_T Size, _In_ ULONG AllocationType, _In_ ULONG PageProtection,
+		_Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, _In_ ULONG ParameterCount
+	);
+
+	using PFN_MapViewOfFile3 = _Ret_maybenull_ PVOID(WINAPI*)(
+		_In_ HANDLE FileMapping, _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ ULONG64 Offset, _In_ SIZE_T ViewSize,
+		_In_ ULONG AllocationType, _In_ ULONG PageProtection, _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters,
+		_In_ ULONG ParameterCount
+	);
+
+	using PFN_UnmapViewOfFile2 = BOOL(WINAPI*)(_In_ HANDLE Process, _In_ PVOID BaseAddress, _In_ ULONG UnmapFlags);
+
+	/// Asks `dll` for the symbol `name` and stores it in `pfn`; if getSymbol reports failure, warns and throws std::bad_alloc.
+	template <typename T>
+	static void GetFuncAddress(Common::DynamicLibrary& dll, const char* name, T& pfn) {
+		if (dll.getSymbol(name, &pfn)) return;
+		Helpers::warn("Failed to load %s", name);
+		throw std::bad_alloc{};
+	}
+
+	/// Windows backend: one file mapping is viewed at backing_base and, on demand, inside the placeholder range at virtual_base.
+	class HostMemory::Impl {
+	  public:
+		/// Loads the Kernelbase entry points, then creates the file mapping and both address ranges; throws std::bad_alloc on failure.
+		explicit Impl(size_t backing_size_, size_t virtual_size_)
+			: backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()}, kernelbase_dll("Kernelbase") {
+			if (!kernelbase_dll.isOpen()) {
+				Helpers::warn("Failed to load Kernelbase.dll");
+				throw std::bad_alloc{};
+			}
+			GetFuncAddress(kernelbase_dll, "CreateFileMapping2", pfn_CreateFileMapping2);
+			GetFuncAddress(kernelbase_dll, "VirtualAlloc2", pfn_VirtualAlloc2);
+			GetFuncAddress(kernelbase_dll, "MapViewOfFile3", pfn_MapViewOfFile3);
+			GetFuncAddress(kernelbase_dll, "UnmapViewOfFile2", pfn_UnmapViewOfFile2);
+
+			// Allocate backing file map. (Sizes in the warnings below are cast to unsigned so they match their %X conversion.)
+			backing_handle = pfn_CreateFileMapping2(
+				INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ, PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0
+			);
+			if (!backing_handle) {
+				Helpers::warn("Failed to allocate %X MiB of backing memory", static_cast<unsigned>(backing_size >> 20));
+				throw std::bad_alloc{};
+			}
+			// Allocate a virtual memory for the backing file map as placeholder
+			backing_base =
+				static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, backing_size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0));
+			if (!backing_base) {
+				Release();
+				Helpers::warn("Failed to reserve %X MiB of virtual memory", static_cast<unsigned>(backing_size >> 20));
+				throw std::bad_alloc{};
+			}
+			// Map backing placeholder
+			void* const ret =
+				pfn_MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
+			if (ret != backing_base) {
+				Release();
+				Helpers::warn("Failed to map %X MiB of virtual memory", static_cast<unsigned>(backing_size >> 20));
+				throw std::bad_alloc{};
+			}
+			// Allocate virtual address placeholder
+			virtual_base =
+				static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, virtual_size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0));
+			if (!virtual_base) {
+				Release();
+				Helpers::warn("Failed to reserve %X GiB of virtual memory", static_cast<unsigned>(virtual_size >> 30));
+				throw std::bad_alloc{};
+			}
+		}
+
+		~Impl() { Release(); }
+
+		/// Maps `length` bytes of backing memory from `host_offset` at `virtual_offset`. `perms` is unused: MapView always maps read/write.
+		void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
+			std::scoped_lock lock{placeholder_mutex};
+			if (!IsNiechePlaceholder(virtual_offset, length)) {
+				Split(virtual_offset, length);
+			}
+			ASSERT(placeholders.find({virtual_offset, virtual_offset + length}) == placeholders.end());
+			TrackPlaceholder(virtual_offset, host_offset, length);
+			MapView(virtual_offset, host_offset, length);
+		}
+
+		/// Unmaps whatever is currently tracked as mapped inside the given part of the virtual range.
+		void Unmap(size_t virtual_offset, size_t length) {
+			std::scoped_lock lock{placeholder_mutex};
+			// Each call handles one placeholder and returns false once none is left in the range
+			while (UnmapOnePlaceholder(virtual_offset, length)) {}
+		}
+
+		/// Applies read/write, read-only or no-access protection to every tracked placeholder in the range; `execute` is ignored.
+		/// Write-without-read has no matching protection constant here, so it is logged and skipped rather than passed on as 0.
+		void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
+			if (write && !read) {
+				Helpers::warn("Unsupported protection flags: write access without read access");
+				return;
+			}
+			const DWORD new_flags = read ? (write ? PAGE_READWRITE : PAGE_READONLY) : PAGE_NOACCESS;
+			const size_t virtual_end = virtual_offset + length;
+
+			std::scoped_lock lock{placeholder_mutex};
+			auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end});
+			while (it != end) {
+				const size_t offset = std::max(it->lower(), virtual_offset);
+				const size_t protect_length = std::min(it->upper(), virtual_end) - offset;
+				DWORD old_flags{};
+				if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) {
+					Helpers::warn("Failed to change virtual memory protect rules");
+				}
+				++it;
+			}
+		}
+
+		bool ClearBackingRegion(size_t physical_offset, size_t length) {
+			// TODO: This does not seem to be possible on Windows. Both arguments are ignored and false is returned.
+			return false;
+		}
+
+		void EnableDirectMappedAddress() {
+			// TODO: not implemented for this backend; every call goes straight to Helpers::panic
+			Helpers::panic("Unimplemented: EnableDirectMappedAddress on Windows");
+		}
+
+		const size_t backing_size;  ///< Size of the backing memory in bytes
+		const size_t virtual_size;  ///< Size of the virtual address placeholder in bytes
+
+		u8* backing_base{};  ///< Address at which the whole backing file mapping is viewed
+		u8* virtual_base{};  ///< Start of the placeholder range; the virtual offsets taken by Map/Unmap/Protect are relative to it
+
+	  private:
+		/// Release all resources in the object: tracked views, the virtual range, the backing range and the mapping handle.
+		/// Failures are only logged. The constructor also calls this on its error paths.
+		void Release() {
+			if (!placeholders.empty()) {
+				for (const auto& placeholder : placeholders) {
+					if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder.lower(), MEM_PRESERVE_PLACEHOLDER)) {
+						Helpers::warn("Failed to unmap virtual memory placeholder");
+					}
+				}
+				Coalesce(0, virtual_size);
+			}
+			if (virtual_base && !VirtualFree(virtual_base, 0, MEM_RELEASE)) {
+				Helpers::warn("Failed to free virtual memory");
+			}
+			if (backing_base) {
+				if (!pfn_UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) {
+					Helpers::warn("Failed to unmap backing memory placeholder");
+				}
+				if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) {
+					Helpers::warn("Failed to free backing memory");
+				}
+			}
+			if (!CloseHandle(backing_handle)) {
+				Helpers::warn("Failed to free backing memory file handle");
+			}
+		}
+
+		/// Unmap one placeholder in the given range (partial unmaps are supported)
+		/// Return true when a placeholder was unmapped, false when no tracked placeholder is left in the range
+		bool UnmapOnePlaceholder(size_t virtual_offset, size_t length) {
+			const auto it = placeholders.find({virtual_offset, virtual_offset + length});
+			const auto begin = placeholders.begin();
+			const auto end = placeholders.end();
+			if (it == end) {
+				return false;
+			}
+			const size_t placeholder_begin = it->lower();
+			const size_t placeholder_end = it->upper();
+			const size_t unmap_begin = std::max(virtual_offset, placeholder_begin);
+			const size_t unmap_end = std::min(virtual_offset + length, placeholder_end);
+			ASSERT(unmap_begin >= placeholder_begin && unmap_begin < placeholder_end);
+			ASSERT(unmap_end <= placeholder_end && unmap_end > placeholder_begin);
+
+			const auto host_pointer_it = placeholder_host_pointers.find(placeholder_begin);
+			ASSERT(host_pointer_it != placeholder_host_pointers.end());
+			const size_t host_offset = host_pointer_it->second;
+
+			const bool split_left = unmap_begin > placeholder_begin;
+			const bool split_right = unmap_end < placeholder_end;
+
+			if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder_begin, MEM_PRESERVE_PLACEHOLDER)) {
+				Helpers::warn("Failed to unmap placeholder");
+			}
+			// If we have to remap memory regions due to partial unmaps, we are in a data race as
+			// Windows doesn't support remapping memory without unmapping first. Avoid adding any extra
+			// logic within the panic region described below.
+
+			// Panic region, we are in a data race right now
+			if (split_left || split_right) {
+				Split(unmap_begin, unmap_end - unmap_begin);
+			}
+			if (split_left) {
+				MapView(placeholder_begin, host_offset, unmap_begin - placeholder_begin);
+			}
+			if (split_right) {
+				MapView(unmap_end, host_offset + unmap_end - placeholder_begin, placeholder_end - unmap_end);
+			}
+			// End panic region
+
+			size_t coalesce_begin = unmap_begin;
+			if (!split_left) {
+				// Try to coalesce pages to the left
+				coalesce_begin = it == begin ? 0 : std::prev(it)->upper();
+				if (coalesce_begin != placeholder_begin) {
+					Coalesce(coalesce_begin, unmap_end - coalesce_begin);
+				}
+			}
+			if (!split_right) {
+				// Try to coalesce pages to the right
+				const auto next = std::next(it);
+				const size_t next_begin = next == end ? virtual_size : next->lower();
+				if (placeholder_end != next_begin) {
+					// We can coalesce to the right
+					Coalesce(coalesce_begin, next_begin - coalesce_begin);
+				}
+			}
+			// Remove and reinsert placeholder trackers
+			UntrackPlaceholder(it);
+			if (split_left) {
+				TrackPlaceholder(placeholder_begin, host_offset, unmap_begin - placeholder_begin);
+			}
+			if (split_right) {
+				TrackPlaceholder(unmap_end, host_offset + unmap_end - placeholder_begin, placeholder_end - unmap_end);
+			}
+			return true;
+		}
+
+		/// Replaces the placeholder at `virtual_offset` with a read/write view of the backing memory at `host_offset`; warns on failure.
+		void MapView(size_t virtual_offset, size_t host_offset, size_t length) {
+			if (!pfn_MapViewOfFile3(
+					backing_handle, process, virtual_base + virtual_offset, host_offset, length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0
+				)) {
+				Helpers::warn("Failed to map placeholder");
+			}
+		}
+
+		/// Splits the given range off as its own placeholder (MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER); warns on failure.
+		void Split(size_t virtual_offset, size_t length) {
+			if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
+				Helpers::warn("Failed to split placeholder");
+			}
+		}
+
+		/// Coalesces the placeholders inside the given range (MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS); warns on failure.
+		void Coalesce(size_t virtual_offset, size_t length) {
+			if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
+				Helpers::warn("Failed to coalesce placeholders");
+			}
+		}
+
+		void TrackPlaceholder(size_t virtual_offset, size_t host_offset, size_t length) {  // Records the range and its backing offset
+			placeholders.insert({virtual_offset, virtual_offset + length});
+			placeholder_host_pointers.emplace(virtual_offset, host_offset);
+		}
+
+		void UntrackPlaceholder(boost::icl::separate_interval_set<size_t>::iterator it) {  // Drops both records kept for `it`
+			placeholder_host_pointers.erase(it->lower());
+			placeholders.erase(it);
+		}
+
+		/// Return true when the range is a "niche": the next tracked placeholder begins exactly at its end and the previous
+		/// one ends exactly at its start (offset 0 when there is no previous one). Map() skips Split() for such a range.
+		bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
+			const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
+			if (it != placeholders.end() && it->lower() == virtual_offset + length) {
+				return it == placeholders.begin() ? virtual_offset == 0 : std::prev(it)->upper() == virtual_offset;
+			}
+			return false;
+		}
+
+		HANDLE process{};         ///< Current process handle
+		HANDLE backing_handle{};  ///< File based backing memory
+
+		DynamicLibrary kernelbase_dll;
+		PFN_CreateFileMapping2 pfn_CreateFileMapping2{};
+		PFN_VirtualAlloc2 pfn_VirtualAlloc2{};
+		PFN_MapViewOfFile3 pfn_MapViewOfFile3{};
+		PFN_UnmapViewOfFile2 pfn_UnmapViewOfFile2{};
+
+		std::mutex placeholder_mutex;                                  ///< Mutex for placeholders
+		boost::icl::separate_interval_set<size_t> placeholders;        ///< Mapped placeholders
+		std::unordered_map<size_t, size_t> placeholder_host_pointers;  ///< Placeholder backing offset
+	};
+
+#elif (defined(__linux__) || defined(__FreeBSD__)) && defined(PANDA3DS_HARDWARE_FASTMEM)  // ^^^ Windows ^^^ vvv Linux vvv
+
+#ifdef __ANDROID__
+#define ASHMEM_DEVICE "/dev/ashmem"  // Device node opened by the fallback path of AshmemCreateFileMapping
+	/// Android shared memory creation code from Dolphin.
+	/// Returns the result of ASharedMemory_create when that symbol resolves; otherwise an ashmem descriptor, or a negative value.
+	static int AshmemCreateFileMapping(const char* name, size_t size) {
+		// ASharedMemory path - works on API >= 26 and falls through on API < 26.
+		// Calling ASharedMemory_create the normal way would raise the minimum version requirement to API 26,
+		// so the symbol is resolved through dlopen/dlsym instead; both lookups live in function-local statics.
+		using CreateFn = int (*)(const char*, size_t);
+		static void* android_lib = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
+		static auto create_fn = reinterpret_cast<CreateFn>(dlsym(android_lib, "ASharedMemory_create"));
+		if (create_fn != nullptr) {
+			return create_fn(name, size);
+		}
+
+		// /dev/ashmem path - works on API < 29:
+		const int ashmem_fd = open(ASHMEM_DEVICE, O_RDWR);
+		if (ashmem_fd < 0) {
+			return ashmem_fd;
+		}
+
+		// Setting the name is optional, so the result of this ioctl is not checked
+		ioctl(ashmem_fd, ASHMEM_SET_NAME, name);
+
+		const int status = ioctl(ashmem_fd, ASHMEM_SET_SIZE, size);
+		if (status >= 0) {
+			return ashmem_fd;
+		}
+		close(ashmem_fd);
+		Helpers::warn("Ashmem allocation failed");
+		return status;
+	}
+#endif
+
+#ifdef ARCHITECTURE_arm64
+	/// Reserves `virtual_size` bytes at a HugePageSize-aligned address between 2^36 and 2^39 - virtual_size.
+	/// Returns the mapping, or MAP_FAILED if no such address exists or none of the 64 hinted attempts landed on its hint.
+	static void* ChooseVirtualBase(size_t virtual_size) {
+		constexpr uintptr_t Map39BitSize = (1ULL << 39);
+		constexpr uintptr_t Map36BitSize = (1ULL << 36);
+
+		// Not a cryptographic application. NOTE: the engine is default-seeded, so every run draws the same candidates.
+		std::mt19937_64 rng;
+
+		// We want to ensure we are allocating at an address aligned to the L2 block size.
+		// For Qualcomm devices, we must also allocate memory above 36 bits.
+		const size_t lower = Map36BitSize / HugePageSize;
+		const size_t upper = virtual_size < Map39BitSize ? (Map39BitSize - virtual_size) / HugePageSize : 0;
+		if (upper <= lower) {  // Nothing fits: `upper - lower` would wrap around, or `rng() % range` would divide by zero
+			return MAP_FAILED;
+		}
+		const size_t range = upper - lower;
+
+		// Try up to 64 times to allocate memory at random addresses in the range.
+		for (int i = 0; i < 64; i++) {
+			const uintptr_t hint_address = ((rng() % range) + lower) * HugePageSize;
+
+			// Note: we may be able to take advantage of MAP_FIXED_NOREPLACE here.
+			void* map_pointer =
+				mmap(reinterpret_cast<void*>(hint_address), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+			if (reinterpret_cast<uintptr_t>(map_pointer) == hint_address) {
+				return map_pointer;  // The mapping was placed exactly at the hint, we're done
+			}
+			// Unmap if necessary, and try again.
+			if (map_pointer != MAP_FAILED) {
+				munmap(map_pointer, virtual_size);
+			}
+		}
+
+		return MAP_FAILED;
+	}
+
+#else
+
+	/// Reserves `virtual_size` bytes of anonymous, private, read/write memory at an address left to the kernel.
+	/// Returns the mmap result unchanged, so failure shows up as MAP_FAILED.
+	static void* ChooseVirtualBase(size_t virtual_size) {
+		constexpr int protection = PROT_READ | PROT_WRITE;
+		constexpr int base_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+#if defined(__FreeBSD__)
+		// FreeBSD only: first ask for MAP_ALIGNED_SUPER; if that request fails, fall through to the plain one.
+		void* const aligned = mmap(nullptr, virtual_size, protection, base_flags | MAP_ALIGNED_SUPER, -1, 0);
+		if (aligned != MAP_FAILED) return aligned;
+#endif
+		return mmap(nullptr, virtual_size, protection, base_flags, -1, 0);
+	}
+
+#endif
+
+	class HostMemory::Impl {
+	  public:
+		/// Creates the shared backing file, maps it at backing_base and reserves the range returned by ChooseVirtualBase.
+		/// Failures are logged and thrown as std::bad_alloc; the SCOPE_EXIT block calls Release() while `good` is still false.
+		explicit Impl(size_t backing_size_, size_t virtual_size_) : backing_size{backing_size_}, virtual_size{virtual_size_} {
+			bool good = false;
+			SCOPE_EXIT {
+				if (!good) {
+					Release();
+				}
+			};
+
+			long page_size = sysconf(_SC_PAGESIZE);
+			if (page_size != 0x1000) {
+				Helpers::warn("page size %#lx is incompatible with 4K paging", page_size);  // printf-style, like the other warn calls
+				throw std::bad_alloc{};
+			}
+
+			// Backing memory initialization
+#if defined(__FreeBSD__) && __FreeBSD__ < 13
+			// XXX Drop after FreeBSD 12.* reaches EOL on 2024-06-30
+			fd = shm_open(SHM_ANON, O_RDWR, 0600);
+#elif defined(__ANDROID__)
+			fd = AshmemCreateFileMapping("HostMemory", 0);
+#else
+			fd = memfd_create("HostMemory", 0);
+#endif
+			if (fd < 0) {
+				Helpers::warn("memfd_create failed: %s", strerror(errno));
+				throw std::bad_alloc{};
+			}
+
+			// Defined to extend the file with zeros
+			int ret = ftruncate(fd, backing_size);
+			if (ret != 0) {
+				Helpers::warn("ftruncate failed with %s, are you out-of-memory?", strerror(errno));
+				throw std::bad_alloc{};
+			}
+
+			backing_base = static_cast<u8*>(mmap(nullptr, backing_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
+			if (backing_base == MAP_FAILED) {
+				Helpers::warn("mmap failed: %s", strerror(errno));
+				throw std::bad_alloc{};
+			}
+
+			// Virtual memory initialization
+			virtual_base = virtual_map_base = static_cast<u8*>(ChooseVirtualBase(virtual_size));
+			if (virtual_base == MAP_FAILED) {
+				Helpers::warn("mmap failed: %s", strerror(errno));
+				throw std::bad_alloc{};
+			}
+#if defined(__linux__)
+			madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
+#endif
+
+			free_manager.SetAddressSpace(virtual_base, virtual_size);
+			good = true;
+		}
+
+		~Impl() { Release(); }
+
+		void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
+			// Intersect the range with our address space.
+			AdjustMap(&virtual_offset, &length);
+
+			// We are removing a placeholder.
+			free_manager.AllocateBlock(virtual_base + virtual_offset, length);
+
+			// Deduce mapping protection flags.
+			int flags = PROT_NONE;
+			if (True(perms & MemoryPermission::Read)) {
+				flags |= PROT_READ;
+			}
+			if (True(perms & MemoryPermission::Write)) {
+				flags |= PROT_WRITE;
+			}
+#ifdef ARCHITECTURE_arm64
+			if (True(perms & MemoryPermission::Execute)) {
+				flags |= PROT_EXEC;
+			}
+#endif
+
+			void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd, host_offset);
+			ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
+		}
+
+		void Unmap(size_t virtual_offset, size_t length) {
+			// The method name is wrong. We're still talking about the virtual range.
+			// We don't want to unmap, we want to reserve this memory.
+
+			// Intersect the range with our address space.
+			AdjustMap(&virtual_offset, &length);
+
+			// Merge with any adjacent placeholder mappings.
+			auto [merged_pointer, merged_size] = free_manager.FreeBlock(virtual_base + virtual_offset, length);
+
+			void* ret = mmap(merged_pointer, merged_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+			ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
+		}
+
+		void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
+			// Intersect the range with our address space.
+			AdjustMap(&virtual_offset, &length);
+
+			int flags = PROT_NONE;
+			if (read) {
+				flags |= PROT_READ;
+			}
+			if (write) {
+				flags |= PROT_WRITE;
+			}
+#ifdef HAS_NCE
+			if (execute) {
+				flags |= PROT_EXEC;
+			}
+#endif
+			int ret = mprotect(virtual_base + virtual_offset, length, flags);
+			ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
+		}
+
+		bool ClearBackingRegion(size_t physical_offset, size_t length) {
+#ifdef __linux__
+			// Set MADV_REMOVE on backing map to destroy it instantly.
+			// This also deletes the area from the backing file.
+			int ret = madvise(backing_base + physical_offset, length, MADV_REMOVE);
+			ASSERT_MSG(ret == 0, "madvise failed: {}", strerror(errno));
+
+			return true;
+#else
+			return false;
+#endif
+		}
+
+		void EnableDirectMappedAddress() { virtual_base = nullptr; }
+
+		const size_t backing_size;  ///< Size of the backing memory in bytes
+		const size_t virtual_size;  ///< Size of the virtual address placeholder in bytes
+
+		u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)};
+		u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)};
+		u8* virtual_map_base{reinterpret_cast<u8*>(MAP_FAILED)};
+
+	  private:
+		/// Release all resources in the object
+		void Release() {
+			if (virtual_map_base != MAP_FAILED) {
+				int ret = munmap(virtual_map_base, virtual_size);
+				ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
+			}
+
+			if (backing_base != MAP_FAILED) {
+				int ret = munmap(backing_base, backing_size);
+				ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
+			}
+
+			if (fd != -1) {
+				int ret = close(fd);
+				ASSERT_MSG(ret == 0, "close failed: {}", strerror(errno));
+			}
+		}
+
+		void AdjustMap(size_t* virtual_offset, size_t* length) {
+			if (virtual_base != nullptr) {
+				return;
+			}
+
+			// If we are direct mapped, we want to make sure we are operating on a region
+			// that is in range of our virtual mapping.
+			size_t intended_start = *virtual_offset;
+			size_t intended_end = intended_start + *length;
+			size_t address_space_start = reinterpret_cast<size_t>(virtual_map_base);
+			size_t address_space_end = address_space_start + virtual_size;
+
+			if (address_space_start > intended_end || intended_start > address_space_end) {
+				*virtual_offset = 0;
+				*length = 0;
+			} else {
+				*virtual_offset = std::max(intended_start, address_space_start);
+				*length = std::min(intended_end, address_space_end) - *virtual_offset;
+			}
+		}
+
+		int fd{-1};  // memfd file descriptor, -1 is the error value of memfd_create
+		FreeRegionManager free_manager{};
+	};
+
+#else  // ^^^ Linux ^^^ vvv Generic vvv
+
+	// Placeholder backend for platforms without a fastmem implementation.
+	class HostMemory::Impl {
+	  public:
+		u8* backing_base = nullptr;
+		u8* virtual_base = nullptr;
+
+		// Always fails with std::bad_alloc, which HostMemory answers with its VirtualBuffer fallback.
+		// Please implement fastmem in a proper way on your platform.
+		explicit Impl(size_t /* backing_size */, size_t /* virtual_size */) { throw std::bad_alloc{}; }
+
+		// Construction never succeeds, so the operations below only exist to satisfy the interface.
+		void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm) {}
+		void Unmap(size_t virtual_offset, size_t length) {}
+		void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {}
+		void EnableDirectMappedAddress() {}
+		bool ClearBackingRegion(size_t physical_offset, size_t length) { return false; }
+	};
+
+#endif  // ^^^ Generic ^^^
+
+	// Sets up a fastmem arena when requested and possible, otherwise a plain VirtualBuffer.
+	HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_, bool enableFastmem) : backing_size(backing_size_), virtual_size(virtual_size_) {
+		try {
+			if (!enableFastmem) {
+				// Fastmem was not requested: take the same fallback path as a failed arena allocation.
+				throw std::bad_alloc{};
+			}
+
+			// The implementation will fail with std::bad_alloc on errors.
+			// One extra huge page of address space leaves room to align the base upwards below.
+			const size_t arena_backing = Common::alignUp(backing_size, PageAlignment);
+			const size_t arena_virtual = Common::alignUp(virtual_size, PageAlignment) + HugePageSize;
+			impl = std::make_unique<HostMemory::Impl>(arena_backing, arena_virtual);
+
+			backing_base = impl->backing_base;
+			if (u8* const raw_base = impl->virtual_base; raw_base != nullptr) {
+				// Ensure the virtual base is aligned to the L2 block size.
+				virtual_base = reinterpret_cast<u8*>(Common::alignUp(reinterpret_cast<uintptr_t>(raw_base), HugePageSize));
+				virtual_base_offset = virtual_base - raw_base;
+			}
+		} catch (const std::bad_alloc&) {
+			if (enableFastmem) {
+				Helpers::warn("Fastmem unavailable, falling back to VirtualBuffer for memory allocation");
+			}
+
+			// Fallback: only backing memory exists, there is no virtual arena to map into.
+			fallback_buffer = std::make_unique<Common::VirtualBuffer<u8>>(backing_size);
+			backing_base = fallback_buffer->data();
+			virtual_base = nullptr;
+		}
+	}
+
+	HostMemory::~HostMemory() = default;  // Defaulted here rather than in the header, where Impl is only forward-declared
+	HostMemory::HostMemory(HostMemory&&) noexcept = default;  // Member-wise move of the owning pointers and cached base pointers
+	HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default;
+
+	void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms, bool separate_heap) {
+		ASSERT(virtual_offset % PageAlignment == 0);
+		ASSERT(host_offset % PageAlignment == 0);
+		ASSERT(length % PageAlignment == 0);
+		ASSERT(virtual_offset + length <= virtual_size);
+		ASSERT(host_offset + length <= backing_size);
+		const bool has_arena = impl && virtual_base;  // False when running on the VirtualBuffer fallback
+		if (has_arena && length != 0) {
+			impl->Map(virtual_offset + virtual_base_offset, host_offset, length, perms);
+		}
+	}
+
+	void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) {
+		ASSERT(virtual_offset % PageAlignment == 0);
+		ASSERT(length % PageAlignment == 0);
+		ASSERT(virtual_offset + length <= virtual_size);
+		const bool has_arena = impl && virtual_base;  // False when running on the VirtualBuffer fallback
+		if (has_arena && length != 0) {
+			impl->Unmap(virtual_offset + virtual_base_offset, length);
+		}
+	}
+
+	void HostMemory::Protect(size_t virtual_offset, size_t length, MemoryPermission perm) {
+		ASSERT(virtual_offset % PageAlignment == 0);
+		ASSERT(length % PageAlignment == 0);
+		ASSERT(virtual_offset + length <= virtual_size);
+		if (!impl || !virtual_base || length == 0) {
+			return;  // No fastmem arena (or an empty range): nothing to protect
+		}
+		const auto allows = [perm](MemoryPermission flag) -> bool { return True(perm & flag); };
+		impl->Protect(
+			virtual_offset + virtual_base_offset, length, allows(MemoryPermission::Read), allows(MemoryPermission::Write), allows(MemoryPermission::Execute)
+		);
+	}
+
+	void HostMemory::ClearBackingRegion(size_t physical_offset, size_t length, u32 fill_value) {
+		const bool try_platform_clear = impl && fill_value == 0;  // Only a zero fill is ever delegated to the backend
+		if (!try_platform_clear || !impl->ClearBackingRegion(physical_offset, length)) {
+			std::memset(backing_base + physical_offset, fill_value, length);
+		}
+	}
+
+	void HostMemory::EnableDirectMappedAddress() {
+		if (Impl* const backend = impl.get()) {
+			backend->EnableDirectMappedAddress();
+			virtual_size += reinterpret_cast<uintptr_t>(virtual_base);  // presumably so absolute addresses pass the range asserts; confirm
+		}
+	}
+
+}  // namespace Common
\ No newline at end of file
diff --git a/third_party/host_memory/include/host_memory/free_region_manager.h b/third_party/host_memory/include/host_memory/free_region_manager.h
new file mode 100644
index 00000000..2e590d60
--- /dev/null
+++ b/third_party/host_memory/include/host_memory/free_region_manager.h
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <mutex>
+#include <boost/icl/interval_set.hpp>
+
+namespace Common {
+
+/// Tracks which parts of an address range are currently free; every operation holds m_mutex.
+class FreeRegionManager {
+public:
+    explicit FreeRegionManager() = default;
+    ~FreeRegionManager() = default;
+
+    /// Declares [start, start + size) to be the managed address space; all of it starts out free.
+    void SetAddressSpace(void* start, size_t size) {
+        FreeBlock(start, size);
+    }
+
+    /// Marks [block_ptr, block_ptr + size) as free. The returned {pointer, size} additionally
+    /// covers a free region found overlapping or directly next to the block, if there is one.
+    std::pair<void*, size_t> FreeBlock(void* block_ptr, size_t size) {
+        std::scoped_lock lk(m_mutex);
+
+        uintptr_t span_begin = reinterpret_cast<uintptr_t>(block_ptr);
+        uintptr_t span_end = span_begin + size;
+
+        // Search one address beyond each edge so that a directly adjacent region is found as well.
+        const auto neighbour = m_free_regions.find({span_begin - 1, span_end + 1});
+        if (neighbour != m_free_regions.end()) {
+            span_begin = std::min(span_begin, neighbour->lower());
+            span_end = std::max(span_end, neighbour->upper());
+        }
+
+        // Record the possibly widened span as free and hand it back to the caller.
+        m_free_regions.insert({span_begin, span_end});
+        return {reinterpret_cast<void*>(span_begin), span_end - span_begin};
+    }
+
+    /// Removes [block_ptr, block_ptr + size) from the set of free regions.
+    void AllocateBlock(void* block_ptr, size_t size) {
+        std::scoped_lock lk(m_mutex);
+
+        const auto first = reinterpret_cast<uintptr_t>(block_ptr);
+        m_free_regions.subtract({first, first + size});
+    }
+
+private:
+    std::mutex m_mutex;  ///< Guards m_free_regions
+    boost::icl::interval_set<uintptr_t> m_free_regions;  ///< Address ranges currently marked free
+};
+
+} // namespace Common
diff --git a/third_party/host_memory/include/host_memory/host_memory.h b/third_party/host_memory/include/host_memory/host_memory.h
new file mode 100644
index 00000000..79e664fa
--- /dev/null
+++ b/third_party/host_memory/include/host_memory/host_memory.h
@@ -0,0 +1,75 @@
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <memory>
+
+#include "enum_flag_ops.hpp"
+#include "helpers.hpp"
+#include "host_memory/virtual_buffer.h"
+
+namespace Common {
+
+	enum class MemoryPermission : u32 {  // Bit flags; combine them with the operators declared below
+		Read = 0x1,
+		Write = 0x2,
+		Execute = 0x4,
+		ReadWrite = Read | Write,
+	};
+	DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission)
+
+	/**
+	 * A low level linear memory buffer, which supports multiple mappings.
+	 * Its purpose is to rebuild a given sparse memory layout, including mirrors.
+	 */
+	class HostMemory {
+	  public:
+		explicit HostMemory(size_t backing_size_, size_t virtual_size_, bool enableFastmem);
+		~HostMemory();
+
+		/**
+		 * Copying is disabled. A copy would have to duplicate the buffer without the mappings.
+		 * TODO: Implement them with COW if needed.
+		 */
+		HostMemory(const HostMemory& other) = delete;
+		HostMemory& operator=(const HostMemory& other) = delete;
+
+		/**
+		 * Move constructors. They will move the buffer and the mappings to the new object.
+		 */
+		HostMemory(HostMemory&& other) noexcept;
+		HostMemory& operator=(HostMemory&& other) noexcept;
+
+		void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms, bool separate_heap);
+		void Unmap(size_t virtual_offset, size_t length, bool separate_heap);
+		void Protect(size_t virtual_offset, size_t length, MemoryPermission perms);
+
+		void EnableDirectMappedAddress();
+
+		void ClearBackingRegion(size_t physical_offset, size_t length, u32 fill_value);
+
+		[[nodiscard]] u8* BackingBasePointer() noexcept { return backing_base; }
+		[[nodiscard]] const u8* BackingBasePointer() const noexcept { return backing_base; }
+
+		[[nodiscard]] u8* VirtualBasePointer() noexcept { return virtual_base; }  // nullptr when no fastmem arena exists
+		[[nodiscard]] const u8* VirtualBasePointer() const noexcept { return virtual_base; }
+		/// True if `address` lies in [virtual_base, virtual_base + virtual_size); only compares, never dereferences.
+		bool IsInVirtualRange(const void* address) const noexcept { return address >= virtual_base && address < virtual_base + virtual_size; }
+
+	  private:
+		size_t backing_size{};  ///< Requested size of the backing memory in bytes
+		size_t virtual_size{};  ///< Requested size of the virtual range in bytes
+
+		// Low level handler for the platform dependent memory routines
+		class Impl;
+		std::unique_ptr<Impl> impl;  ///< Null when the VirtualBuffer fallback is in use
+		u8* backing_base{};
+		u8* virtual_base{};
+		size_t virtual_base_offset{};  ///< Distance from impl's virtual base to the aligned virtual_base
+
+		// Fallback if fastmem is not supported on this platform
+		std::unique_ptr<Common::VirtualBuffer<u8>> fallback_buffer;
+	};
+
+}  // namespace Common
\ No newline at end of file
diff --git a/third_party/host_memory/include/host_memory/scope_exit.h b/third_party/host_memory/include/host_memory/scope_exit.h
new file mode 100644
index 00000000..771ad2ef
--- /dev/null
+++ b/third_party/host_memory/include/host_memory/scope_exit.h
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <utility>
+
+namespace detail {
+	/// Owns a callable and invokes it on destruction unless Cancel() was called first.
+	template <class F>
+	class ScopeGuard {
+	  public:
+		constexpr ScopeGuard(F f_) : on_exit(std::move(f_)), armed(true) {}
+		constexpr ScopeGuard(ScopeGuard&& rhs) : on_exit(std::move(rhs.on_exit)), armed(rhs.armed) { rhs.Cancel(); }
+		constexpr ~ScopeGuard() {
+			if (armed) {
+				on_exit();
+			}
+		}
+
+		ScopeGuard(const ScopeGuard&) = delete;
+		ScopeGuard& operator=(const ScopeGuard&) = delete;
+		ScopeGuard& operator=(ScopeGuard&& rhs) = delete;
+		/// Disarms the guard so that its destructor does nothing.
+		constexpr void Cancel() { armed = false; }
+
+	  private:
+		F on_exit;
+		bool armed;
+	};
+
+	/// Returns a guard that runs `f` when it is destroyed; the caller keeps the guard and may Cancel() it.
+	template <class F>
+	constexpr auto MakeScopeGuard(F f) -> ScopeGuard<F> { return ScopeGuard<F>(std::move(f)); }
+
+
+	enum class ScopeGuardOnExit {};  // Empty tag type: it only selects the operator+ overload used by SCOPE_GUARD
+
+	/// Builds the guard for `ScopeGuardOnExit() + callable`, the expression that SCOPE_GUARD expands to.
+	template <typename F>
+	constexpr auto operator+(ScopeGuardOnExit, F&& f) -> ScopeGuard<F> { return ScopeGuard<F>(std::forward<F>(f)); }
+
+
+}  // namespace detail
+
+#define CONCATENATE_IMPL(s1, s2) s1##s2
+#define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2)  // Two-step paste so that macro arguments are expanded before ## is applied
+
+#ifdef __COUNTER__  // ANONYMOUS_VARIABLE(pref) appends a number to pref: the counter if available, else the line number
+#define ANONYMOUS_VARIABLE(pref) CONCATENATE(pref, __COUNTER__)
+#else
+#define ANONYMOUS_VARIABLE(pref) CONCATENATE(pref, __LINE__)
+#endif
+
+/**
+ * Like SCOPE_EXIT, but yields the guard object itself (e.g. `auto guard = SCOPE_GUARD { ... };`)
+ * so that the caller owns it and can call Cancel() on it to skip the clean-up.
+ */
+#define SCOPE_GUARD detail::ScopeGuardOnExit() + [&]()
+
+/**
+ * This macro allows you to conveniently specify a block of code that will run on scope exit. Handy
+ * for doing ad-hoc clean-up tasks in a function with multiple returns.
+ *
+ * Example usage:
+ * \code
+ * const int saved_val = g_foo;
+ * g_foo = 55;
+ * SCOPE_EXIT{ g_foo = saved_val; };
+ *
+ * if (Bar()) {
+ *     return 0;
+ * } else {
+ *     return 20;
+ * }
+ * \endcode
+ */
+#define SCOPE_EXIT auto ANONYMOUS_VARIABLE(SCOPE_EXIT_STATE_) = SCOPE_GUARD
diff --git a/third_party/host_memory/include/host_memory/virtual_buffer.h b/third_party/host_memory/include/host_memory/virtual_buffer.h
new file mode 100644
index 00000000..0b5ac54d
--- /dev/null
+++ b/third_party/host_memory/include/host_memory/virtual_buffer.h
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "helpers.hpp"
+#include <utility>
+
+namespace Common {
+
+	void* AllocateMemoryPages(std::size_t size) noexcept;  ///< Allocates `size` bytes of pages from the OS
+	void FreeMemoryPages(void* base, std::size_t size) noexcept;  ///< Frees pages from AllocateMemoryPages; pass the same `size`
+
+	/// Move-only owner of `count` elements of T stored in pages from AllocateMemoryPages().
+	template <typename T>
+	class VirtualBuffer final {
+	  public:
+		// TODO: Uncomment this and change Common::PageTable::PageInfo to be trivially constructible
+		// using std::atomic_ref once libc++ has support for it
+		// static_assert(
+		//     std::is_trivially_constructible_v<T>,
+		//     "T must be trivially constructible, as non-trivial constructors will not be executed "
+		//     "with the current allocator");
+
+		constexpr VirtualBuffer() = default;
+		explicit VirtualBuffer(std::size_t count) : alloc_size{count * sizeof(T)} {
+			base_ptr = reinterpret_cast<T*>(AllocateMemoryPages(alloc_size));
+		}
+
+		~VirtualBuffer() noexcept { FreeMemoryPages(base_ptr, alloc_size); }
+		VirtualBuffer(const VirtualBuffer&) = delete;
+		VirtualBuffer& operator=(const VirtualBuffer&) = delete;
+
+		VirtualBuffer(VirtualBuffer&& other) noexcept
+			: alloc_size{std::exchange(other.alloc_size, 0)}, base_ptr{std::exchange(other.base_ptr, nullptr)} {}
+
+		/// Frees the pages held so far (they used to leak), then takes over those of `other`, leaving it empty.
+		VirtualBuffer& operator=(VirtualBuffer&& other) noexcept {
+			if (this != &other) {
+				FreeMemoryPages(base_ptr, alloc_size);
+				alloc_size = std::exchange(other.alloc_size, 0);
+				base_ptr = std::exchange(other.base_ptr, nullptr);
+			}
+			return *this;
+		}
+
+		void resize(std::size_t count) {  // Reallocates; the previous contents are not copied over
+			const auto new_size = count * sizeof(T);
+			if (new_size == alloc_size) {
+				return;
+			}
+			FreeMemoryPages(base_ptr, alloc_size);
+			alloc_size = new_size;
+			base_ptr = reinterpret_cast<T*>(AllocateMemoryPages(alloc_size));
+		}
+
+		[[nodiscard]] constexpr const T& operator[](std::size_t index) const { return base_ptr[index]; }
+		[[nodiscard]] constexpr T& operator[](std::size_t index) { return base_ptr[index]; }
+		[[nodiscard]] constexpr T* data() { return base_ptr; }
+		[[nodiscard]] constexpr const T* data() const { return base_ptr; }
+		[[nodiscard]] constexpr std::size_t size() const { return alloc_size / sizeof(T); }
+
+	  private:
+		std::size_t alloc_size{};  // Size of the allocation in bytes
+		T* base_ptr{};  // Start of the allocation, nullptr when empty
+	};
+
+}  // namespace Common
\ No newline at end of file
diff --git a/third_party/host_memory/virtual_buffer.cpp b/third_party/host_memory/virtual_buffer.cpp
new file mode 100644
index 00000000..b6575796
--- /dev/null
+++ b/third_party/host_memory/virtual_buffer.cpp
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#endif
+
+#include "host_memory/virtual_buffer.h"
+
+namespace Common {
+	// Allocates `size` bytes of private, readable and writable memory directly from the OS.
+	// Calls Helpers::panic if the OS refuses the request.
+	void* AllocateMemoryPages(std::size_t size) noexcept {
+#ifdef _WIN32
+		void* pages = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
+#else
+		void* pages = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+		// mmap signals failure with MAP_FAILED rather than a null pointer; normalise it.
+		pages = (pages == MAP_FAILED) ? nullptr : pages;
+#endif
+
+		if (pages == nullptr) {
+			Helpers::panic("Failed to allocate memory pages");
+		}
+
+		return pages;
+	}
+
+	// Returns pages to the OS and panics if that fails; a null `base` is ignored.
+	void FreeMemoryPages(void* base, [[maybe_unused]] std::size_t size) noexcept {
+		if (base == nullptr) {
+			return;
+		}
+#ifdef _WIN32
+		const bool released = VirtualFree(base, 0, MEM_RELEASE) != 0;
+#else
+		const bool released = munmap(base, size) == 0;
+#endif
+		if (!released) {
+			Helpers::panic("Failed to free memory pages");
+		}
+	}
+
+}  // namespace Common
\ No newline at end of file
diff --git a/third_party/metal-cpp b/third_party/metal-cpp
index a63bd172..5caea74c 160000
--- a/third_party/metal-cpp
+++ b/third_party/metal-cpp
@@ -1 +1 @@
-Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6
+Subproject commit 5caea74c5f77492add32b7cad109d796e342ab49