diff --git a/.github/workflows/Linux_Build.yml b/.github/workflows/Linux_Build.yml
index 71a318a8..d58c3c94 100644
--- a/.github/workflows/Linux_Build.yml
+++ b/.github/workflows/Linux_Build.yml
@@ -26,7 +26,7 @@ jobs:
     - name: Configure CMake
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
       # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_USER_BUILD=ON
 
     - name: Build
       # Build your program with the given configuration
diff --git a/.github/workflows/MacOS_Build.yml b/.github/workflows/MacOS_Build.yml
index 819d4647..5e0de4bc 100644
--- a/.github/workflows/MacOS_Build.yml
+++ b/.github/workflows/MacOS_Build.yml
@@ -23,13 +23,10 @@ jobs:
     - name: Fetch submodules
       run: git submodule update --init --recursive
 
-    - name: Install LLVM # MacOS comes with "AppleClang" instead of regular Clang, and it can't build the project because no proper C++20
-      run: brew install llvm
-
     - name: Configure CMake
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
       # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON
 
     - name: Build
       # Build your program with the given configuration
diff --git a/.github/workflows/Windows_Build.yml b/.github/workflows/Windows_Build.yml
index 0a4abe41..2e8a8562 100644
--- a/.github/workflows/Windows_Build.yml
+++ b/.github/workflows/Windows_Build.yml
@@ -26,7 +26,7 @@ jobs:
     - name: Configure CMake
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
       # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON
 
     - name: Build
       # Build your program with the given configuration
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d40c3446..f5edc420 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 
-if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fbracket-depth=4096")
 endif()
 
@@ -13,8 +13,14 @@ endif()
 project(Alber)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 
+if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security")
+endif() 
+
+option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF)
 option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
 option(ENABLE_LTO "Enable link-time optimization" OFF)
+option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF)
 
 include_directories(${PROJECT_SOURCE_DIR}/include/)
 include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
@@ -159,7 +165,7 @@ source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES})
 add_executable(Alber ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES}
 ${PICA_SOURCE_FILES} ${RENDERER_GL_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES})
 
-if(ENABLE_LTO)
+if(ENABLE_LTO OR ENABLE_USER_BUILD)
   set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
 endif()
 
@@ -167,4 +173,12 @@ target_link_libraries(Alber PRIVATE dynarmic SDL2-static glad cryptopp)
 
 if(GPU_DEBUG_INFO)
   target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1)
-endif()
\ No newline at end of file
+endif()
+
+if(ENABLE_USER_BUILD)
+    target_compile_definitions(Alber PRIVATE PANDA3DS_USER_BUILD=1)
+endif()
+
+if(ENABLE_USER_BUILD OR DISABLE_PANIC_DEV)
+    target_compile_definitions(Alber PRIVATE PANDA3DS_LIMITED_PANICS=1)
+endif()
diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp
index 2b000320..ced2c557 100644
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@@ -69,7 +69,18 @@ class GPU {
 
 	Renderer renderer;
 	PicaVertex getImmediateModeVertex();
-public:
+
+  public:
+	// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
+	// Encoded in PICA native format
+	static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256;
+	std::array<uint32_t, LightingLutSize> lightingLUT;
+
+	// Used to prevent uploading the lighting_lut on every draw call
+	// Set to true when the CPU writes to the lighting_lut
+	// Set to false by the renderer when the lighting_lut is uploaded to the GPU
+	bool lightingLUTDirty = false;
+
 	GPU(Memory& mem);
 	void initGraphicsContext() { renderer.initGraphicsContext(); }
 	void getGraphicsContext() { renderer.getGraphicsContext(); }
diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp
index 6c868484..e1c9a819 100644
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@@ -10,6 +10,13 @@ namespace PICA {
 			ViewportHeight = 0x43,
 			ViewportInvh = 0x44,
 
+			// Clipping plane control
+			ClipEnable = 0x47,
+			ClipData0 = 0x48,
+			ClipData1 = 0x49,
+			ClipData2 = 0x4A,
+			ClipData3 = 0x4B,
+
 			DepthScale = 0x4D,
 			DepthOffset = 0x4E,
 			ShaderOutputCount = 0x4F,
@@ -55,6 +62,17 @@ namespace PICA {
 			ColourBufferLoc = 0x11D,
 			FramebufferSize = 0x11E,
 
+			// Lighting registers
+			LightingLUTIndex =  0x01C5,
+			LightingLUTData0 =  0x01C8,
+			LightingLUTData1 =  0x01C9,
+			LightingLUTData2 =  0x01CA,
+			LightingLUTData3 =  0x01CB,
+			LightingLUTData4 =  0x01CC,
+			LightingLUTData5 =  0x01CD,
+			LightingLUTData6 =  0x01CE,
+			LightingLUTData7 =  0x01CF,
+			
 			// Geometry pipeline registers
 			VertexAttribLoc = 0x200,
 			AttribFormatLow = 0x201,
@@ -156,6 +174,34 @@ namespace PICA {
 		};
 	}
 
+	namespace Lights {
+		enum : u32 {
+			LUT_D0 = 0,
+			LUT_D1,
+			LUT_FR,
+			LUT_RB,
+			LUT_RG,
+			LUT_RR,
+			LUT_SP0 = 0x8,
+			LUT_SP1,
+			LUT_SP2,
+			LUT_SP3,
+			LUT_SP4,
+			LUT_SP5,
+			LUT_SP6,
+			LUT_SP7,
+			LUT_DA0 = 0x10,
+			LUT_DA1,
+			LUT_DA2,
+			LUT_DA3,
+			LUT_DA4,
+			LUT_DA5,
+			LUT_DA6,
+			LUT_DA7,
+			LUT_Count
+		};
+	}
+
 	enum class TextureFmt : u32 {
 		RGBA8 = 0x0,
 		RGB8 = 0x1,
diff --git a/include/helpers.hpp b/include/helpers.hpp
index 53c57c7c..9830cc88 100644
--- a/include/helpers.hpp
+++ b/include/helpers.hpp
@@ -30,24 +30,31 @@ using s32 = std::int32_t;
 using s64 = std::int64_t;
 
 namespace Helpers {
-	[[noreturn]] static void panic(const char* fmt, ...) {
-		std::va_list args;
-		va_start(args, fmt);
+	// Unconditional panic, unlike panicDev which does not panic on user builds
+	template <class... Args>
+	[[noreturn]] static void panic(const char* fmt, Args&&... args) {
 		std::cout << termcolor::on_red << "[FATAL] ";
-		std::vprintf(fmt, args);
+		std::printf(fmt, args...);
 		std::cout << termcolor::reset << "\n";
-		va_end(args);
 
 		exit(1);
 	}
+	
+#ifdef PANDA3DS_LIMITED_PANICS
+	template <class... Args>
+	static void panicDev(const char* fmt, Args&&... args) {}
+#else
+	template <class... Args>
+	[[noreturn]] static void panicDev(const char* fmt, Args&&... args) {
+		panic(fmt, args...);
+	}
+#endif
 
-	static void warn(const char* fmt, ...) {
-		std::va_list args;
-		va_start(args, fmt);
+	template <class... Args>
+	static void warn(const char* fmt, Args&&... args) {
 		std::cout << termcolor::on_red << "[Warning] ";
-		std::vprintf(fmt, args);
+		std::printf(fmt, args...);
 		std::cout << termcolor::reset << "\n";
-		va_end(args);
 	}
 
 	static constexpr bool buildingInDebugMode() {
@@ -57,6 +64,13 @@ namespace Helpers {
 		return true;
 	}
 
+	static constexpr bool isUserBuild() {
+#ifdef PANDA3DS_USER_BUILD
+		return true;
+#endif
+		return false;
+	}
+
 	static void debug_printf(const char* fmt, ...) {
 		if constexpr (buildingInDebugMode()) {
 			std::va_list args;
diff --git a/include/opengl.hpp b/include/opengl.hpp
index 9d93078b..b259381b 100644
--- a/include/opengl.hpp
+++ b/include/opengl.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2022 PCSX-Redux authors                                 *
+ *   Copyright (C) 2022 PCSX-Redux & Panda3DS authors                      *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
@@ -524,6 +524,9 @@ namespace OpenGL {
     static void enableStencil() { glEnable(GL_STENCIL_TEST); }
     static void disableStencil() { glDisable(GL_STENCIL_TEST); }
 
+    static void enableClipPlane(GLuint index) { glEnable(GL_CLIP_DISTANCE0 + index); }    // Turns on GL_CLIP_DISTANCE<index>
+    static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); }  // Turns off GL_CLIP_DISTANCE<index>
+
     static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast<GLenum>(func)); }
 
     enum Primitives {
diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp
index 636c4c65..bf85904b 100644
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@@ -32,6 +32,7 @@ class Renderer {
 	GLint textureEnvScaleLoc = -1;
 	GLint textureEnvUpdateBufferLoc = -1;
 	GLint textureEnvBufferColorLoc = -1;
+	GLint picaRegLoc = -1;
 
 	// Depth configuration uniform locations
 	GLint depthOffsetLoc = -1;
@@ -66,6 +67,7 @@ class Renderer {
 	const std::array<u32, regNum>& regs;
 
 	OpenGL::Texture screenTexture;
+	GLuint lightLUTTextureArray;
 	OpenGL::Framebuffer screenFramebuffer;
 
 	OpenGL::Framebuffer getColourFBO();
@@ -76,6 +78,7 @@ class Renderer {
 	void bindDepthBuffer();
 	void setupTextureEnvState();
 	void bindTexturesToSlots();
+	void updateLightingLUT();
 
   public:
 	Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}
diff --git a/include/services/hid.hpp b/include/services/hid.hpp
index beadf7f1..70bae750 100644
--- a/include/services/hid.hpp
+++ b/include/services/hid.hpp
@@ -63,6 +63,8 @@ class HIDService {
 	MAKE_LOG_FUNCTION(log, hidLogger)
 
 	// Service commands
+	void disableAccelerometer(u32 messagePointer);
+	void disableGyroscopeLow(u32 messagePointer);
 	void enableAccelerometer(u32 messagePointer);
 	void enableGyroscopeLow(u32 messagePointer);
 	void getGyroscopeLowCalibrateParam(u32 messagePointer);
diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp
index 99e5221f..2efc4195 100644
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@@ -21,6 +21,7 @@ void GPU::reset() {
 	shaderUnit.reset();
 	shaderJIT.reset();
 	std::memset(vram, 0, vramSize);
+	lightingLUT.fill(0);
 
 	totalAttribCount = 0;
 	fixedAttribMask = 0;
diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp
index 610cfe16..a0eb5adc 100644
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@@ -24,18 +24,36 @@ void GPU::writeReg(u32 address, u32 value) {
 }
 
 u32 GPU::readInternalReg(u32 index) {
-	if (index > regNum) {
+	using namespace PICA::InternalRegs;
+
+	if (index >= regNum) [[unlikely]] {  // Valid indices are 0 .. regNum - 1; index == regNum is already out of bounds
 		Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index);
 		return 0;
 	}
 
+	else if (index >= LightingLUTData0 && index <= LightingLUTData7) [[unlikely]] {
+		const uint32_t lutReg = regs[LightingLUTIndex];  // Full LUT index register (named so it does not shadow the `index` parameter)
+		const uint32_t lutID = getBits<8, 5>(lutReg);    // Which LUT is being read from
+		uint32_t lutIndex = getBits<0, 8>(lutReg);       // Entry inside that LUT being read
+		uint32_t value = 0xffffffff;                     // Returned unchanged when lutID does not name a valid LUT
+
+		if (lutID < PICA::Lights::LUT_Count) {
+			value = lightingLUT[lutID * 256 + lutIndex];
+		}
+
+		// Every read advances the bottom 8 bits of the lighting LUT index register, wrapping back to 0 after 255
+		lutIndex += 1;
+		regs[LightingLUTIndex] = (lutReg & ~0xff) | (lutIndex & 0xff);
+		return value;
+	}
+
 	return regs[index];
 }
 
 void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 	using namespace PICA::InternalRegs;
 
-	if (index > regNum) {
+	if (index >= regNum) [[unlikely]] {  // Valid indices are 0 .. regNum - 1; index == regNum is already out of bounds
 		Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value);
 		return;
 	}
@@ -91,6 +109,30 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			break;
 		}
 
+		case LightingLUTData0:
+		case LightingLUTData1:
+		case LightingLUTData2:
+		case LightingLUTData3:
+		case LightingLUTData4:
+		case LightingLUTData5:
+		case LightingLUTData6:
+		case LightingLUTData7: {
+			const uint32_t lutReg = regs[LightingLUTIndex];  // Full LUT index register (named so it does not shadow the `index` parameter)
+			const uint32_t lutID = getBits<8, 5>(lutReg);    // Which LUT is being written to
+			uint32_t lutIndex = getBits<0, 8>(lutReg);       // Entry inside that LUT being written
+
+			if (lutID < PICA::Lights::LUT_Count) {
+				lightingLUT[lutID * 256 + lutIndex] = newValue;
+				lightingLUTDirty = true;  // Flag the CPU-side table as modified (see lightingLUTDirty in gpu.hpp)
+			}
+
+			// Every write advances the bottom 8 bits of the lighting LUT index register, wrapping back to 0 after 255
+			lutIndex += 1;
+			regs[LightingLUTIndex] = (lutReg & ~0xff) | (lutIndex & 0xff);
+
+			break;
+		}
+
 		case VertexFloatUniformIndex:
 			shaderUnit.vs.setFloatUniformIndex(value);
 			break;
diff --git a/src/core/kernel/directory_operations.cpp b/src/core/kernel/directory_operations.cpp
index 567d9cb8..fe4f58f4 100644
--- a/src/core/kernel/directory_operations.cpp
+++ b/src/core/kernel/directory_operations.cpp
@@ -33,7 +33,7 @@ void Kernel::readDirectory(u32 messagePointer, Handle directory) {
 	const u32 entryCount = mem.read32(messagePointer + 4);
 	const u32 outPointer = mem.read32(messagePointer + 12);
 	logFileIO("Directory::Read (handle = %X, entry count = %d, out pointer = %08X)\n", directory, entryCount, outPointer);
-	Helpers::panic("Unimplemented FsDir::Read");
+	Helpers::panicDev("Unimplemented FsDir::Read");
 
 	mem.write32(messagePointer + 4, Result::Success);
 	mem.write32(messagePointer + 8, 0);
diff --git a/src/core/kernel/idle_thread.cpp b/src/core/kernel/idle_thread.cpp
index 5389fecc..5abba373 100644
--- a/src/core/kernel/idle_thread.cpp
+++ b/src/core/kernel/idle_thread.cpp
@@ -59,12 +59,12 @@ void Kernel::setupIdleThread() {
 	t.fpscr = FPSCR::ThreadDefault;
 
 	// Our idle thread should have as low of a priority as possible, because, well, it's an idle thread.
-	// We handle this by giving it a priority of 0xff, which is lower than is actually allowed for user threads
-	// (High priority value = low priority)
-	t.priority = 0xff;
+	// We handle this by giving it a priority of 0x40, which is lower than is actually allowed for user threads
+	// (High priority value = low priority). This is the same priority used in the retail kernel.
+	t.priority = 0x40;
 	t.status = ThreadStatus::Ready;
 
 	// Add idle thread to the list of thread indices
 	threadIndices.push_back(idleThreadIndex);
 	sortThreads();
-}
\ No newline at end of file
+}
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index 0f29ddb5..75e0196a 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -44,24 +44,21 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 	exheaderInfo.offset = info.offset + 0x200;
 	exheaderInfo.size = exheaderSize;
 	exheaderInfo.hashRegionSize = 0;
+	exheaderInfo.encryptionInfo = std::nullopt;
 
 	exeFS.offset = info.offset + u64(*(u32*)&header[0x1A0]) * mediaUnit;
 	exeFS.size = u64(*(u32*)&header[0x1A4]) * mediaUnit;
 	exeFS.hashRegionSize = u64(*(u32*)&header[0x1A8]) * mediaUnit;
+	exeFS.encryptionInfo = std::nullopt;
 
 	romFS.offset = info.offset + u64(*(u32*)&header[0x1B0]) * mediaUnit;
 	romFS.size = u64(*(u32*)&header[0x1B4]) * mediaUnit;
 	romFS.hashRegionSize = u64(*(u32*)&header[0x1B8]) * mediaUnit;
+	romFS.encryptionInfo = std::nullopt;
 
+	// Shows whether we got the primary and secondary keys correctly
+	bool gotCryptoKeys = true;
 	if (encrypted) {
-		if (!aesEngine.haveKeys()) {
-			Helpers::panic(
-				"Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n"
-				"app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!"
-			);
-			return false;
-		}
-
 		Crypto::AESKey primaryKeyY;
 		Crypto::AESKey secondaryKeyY;
 		std::memcpy(primaryKeyY.data(), header, primaryKeyY.size());
@@ -69,44 +66,36 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 		if (!seedCrypto) {
 			secondaryKeyY = primaryKeyY;
 		} else {
-			Helpers::panic("Seed crypto is not supported");
-			return false;
+			Helpers::warn("Seed crypto is not supported");
+			gotCryptoKeys = false;
 		}
 
 		auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY);
-
-		if (!primaryResult.first) {
-			Helpers::panic("getPrimaryKey failed!");
-			return false;
-		}
-
-		Crypto::AESKey primaryKey = primaryResult.second;
-
 		auto secondaryResult = getSecondaryKey(aesEngine, secondaryKeyY);
 
-		if (!secondaryResult.first) {
-			Helpers::panic("getSecondaryKey failed!");
-			return false;
+		if (!primaryResult.first || !secondaryResult.first) {
+			gotCryptoKeys = false;
+		} else {
+			Crypto::AESKey primaryKey = primaryResult.second;
+			Crypto::AESKey secondaryKey = secondaryResult.second;
+
+			EncryptionInfo encryptionInfoTmp;
+			encryptionInfoTmp.normalKey = primaryKey;
+			encryptionInfoTmp.initialCounter.fill(0);
+
+			for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
+				encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
+			}
+			encryptionInfoTmp.initialCounter[8] = 1;
+			exheaderInfo.encryptionInfo = encryptionInfoTmp;
+
+			encryptionInfoTmp.initialCounter[8] = 2;
+			exeFS.encryptionInfo = encryptionInfoTmp;
+
+			encryptionInfoTmp.normalKey = secondaryKey;
+			encryptionInfoTmp.initialCounter[8] = 3;
+			romFS.encryptionInfo = encryptionInfoTmp;
 		}
-
-		Crypto::AESKey secondaryKey = secondaryResult.second;
-
-		EncryptionInfo encryptionInfoTmp;
-		encryptionInfoTmp.normalKey = primaryKey;
-		encryptionInfoTmp.initialCounter.fill(0);
-
-		for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
-			encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
-		}
-		encryptionInfoTmp.initialCounter[8] = 1;
-		exheaderInfo.encryptionInfo = encryptionInfoTmp;
-
-		encryptionInfoTmp.initialCounter[8] = 2;
-		exeFS.encryptionInfo = encryptionInfoTmp;
-
-		encryptionInfoTmp.normalKey = secondaryKey;
-		encryptionInfoTmp.initialCounter[8] = 3;
-		romFS.encryptionInfo = encryptionInfoTmp;
 	}
 
 	if (exheaderSize != 0) {
@@ -125,9 +114,28 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 		if (u32(programID) == u32(jumpID) && encrypted) {
 			printf("NCSD is supposedly ecrypted but not actually encrypted\n");
 			encrypted = false;
+
+			// Cartridge is not actually encrypted, set all of our encryption info structures to nullopt
+			exheaderInfo.encryptionInfo = std::nullopt;
+			romFS.encryptionInfo = std::nullopt;
+			exeFS.encryptionInfo = std::nullopt;
 		}
+
 		// If it's truly encrypted, we need to read section again.
 		if (encrypted) {
+			if (!aesEngine.haveKeys()) {
+				Helpers::panic(
+					"Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n"
+					"app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!"
+				);
+				return false;
+			}
+
+			if (!gotCryptoKeys) {
+				Helpers::panic("ROM is encrypted but it seems we couldn't get either the primary or the secondary key");
+				return false;
+			}
+
 			auto [success, bytes] = readFromFile(file, exheaderInfo, &exheader[0], 0, exheaderSize);
 			if (!success || bytes != exheaderSize) {
 				printf("Failed to read Extended NCCH header\n");
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 37c13c7d..dfa155a2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -3,6 +3,7 @@
 #include "resource_limits.hpp"
 #include <cassert>
 #include <chrono> // For time since epoch
+#include <ctime>
 
 using namespace KernelMemoryTypes;
 
@@ -424,9 +425,20 @@ void Memory::mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size) {
 u64 Memory::timeSince3DSEpoch() {
 	using namespace std::chrono;
 
-	// ms since Jan 1 1970
-	milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
-	// ms between Jan 1 1900 and Jan 1 1970 (2208988800 seconds elapsed between the two)
-	constexpr u64 offset = 2208988800ull * 1000;
-	return ms.count() + offset;
+	std::time_t rawTime = std::time(nullptr); // Get current UTC time
+	auto localTime = std::localtime(&rawTime); // Convert to local time
+
+	bool daylightSavings = localTime->tm_isdst > 0; // Get if time includes DST
+	localTime = std::gmtime(&rawTime);
+
+	// Use gmtime + mktime to calculate difference between local time and UTC
+	auto timezoneDifference = rawTime - std::mktime(localTime);
+	if (daylightSavings) {
+		timezoneDifference += 60ull * 60ull; // Add 1 hour (60 seconds * 60 minutes)
+	}
+
+	// seconds between Jan 1 1900 and Jan 1 1970
+	constexpr u64 offset = 2208988800ull;
+	milliseconds ms = duration_cast<milliseconds>(seconds(rawTime + timezoneDifference + offset));
+	return ms.count();
 }
\ No newline at end of file
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index fe28c19a..589457f5 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -5,29 +5,42 @@
 
 using namespace Floats;
 using namespace Helpers;
-
-// This is all hacked up to display our first triangle
+using namespace PICA;
 
 const char* vertexShader = R"(
 	#version 410 core
 	
-	layout (location = 0) in vec4 a_coords;
-	layout (location = 1) in vec4 a_vertexColour;
-	layout (location = 2) in vec2 a_texcoord0;
-	layout (location = 3) in vec2 a_texcoord1;
-	layout (location = 4) in float a_texcoord0_w;
-	layout (location = 5) in vec2 a_texcoord2;
+	layout (location = 0) in vec4  a_coords;
+	layout (location = 1) in vec4  a_quaternion;
+	layout (location = 2) in vec4  a_vertexColour;
+	layout (location = 3) in vec2  a_texcoord0;
+	layout (location = 4) in vec2  a_texcoord1;
+	layout (location = 5) in float a_texcoord0_w;
+	layout (location = 6) in vec3  a_view;
+	layout (location = 7) in vec2  a_texcoord2;
 
+	out vec3 v_normal;
+	out vec3 v_tangent;
+	out vec3 v_bitangent;
 	out vec4 v_colour;
 	out vec3 v_texcoord0;
 	out vec2 v_texcoord1;
+	out vec3 v_view;
 	out vec2 v_texcoord2;
 	flat out vec4 v_textureEnvColor[6];
 	flat out vec4 v_textureEnvBufferColor;
 
+	out float gl_ClipDistance[2];
+
 	// TEV uniforms
 	uniform uint u_textureEnvColor[6];
 	uniform uint u_textureEnvBufferColor;
+	uniform uint u_picaRegs[0x200 - 0x47];
+
+	// Helper so that the implementation of u_pica_regs can be changed later
+	uint readPicaReg(uint reg_addr){
+		return u_picaRegs[reg_addr - 0x47];
+	}
 
 	vec4 abgr8888ToVec4(uint abgr) {
 		const float scale = 1.0 / 255.0;
@@ -40,6 +53,31 @@ const char* vertexShader = R"(
 		);
 	}
 
+	vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){
+		vec3 u = q.xyz;
+		float s = q.w;
+		return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v  + 2.0 * s * cross(u, v);
+	}
+
+	// Convert an arbitrary-width floating point literal to an f32
+	float decodeFP(uint hex, uint E, uint M){
+		uint width = M + E + 1u;
+		uint bias = 128u - (1u << (E - 1u));
+		uint exponent = (hex >> M) & ((1u << E) - 1u);
+		uint mantissa = hex & ((1u << M) - 1u);
+		uint sign = (hex >> (E + M)) << 31u;
+
+		if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
+			if (exponent == (1u << E) - 1u) exponent = 255u;
+			else exponent += bias;
+			hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
+		} else {
+			hex = sign;
+		}
+
+        return uintBitsToFloat(hex);
+	}
+
 	void main() {
 		gl_Position = a_coords;
 		v_colour = a_vertexColour;
@@ -48,21 +86,45 @@ const char* vertexShader = R"(
 		v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
 		v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
 		v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
+		v_view = a_view; 
+
+		v_normal    = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
+		v_tangent   = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
+		v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
 
 		for (int i = 0; i < 6; i++) {
 			v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
 		}
 
 		v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor);
+
+		// Parse clipping plane registers
+		// The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 
+		// With n = (A, B, C) being the normal vector and D being the origin point distance
+		// Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1]
+		vec4 clipData = vec4(
+			decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16),
+			decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16),
+			decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16),
+			decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16)
+		);
+
+		// There's also another, always-on clipping plane based on vertex z
+		gl_ClipDistance[0] = -a_coords.z;
+		gl_ClipDistance[1] = dot(clipData, a_coords);
 	}
 )";
 
 const char* fragmentShader = R"(
 	#version 410 core
 	
+	in vec3 v_tangent;
+	in vec3 v_normal;
+	in vec3 v_bitangent;
 	in vec4 v_colour;
 	in vec3 v_texcoord0;
 	in vec2 v_texcoord1;
+	in vec3 v_view;
 	in vec2 v_texcoord2;
 	flat in vec4 v_textureEnvColor[6];
 	flat in vec4 v_textureEnvBufferColor;
@@ -87,6 +149,14 @@ const char* fragmentShader = R"(
 	uniform sampler2D u_tex0;
 	uniform sampler2D u_tex1;
 	uniform sampler2D u_tex2;
+	uniform sampler1DArray u_tex_lighting_lut;
+
+	uniform uint u_picaRegs[0x200 - 0x47];
+
+	// Helper so that the implementation of u_pica_regs can be changed later
+	uint readPicaReg(uint reg_addr){
+		return u_picaRegs[reg_addr - 0x47];
+	}
 
 	vec4 tevSources[16];
 	vec4 tevNextPreviousBuffer;
@@ -190,9 +260,196 @@ const char* fragmentShader = R"(
 		return result;
 	}
 
+	#define D0_LUT 0u
+	#define D1_LUT 1u
+	#define SP_LUT 2u
+	#define FR_LUT 3u
+	#define RB_LUT 4u
+	#define RG_LUT 5u
+	#define RR_LUT 6u
+
+	float lutLookup(uint lut, uint light, float value){
+		if (lut >= FR_LUT && lut <= RR_LUT)
+			lut -= 1;
+		if (lut==SP_LUT)
+			lut = light + 8;
+		return texture(u_tex_lighting_lut, vec2(value, lut)).r; 
+	}
+
+	vec3 regToColor(uint reg) {
+		// Normalization scale to convert from [0...255] to [0.0...1.0]
+		const float scale = 1.0 / 255.0;
+
+		return scale * vec3(
+			float(bitfieldExtract(reg, 20, 8)),
+			float(bitfieldExtract(reg, 10, 8)),
+			float(bitfieldExtract(reg, 00, 8))
+		);
+	}
+
+	// Convert an arbitrary-width floating point literal to an f32
+	float decodeFP(uint hex, uint E, uint M){
+		uint width = M + E + 1u;
+		uint bias = 128u - (1u << (E - 1u));
+		uint exponent = (hex >> M) & ((1u << E) - 1u);
+		uint mantissa = hex & ((1u << M) - 1u);
+		uint sign = (hex >> (E + M)) << 31u;
+
+		if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
+			if (exponent == (1u << E) - 1u) exponent = 255u;
+			else exponent += bias;
+			hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
+		} else {
+			hex = sign;
+		}
+
+        return uintBitsToFloat(hex);
+	}
+
+	// Implements the following algorithm: https://mathb.in/26766
 	void calcLighting(out vec4 primary_color, out vec4 secondary_color){
-		primary_color = vec4(vec3(0.5) ,1.0);
-		secondary_color = vec4(vec3(0.5) ,1.0);
+		// Quaternions describe a transformation from surface-local space to eye space.
+		// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
+		// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
+		vec3 normal    = normalize(v_normal   );
+		vec3 tangent   = normalize(v_tangent  );
+		vec3 bitangent = normalize(v_bitangent);
+		vec3 view = normalize(v_view);
+
+		uint GPUREG_LIGHTING_ENABLE  = readPicaReg(0x008F);
+		if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){
+			primary_color = secondary_color = vec4(1.0);
+			return;
+		}
+
+		uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0);
+		uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1;
+		uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9);
+
+		primary_color   = vec4(vec3(0.0),1.0);
+		secondary_color = vec4(vec3(0.0),1.0);
+
+		primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
+
+		uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0);
+		uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1);
+		uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3);
+		uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4);
+		uint GPUREG_LIGHTING_LUTINPUT_SCALE =  readPicaReg(0x01D2);
+		float d[7];
+
+		bool error_unimpl = false;
+
+		for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){
+			uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3);
+		
+			uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id);
+
+			vec3 light_vector = normalize(vec3(
+				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10),
+				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10),
+				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
+			));
+
+			// Positional Light
+			if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0)
+				error_unimpl = true;
+
+			vec3 half_vector = normalize(normalize(light_vector) + view);
+
+			for(int c = 0; c < 7; c++){
+				if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){
+					uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
+					float scale = float(1u << scale_id);
+					if (scale_id >= 6u)
+						scale/=256.0;
+
+					uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
+					if (input_id == 0u) d[c] = dot(normal,half_vector);
+					else if (input_id == 1u) d[c] = dot(view,half_vector);
+					else if (input_id == 2u) d[c] = dot(normal,view);
+					else if (input_id == 3u) d[c] = dot(light_vector,normal);
+					else if (input_id == 4u){
+						uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id);
+						uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id);
+						vec3 spot_light_vector = normalize(vec3(
+							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11),
+							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11),
+							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11)
+						));
+						d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
+					} else if (input_id == 5u) {
+						d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
+						error_unimpl = true;
+					} else {
+						d[c] = 1.0;
+					}
+
+					d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale;
+					if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) 
+						d[c] = abs(d[c]);
+				} else {
+					d[c] = 1.0;
+				}
+			}
+			
+			uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
+			if (lookup_config == 0) {
+				d[D1_LUT] = 0.0;
+				d[FR_LUT] = 0.0;
+				d[RG_LUT]= d[RB_LUT] = d[RR_LUT];
+			} else if(lookup_config == 1) {
+				d[D0_LUT] = 0.0;
+				d[D1_LUT] = 0.0;
+				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
+			} else if(lookup_config == 2) {
+				d[FR_LUT] = 0.0;
+				d[SP_LUT] = 0.0;
+				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
+			} else if(lookup_config == 3) {
+				d[SP_LUT] = 0.0;
+				d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0;
+			} else if (lookup_config == 4) {
+				d[FR_LUT] = 0.0;
+			} else if (lookup_config == 5) {
+				d[D1_LUT] = 0.0;
+			} else if (lookup_config == 6) {
+				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
+			}
+
+			float distance_factor = 1.0; // a
+			float indirect_factor = 1.0; // fi
+			float shadow_factor = 1.0;   // o
+
+			float NdotL = dot(normal, light_vector); //Li dot N
+
+			// Two sided diffuse
+			if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL);
+			else NdotL = abs(NdotL);
+
+			float light_factor =  distance_factor*d[SP_LUT]*indirect_factor*shadow_factor;
+
+			primary_color.rgb   += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL);
+			secondary_color.rgb += light_factor * (
+									 regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
+									 regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])
+									);
+		}	
+		uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
+		uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
+
+		if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
+		if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
+
+		if (error_unimpl) {
+			secondary_color = primary_color = vec4(1.0,0.,1.0,1.0);
+		}
 	}
 
 	void main() {
@@ -232,6 +489,8 @@ const char* fragmentShader = R"(
 		if (tevUnimplementedSourceFlag) {
 			 // fragColour = vec4(1.0, 0.0, 1.0, 1.0);
 		}
+		// fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
+
 
 		// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
 		// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
@@ -371,11 +630,13 @@ void Renderer::initGraphicsContext() {
 	depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
 	depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
 	depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
+	picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
 
-	// Init sampler objects
+	// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
+	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
 
 	OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
 	OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
@@ -392,21 +653,27 @@ void Renderer::initGraphicsContext() {
 	// Position (x, y, z, w) attributes
 	vao.setAttributeFloat<float>(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions));
 	vao.enableAttribute(0);
-	// Colour attribute
-	vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour));
+	// Quaternion attribute
+	vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion));
 	vao.enableAttribute(1);
-	// UV 0 attribute
-	vao.setAttributeFloat<float>(2, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0));
+	// Colour attribute
+	vao.setAttributeFloat<float>(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour));
 	vao.enableAttribute(2);
-	// UV 1 attribute
-	vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1));
+	// UV 0 attribute
+	vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0));
 	vao.enableAttribute(3);
-	// UV 0 W-component attribute
-	vao.setAttributeFloat<float>(4, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w));
+	// UV 1 attribute
+	vao.setAttributeFloat<float>(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1));
 	vao.enableAttribute(4);
-	// UV 2 attribute
-	vao.setAttributeFloat<float>(5, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2));
+	// UV 0 W-component attribute
+	vao.setAttributeFloat<float>(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w));
 	vao.enableAttribute(5);
+	// View
+	vao.setAttributeFloat<float>(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view));
+	vao.enableAttribute(6);
+	// UV 2 attribute
+	vao.setAttributeFloat<float>(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2));
+	vao.enableAttribute(7);
 
 	dummyVBO.create();
 	dummyVAO.create();
@@ -414,6 +681,8 @@ void Renderer::initGraphicsContext() {
 	// Create texture and framebuffer for the 3DS screen
 	const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320
 	const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
+	
+	glGenTextures(1,&lightLUTTextureArray);
 
 	auto prevTexture = OpenGL::getTex2D();
 	screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
@@ -543,6 +812,8 @@ void Renderer::bindTexturesToSlots() {
 		tex.bind();
 	}
 
+	glActiveTexture(GL_TEXTURE0 + 3);
+	glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
 	glActiveTexture(GL_TEXTURE0);
 
 	// Update the texture unit configuration uniform if it changed
@@ -552,6 +823,24 @@ void Renderer::bindTexturesToSlots() {
 		glUniform1ui(texUnitConfigLoc, texUnitConfig);
 	}
 }
+// Re-uploads gpu.lightingLUT to the GL_TEXTURE_1D_ARRAY bound on texture unit 3 (sampler u_tex_lighting_lut) and clears the dirty flag
+void Renderer::updateLightingLUT() {
+	std::array<u16, GPU::LightingLutSize> lutTexels;
+	for (std::size_t i = 0; i < gpu.lightingLUT.size(); i++) {  // Unsigned index, so the comparison against size() is not mixed-sign
+		const u32 entry = gpu.lightingLUT[i] & 0xfff;  // Only the low 12 bits of each entry are used
+		lutTexels[i] = u16(entry * 65535 / 4095);  // Rescale [0, 4095] to the full u16 range; the product fits comfortably in 32 bits
+	}
+
+	glActiveTexture(GL_TEXTURE0 + 3);
+	glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+	glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, lutTexels.data());  // 256 texels per layer
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+	glActiveTexture(GL_TEXTURE0);  // Switch the active texture unit back to 0 for the rest of the renderer
+	gpu.lightingLUTDirty = false;
+}
 
 void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) {
 	// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
@@ -576,6 +865,11 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 		glUniform1ui(alphaControlLoc, alphaControl);
 	}
 
+	// Clip plane 0 is always on. Plane 1 follows bit 0 of ClipEnable in both directions, so an enable from an earlier draw cannot go stale
+	OpenGL::enableClipPlane(0);
+	if (regs[PICA::InternalRegs::ClipEnable] & 1) OpenGL::enableClipPlane(1);
+	else OpenGL::disableClipPlane(1);
+
 	setupBlending();
 	OpenGL::Framebuffer poop = getColourFBO();
 	poop.bind(OpenGL::DrawAndReadFramebuffer);
@@ -614,6 +908,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 	setupTextureEnvState();
 	bindTexturesToSlots();
 
+	// Upload the PICA registers as a single uniform array. The shader needs the rasterizer registers (for depth, starting from index 0x47),
+	// the texturing registers and the fragment lighting registers, so we upload them all in one go to avoid multiple slow uniform updates
+	glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]);
+
+	if (gpu.lightingLUTDirty) {
+		updateLightingLUT();
+	}
+
 	// TODO: Actually use this
 	float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;
 	float viewportHeight = f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0;
@@ -644,7 +946,6 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 constexpr u32 topScreenBuffer = 0x1f000000;
 constexpr u32 bottomScreenBuffer = 0x1f05dc00;
 
-// Quick hack to display top screen for now
 void Renderer::display() {
 	OpenGL::disableScissor();
 
@@ -700,7 +1001,9 @@ void Renderer::bindDepthBuffer() {
 		tex = depthBufferCache.add(sampleBuffer).texture.m_handle;
 	}
 
-	if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) Helpers::panic("TODO: Should we remove stencil attachment?");
+	if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) {
+		Helpers::panicDev("TODO: Should we remove stencil attachment?");
+	}
 	auto attachment = depthBufferFormat == PICA::DepthFmt::Depth24Stencil8 ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
 	glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0);
 }
@@ -738,6 +1041,8 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32
 	OpenGL::disableBlend();
 	OpenGL::disableDepth();
 	OpenGL::disableScissor();
+	OpenGL::disableClipPlane(0);
+	OpenGL::disableClipPlane(1);
 	displayProgram.use();
 
 	// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
diff --git a/src/core/services/apt.cpp b/src/core/services/apt.cpp
index f2c7612d..becf637f 100644
--- a/src/core/services/apt.cpp
+++ b/src/core/services/apt.cpp
@@ -81,7 +81,10 @@ void APTService::handleSyncRequest(u32 messagePointer) {
 		case APTCommands::SetApplicationCpuTimeLimit: setApplicationCpuTimeLimit(messagePointer); break;
 		case APTCommands::SetScreencapPostPermission: setScreencapPostPermission(messagePointer); break;
 		case APTCommands::TheSmashBrosFunction: theSmashBrosFunction(messagePointer); break;
-		default: Helpers::panic("APT service requested. Command: %08X\n", command);
+		default:
+			Helpers::panicDev("APT service requested. Command: %08X\n", command);
+			mem.write32(messagePointer + 4, Result::Success);
+			break;
 	}
 }
 
diff --git a/src/core/services/cecd.cpp b/src/core/services/cecd.cpp
index f641e40d..dd9ccb2f 100644
--- a/src/core/services/cecd.cpp
+++ b/src/core/services/cecd.cpp
@@ -16,7 +16,10 @@ void CECDService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
 		case CECDCommands::GetInfoEventHandle: getInfoEventHandle(messagePointer); break;
-		default: Helpers::panic("CECD service requested. Command: %08X\n", command);
+		default:
+			Helpers::panicDev("CECD service requested. Command: %08X\n", command);
+			mem.write32(messagePointer + 4, Result::Success);
+			break;
 	}
 }
 
diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp
index 7993dc7a..27a078f2 100644
--- a/src/core/services/hid.cpp
+++ b/src/core/services/hid.cpp
@@ -7,7 +7,9 @@ namespace HIDCommands {
 	enum : u32 {
 		GetIPCHandles = 0x000A0000,
 		EnableAccelerometer = 0x00110000,
+		DisableAccelerometer = 0x00120000,
 		EnableGyroscopeLow = 0x00130000,
+		DisableGyroscopeLow = 0x00140000,
 		GetGyroscopeLowRawToDpsCoefficient = 0x00150000,
 		GetGyroscopeLowCalibrateParam = 0x00160000
 	};
@@ -36,6 +38,8 @@ void HIDService::reset() {
 void HIDService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
+		case HIDCommands::DisableAccelerometer: disableAccelerometer(messagePointer); break;
+		case HIDCommands::DisableGyroscopeLow: disableGyroscopeLow(messagePointer); break;
 		case HIDCommands::EnableAccelerometer: enableAccelerometer(messagePointer); break;
 		case HIDCommands::EnableGyroscopeLow: enableGyroscopeLow(messagePointer); break;
 		case HIDCommands::GetGyroscopeLowCalibrateParam: getGyroscopeLowCalibrateParam(messagePointer); break;
@@ -53,6 +57,14 @@ void HIDService::enableAccelerometer(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }
 
+// Handler for HIDCommands::DisableAccelerometer (0x00120000): clears accelerometerEnabled and replies with Result::Success
+void HIDService::disableAccelerometer(u32 messagePointer) {
+	log("HID::DisableAccelerometer\n");
+	accelerometerEnabled = false;
+	mem.write32(messagePointer, IPC::responseHeader(0x12, 1, 0));  // Response header goes in the first word at messagePointer
+	mem.write32(messagePointer + 4, Result::Success);  // Result code goes in the word right after the header
+}
+
 void HIDService::enableGyroscopeLow(u32 messagePointer) {
 	log("HID::EnableGyroscopeLow\n");
 	gyroEnabled = true;
@@ -61,6 +73,14 @@ void HIDService::enableGyroscopeLow(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }
 
+// Handler for HIDCommands::DisableGyroscopeLow (0x00140000): clears gyroEnabled and replies with Result::Success
+void HIDService::disableGyroscopeLow(u32 messagePointer) {
+	log("HID::DisableGyroscopeLow\n");
+	gyroEnabled = false;
+	mem.write32(messagePointer, IPC::responseHeader(0x14, 1, 0));  // Response header goes in the first word at messagePointer
+	mem.write32(messagePointer + 4, Result::Success);  // Result code goes in the word right after the header
+}
+
 void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) {
 	log("HID::GetGyroscopeLowCalibrateParam\n");
 	constexpr s16 unit = 6700; // Approximately from Citra which took it from hardware