diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp
index e15aced3..4af9a3e6 100644
--- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp
+++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp
@@ -36,10 +36,10 @@ public:
         additionalAllocations.clear();
     }
 
-    BufferHandle get(const std::span<const PICA::Vertex>& vertices) {
+    BufferHandle get(const void* data, size_t size) {  // Takes raw bytes so callers can stage any data, not only PICA::Vertex spans
         // If the vertex buffer is too large, just create a new one
-        if (ptr + vertices.size_bytes() > CACHE_BUFFER_SIZE) {
-            MTL::Buffer* newBuffer = device->newBuffer(vertices.data(), vertices.size_bytes(), MTL::ResourceStorageModeShared);
+        if (ptr + size > CACHE_BUFFER_SIZE) {  // ptr is the byte offset at which the next allocation would be written
+            MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);  // The new buffer is created directly from the caller's bytes
             newBuffer->setLabel(toNSString("Additional vertex buffer"));
             additionalAllocations.push_back(newBuffer);
             Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
@@ -48,10 +48,10 @@ public:
         }
 
         // Copy the data into the buffer
-        memcpy((char*)buffer->contents() + ptr, vertices.data(), vertices.size_bytes());
+        memcpy((char*)buffer->contents() + ptr, data, size);  // Append at the current write offset
 
         size_t oldPtr = ptr;
-        ptr += vertices.size_bytes();
+        ptr += size;  // NOTE(review): offsets are not rounded up; confirm Metal's buffer-offset alignment rules hold for every consumer
 
         return BufferHandle{buffer, oldPtr};
     }
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index 5d855e29..09442ae9 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -184,5 +184,6 @@ class RendererMTL final : public Renderer {
 	void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
 	void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder);
 	void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
+	void updateFogLUT(MTL::RenderCommandEncoder* encoder);
 	void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect);
 };
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index fbe96587..fdc1162f 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -131,9 +131,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	// Textures
 	MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
 	textureDescriptor->setTextureType(MTL::TextureType1DArray);
-	textureDescriptor->setPixelFormat(MTL::PixelFormatR16Uint);
+	textureDescriptor->setPixelFormat(MTL::PixelFormatRG32Float);
 	textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
-	textureDescriptor->setArrayLength(Lights::LUT_Count);
+	textureDescriptor->setArrayLength(Lights::LUT_Count + 1);
 	textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
 	textureDescriptor->setStorageMode(MTL::StorageModePrivate);
 
@@ -516,6 +516,9 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
 	if (gpu.lightingLUTDirty) {
 		updateLightingLUT(renderCommandEncoder);
 	}
+	if (gpu.fogLUTDirty) {
+        updateFogLUT(renderCommandEncoder);
+    }
 
 	renderCommandEncoder->setRenderPipelineState(pipeline);
 	renderCommandEncoder->setDepthStencilState(depthStencilState);
@@ -523,7 +526,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
 	if (vertices.size_bytes() < 4 * 1024) {
 		renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
 	} else {
-	    Metal::BufferHandle buffer = vertexBufferCache.get(vertices);
+	    Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes());
 		renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
 	}
 
@@ -560,6 +563,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
 	renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
 	renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
 	renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
+	renderCommandEncoder->setFragmentBytes(&depthUniforms, sizeof(depthUniforms), 3);
 
 	renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
 }
@@ -696,11 +700,11 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
 
 void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
 	gpu.lightingLUTDirty = false;
-	std::array<u16, GPU::LightingLutSize> u16_lightinglut;
+	std::array<float, GPU::LightingLutSize * 2> lightingLut{};  // Two floats per LUT entry; only the first of each pair is written below, so zero the rest
 
-	for (int i = 0; i < gpu.lightingLUT.size(); i++) {
-		uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
-		u16_lightinglut[i] = value * 65535 / 4095;
+	for (size_t i = 0; i < gpu.lightingLUT.size(); i++) {  // One iteration per source entry so that the whole table is converted
+		uint64_t value = gpu.lightingLUT[i] & 0xFFF;  // Keep the low 12 bits of the source entry
+		lightingLut[i * 2] = (float)(value << 4) / 65535.0f;  // Shift the 12-bit value up to 16 bits and divide by 65535
 	}
 
 	//for (int i = 0; i < Lights::LUT_Count; i++) {
@@ -710,11 +714,39 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
 	renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
 	renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
 	renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
-	renderCommandEncoder->setVertexBytes(u16_lightinglut.data(), sizeof(u16_lightinglut), 0);
+	Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));  // Staged via the buffer cache; presumably too large for setVertexBytes (drawVertices caps that path at 4 KiB)
+	renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);  // Source data for the copy, bound at vertex buffer index 0
+	u32 arrayOffset = 0;  // Lighting tables start at offset 0; updateFogLUT passes Lights::LUT_Count here
+	renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);  // Array offset, bound at vertex buffer index 1
 
 	renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), GPU::LightingLutSize);
 }
 
+void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
+	// Repacks gpu.fogLUT as (value, difference) float pairs and copies them to the LUT texture at array offset Lights::LUT_Count
+	std::array<float, 128 * 2> packedFog;
+	gpu.fogLUTDirty = false;
+	for (size_t entry = 0; entry < packedFog.size() / 2; entry++) {
+		const uint32_t raw = gpu.fogLUT[entry];
+		// Low 13 bits: signed difference. Shifting up and back down sign extends it to 32 bits
+		const int32_t signedDelta = (int32_t(raw & 0x1fff) << 19) >> 19;
+		// Next 11 bits: unsigned fog value
+		const uint32_t unsignedValue = (raw >> 13) & 0x7ff;
+
+		packedFog[entry * 2] = float(unsignedValue) / 2048.0f;
+		packedFog[entry * 2 + 1] = float(signedDelta) / 2048.0f;
+	}
+
+	const u32 arrayOffset = static_cast<u32>(Lights::LUT_Count);
+	renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
+	renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
+	renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
+	renderCommandEncoder->setVertexBytes(packedFog.data(), sizeof(packedFog), 0);
+	renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
+
+	renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(packedFog.size() / 2));
+}
+
 void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect) {
     nextRenderPassName = "Texture copy";
 	MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal
index ef993b64..fef4362a 100644
--- a/src/host_shaders/metal_copy_to_lut_texture.metal
+++ b/src/host_shaders/metal_copy_to_lut_texture.metal
@@ -4,6 +4,6 @@ using namespace metal;
 constant ushort lutTextureWidth [[function_constant(0)]];
 
 // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
-vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<ushort, access::write> out [[texture(0)]], constant ushort* data [[buffer(0)]]) {
-    out.write(data[vid], vid % lutTextureWidth, vid / lutTextureWidth);
+vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<float, access::write> out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
+    out.write(float4(data[vid], 0.0, 0.0), vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth);  // Entry vid goes to texel (vid % width) of array slice (arrayOffset + vid / width); the float2 fills RG, BA are written as 0
 }
diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal
index 243a85b0..b776539b 100644
--- a/src/host_shaders/metal_shaders.metal
+++ b/src/host_shaders/metal_shaders.metal
@@ -104,6 +104,7 @@ struct EnvColor {
 
 struct DrawVertexOut {
 	float4 position [[position]];
+	float depth;
 	float4 quaternion;
 	float4 color;
 	float3 texCoord0;
@@ -176,6 +177,7 @@ vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant P
 
 	// Apply depth uniforms
 	out.position.z = transformZ(out.position.z, out.position.w, depthUniforms);
+	out.depth = out.position.z;
 
 	// Color
 	out.color = min(abs(in.color), 1.0);
@@ -406,6 +408,8 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
 #define RG_LUT 5u
 #define RR_LUT 6u
 
+#define FOG_INDEX 24
+
 float lutLookup(texture1d_array<float> texLightingLut, uint lut, uint index) {
 	return texLightingLut.read(index, lut).r;
 }
@@ -569,17 +573,15 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
 			decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
 		));
 
-		float3 halfVector;
-
 		// Positional Light
 		if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
 			// error_unimpl = true;
-			halfVector = lightPosition + in.view;
+			lightVector = lightPosition + in.view;
 		}
 
 		// Directional light
 		else {
-			halfVector = lightPosition;
+			lightVector = lightPosition;
 		}
 
 		lightDistance = length(lightVector);
@@ -676,7 +678,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
     return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
 }
 
-fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]],
+fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]],
                              texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture1d_array<float> texLightingLut [[texture(3)]],
                              sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
     Globals globals;
@@ -691,8 +693,8 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
     if (lightingEnabled) {
         calcLighting(globals, in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
     } else {
-        globals.tevSources[1] = float4(1.0);
-        globals.tevSources[2] = float4(1.0);
+        globals.tevSources[1] = float4(0.0);
+        globals.tevSources[2] = float4(0.0);
     }
 
 	uint textureConfig = picaRegs.read(0x80u);
@@ -723,9 +725,37 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
 		}
 	}
 
-	float4 color = performLogicOp(logicOp, globals.tevSources[15], prevColor);
+	float4 color = globals.tevSources[15];
 
-	// TODO: fog
+	// Depth
+	float z_over_w = in.position.z;
+	float depth = z_over_w * depthUniforms.depthScale + depthUniforms.depthOffset;
+
+	if (!depthUniforms.depthMapEnable)  // Divide z by w if depthmap enable == 0 (ie using W-buffering)
+		depth /= in.position.w;
+
+	// Fog
+	bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;
+
+	if (enable_fog) {
+		bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
+		float fog_index = flip_depth ? 1.0 - depth : depth;
+		fog_index *= 128.0;
+		float clamped_index = clamp(floor(fog_index), 0.0, 127.0);
+		float delta = fog_index - clamped_index;
+		float2 value = texLightingLut.read(uint(clamped_index), FOG_INDEX).rg;
+		float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);
+
+		uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);
+
+		// Annoyingly color is not encoded in the same way as light color
+		float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
+		float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
+		float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
+		float3 fog_color = float3(r, g, b);
+
+		color.rgb = mix(fog_color, color.rgb, fog_factor);
+	}
 
 	// Perform alpha test
 	if ((alphaControl & 1u) != 0u) {  // Check if alpha test is on
@@ -757,5 +787,5 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
 		}
 	}
 
-	return color;
+	return performLogicOp(logicOp, color, prevColor);
 }