diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index e15aced3..4af9a3e6 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -36,10 +36,10 @@ public: additionalAllocations.clear(); } - BufferHandle get(const std::span& vertices) { + BufferHandle get(const void* data, size_t size) { // If the vertex buffer is too large, just create a new one - if (ptr + vertices.size_bytes() > CACHE_BUFFER_SIZE) { - MTL::Buffer* newBuffer = device->newBuffer(vertices.data(), vertices.size_bytes(), MTL::ResourceStorageModeShared); + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); newBuffer->setLabel(toNSString("Additional vertex buffer")); additionalAllocations.push_back(newBuffer); Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); @@ -48,10 +48,10 @@ public: } // Copy the data into the buffer - memcpy((char*)buffer->contents() + ptr, vertices.data(), vertices.size_bytes()); + memcpy((char*)buffer->contents() + ptr, data, size); size_t oldPtr = ptr; - ptr += vertices.size_bytes(); + ptr += size; return BufferHandle{buffer, oldPtr}; } diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 5d855e29..09442ae9 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -184,5 +184,6 @@ class RendererMTL final : public Renderer { void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); + void updateFogLUT(MTL::RenderCommandEncoder* encoder); void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); }; diff --git 
a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index fbe96587..fdc1162f 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -131,9 +131,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType1DArray); - textureDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); + textureDescriptor->setPixelFormat(MTL::PixelFormatRG32Float); textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); - textureDescriptor->setArrayLength(Lights::LUT_Count); + textureDescriptor->setArrayLength(Lights::LUT_Count + 1); textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); textureDescriptor->setStorageMode(MTL::StorageModePrivate); @@ -516,6 +516,9 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetRenderPipelineState(pipeline); renderCommandEncoder->setDepthStencilState(depthStencilState); @@ -523,7 +526,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); } else { - Metal::BufferHandle buffer = vertexBufferCache.get(vertices); + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); } @@ -560,6 +563,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + renderCommandEncoder->setFragmentBytes(&depthUniforms, sizeof(depthUniforms), 3); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), 
NS::UInteger(vertices.size())); } @@ -696,11 +700,11 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; + std::array lightingLut; - for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; } //for (int i = 0; i < Lights::LUT_Count; i++) { @@ -710,11 +714,39 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0); - renderCommandEncoder->setVertexBytes(u16_lightinglut.data(), sizeof(u16_lightinglut), 0); + Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + u32 arrayOffset = 0; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), GPU::LightingLutSize); } +void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { + gpu.fogLUTDirty = false; + std::array fogLut; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + 
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0); + renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); + u32 arrayOffset = (u32)Lights::LUT_Count; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(128)); +} + void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal index ef993b64..fef4362a 100644 --- a/src/host_shaders/metal_copy_to_lut_texture.metal +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -4,6 +4,6 @@ using namespace metal; constant ushort lutTextureWidth [[function_constant(0)]]; // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass -vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array out [[texture(0)]], constant ushort* data [[buffer(0)]]) { - out.write(data[vid], vid % lutTextureWidth, vid / lutTextureWidth); +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth); } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 243a85b0..b776539b 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -104,6 +104,7 @@ struct EnvColor { struct DrawVertexOut { float4 position 
[[position]]; + float depth; float4 quaternion; float4 color; float3 texCoord0; @@ -176,6 +177,7 @@ vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant P // Apply depth uniforms out.position.z = transformZ(out.position.z, out.position.w, depthUniforms); + out.depth = out.position.z; // Color out.color = min(abs(in.color), 1.0); @@ -406,6 +408,8 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { #define RG_LUT 5u #define RR_LUT 6u +#define FOG_INDEX 24 + float lutLookup(texture1d_array texLightingLut, uint lut, uint index) { return texLightingLut.read(index, lut).r; } @@ -569,17 +573,15 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) )); - float3 halfVector; - // Positional Light if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { // error_unimpl = true; - halfVector = lightPosition + in.view; + lightVector = lightPosition + in.view; } // Directional light else { - halfVector = lightPosition; + lightVector = lightPosition; } lightDistance = length(lightVector); @@ -676,7 +678,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } -fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture1d_array texLightingLut [[texture(3)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler 
linearSampler [[sampler(3)]]) { Globals globals; @@ -691,8 +693,8 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c if (lightingEnabled) { calcLighting(globals, in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); } else { - globals.tevSources[1] = float4(1.0); - globals.tevSources[2] = float4(1.0); + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); } uint textureConfig = picaRegs.read(0x80u); @@ -723,9 +725,37 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c } } - float4 color = performLogicOp(logicOp, globals.tevSources[15], prevColor); + float4 color = globals.tevSources[15]; - // TODO: fog + // Depth + float z_over_w = in.position.z; + float depth = z_over_w * depthUniforms.depthScale + depthUniforms.depthOffset; + + if (!depthUniforms.depthMapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering) + depth /= in.position.w; + + // Fog + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 
1.0 - depth : depth; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + float2 value = texLightingLut.read(uint(clamped_index), FOG_INDEX).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + float3 fog_color = float3(r, g, b); + + color.rgb = mix(fog_color, color.rgb, fog_factor); + } // Perform alpha test if ((alphaControl & 1u) != 0u) { // Check if alpha test is on @@ -757,5 +787,5 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c } } - return color; + return performLogicOp(logicOp, color, prevColor); }