From 0c19f5a3ead25e034dbf653376ca880a17696507 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 5 Jul 2024 20:29:05 +0200 Subject: [PATCH 1/4] specialize shader --- CMakeLists.txt | 3 +- .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 72 +++++++++++++++++++ ..._cache.hpp => mtl_draw_pipeline_cache.hpp} | 44 +++++++++--- include/renderer_mtl/renderer_mtl.hpp | 7 +- src/core/renderer_mtl/renderer_mtl.cpp | 10 +-- src/host_shaders/metal_shaders.metal | 16 +++-- 6 files changed, 125 insertions(+), 27 deletions(-) create mode 100644 include/renderer_mtl/mtl_blit_pipeline_cache.hpp rename include/renderer_mtl/{mtl_pipeline_cache.hpp => mtl_draw_pipeline_cache.hpp} (64%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 13be2537..21274bf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -404,7 +404,8 @@ endif() if(ENABLE_METAL AND APPLE) set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp include/renderer_mtl/mtl_depth_stencil_cache.hpp - include/renderer_mtl/mtl_pipeline_cache.hpp + include/renderer_mtl/mtl_blit_pipeline_cache.hpp + include/renderer_mtl/mtl_draw_pipeline_cache.hpp include/renderer_mtl/mtl_render_target.hpp include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp new file mode 100644 index 00000000..399d3005 --- /dev/null +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; +}; + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class BlitPipelineCache { +public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + clear(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void clear() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } + +private: + std::unordered_map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp similarity index 64% rename from include/renderer_mtl/mtl_pipeline_cache.hpp rename to include/renderer_mtl/mtl_draw_pipeline_cache.hpp index d6a75c71..5e9e1b66 100644 --- a/include/renderer_mtl/mtl_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -6,7 +6,7 @@ using namespace PICA; namespace Metal { -struct PipelineHash { +struct DrawPipelineHash { // Formats ColorFmt colorFmt; DepthFmt depthFmt; @@ -14,34 +14,51 @@ struct PipelineHash { // Blending bool blendEnabled; u32 blendControl; + + // Specialization constants + bool lightingEnabled; }; // Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices #define VERTEX_BUFFER_BINDING_INDEX 30 // This pipeline only caches the pipeline with all of its color and depth attachment variations -class PipelineCache { +class DrawPipelineCache { public: - PipelineCache() = default; + DrawPipelineCache() = default; - ~PipelineCache() { + ~DrawPipelineCache() { clear(); vertexDescriptor->release(); vertexFunction->release(); - fragmentFunction->release(); } - void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag, MTL::VertexDescriptor* vertDesc) { + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { device = dev; + library = lib; vertexFunction = vert; - fragmentFunction = frag; vertexDescriptor = vertDesc; } - MTL::RenderPipelineState* get(PipelineHash hash) { - u64 intHash = ((u64)hash.colorFmt << 36) | ((u64)hash.depthFmt << 33) | ((u64)hash.blendEnabled << 32) | (u64)hash.blendControl; - auto& pipeline = pipelineCache[intHash]; + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + u64 pipelineHash = ((u64)hash.colorFmt << 37) | ((u64)hash.depthFmt << 34) | ((u64)hash.blendEnabled << 33) | ((u64)hash.blendControl << 1) | (u64)hash.lightingEnabled; + auto& pipeline = pipelineCache[pipelineHash]; if (!pipeline) { + u8 fragmentFunctionHash = (u8)hash.lightingEnabled; + auto& fragmentFunction = fragmentFunctionCache[fragmentFunctionHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + fragmentFunctionCache[fragmentFunctionHash] = fragmentFunction; + } + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); desc->setVertexFunction(vertexFunction); desc->setFragmentFunction(fragmentFunction); @@ -87,14 +104,19 @@ public: pair.second->release(); } pipelineCache.clear(); + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); } private: std::unordered_map pipelineCache; + std::unordered_map fragmentFunctionCache; MTL::Device* device; + MTL::Library* library; MTL::Function* vertexFunction; - MTL::Function* fragmentFunction; MTL::VertexDescriptor* vertexDescriptor; }; diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 8f89fec1..c3c2ad51 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -4,7 +4,8 @@ #include "renderer.hpp" #include "mtl_texture.hpp" #include "mtl_render_target.hpp" -#include "mtl_pipeline_cache.hpp" +#include "mtl_blit_pipeline_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" // HACK: use the OpenGL cache @@ -41,8 +42,8 @@ class RendererMTL final : public Renderer { SurfaceCache colorRenderTargetCache; SurfaceCache depthStencilRenderTargetCache; SurfaceCache textureCache; - Metal::PipelineCache blitPipelineCache; - Metal::PipelineCache drawPipelineCache; + Metal::BlitPipelineCache blitPipelineCache; + Metal::DrawPipelineCache drawPipelineCache; Metal::DepthStencilCache depthStencilCache; Metal::VertexBufferCache vertexBufferCache; diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index d3aabf11..f07c85c5 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -154,11 +154,10 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); - blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction, nullptr); + blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); // Draw MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding)); - MTL::Function* fragmentDrawFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding)); // -------- Vertex descriptor -------- MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); @@ -216,7 +215,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex); vertexBufferLayout->setStepRate(1); - drawPipelineCache.set(device, vertexDrawFunction, fragmentDrawFunction, vertexDescriptor); + drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); // Depth stencil cache depthStencilCache.set(device); @@ -321,7 +320,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, colorAttachment->setStoreAction(MTL::StoreActionStore); // Pipeline - Metal::PipelineHash hash{destFramebuffer->format, DepthFmt::Unknown1}; + Metal::BlitPipelineHash hash{destFramebuffer->format, DepthFmt::Unknown1}; auto blitPipeline = blitPipelineCache.get(hash); beginRenderPassIfNeeded(renderPassDescriptor, destFramebuffer->texture); @@ -381,10 +380,11 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spanformat, DepthFmt::Unknown1}; + Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1}; if (depthStencilRenderTarget) { pipelineHash.depthFmt = depthStencilRenderTarget->format; } + pipelineHash.lightingEnabled = regs[0x008F] & 1; // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index c91ad1ea..2f92ff27 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -393,12 +393,6 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture float3 bitangent = normalize(in.bitangent); float3 view = normalize(in.view); - uint GPUREG_LIGHTING_ENABLE = picaRegs.read(0x008Fu); - if (extract_bits(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primaryColor = secondaryColor = float4(1.0); - return; - } - uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); uint GPUREG_LIGHTING_NUM_LIGHTS = (picaRegs.read(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); @@ -541,12 +535,20 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } +constant int LIGHTING_ENABLED_INDEX = 0; +constant bool lightingEnabled [[function_constant(LIGHTING_ENABLED_INDEX)]]; + fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture1d_array texLightingLut [[texture(3)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; globals.tevSources[0] = in.color; - calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + if (lightingEnabled) { + calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + } else { + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); + } uint textureConfig = picaRegs.read(0x80u); float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2; From 0752a583b580898480d96cb369879ec0527e22f4 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 5 Jul 2024 22:12:00 +0200 Subject: [PATCH 2/4] turn num lights into spec constant --- include/renderer_mtl/mtl_draw_pipeline_cache.hpp | 6 ++++-- src/core/renderer_mtl/renderer_mtl.cpp | 1 + src/host_shaders/metal_shaders.metal | 9 ++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 5e9e1b66..a28caf1b 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -17,6 +17,7 @@ struct DrawPipelineHash { // Specialization constants bool lightingEnabled; + u8 lightingNumLights; }; // Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices @@ -41,14 +42,15 @@ public: } MTL::RenderPipelineState* get(DrawPipelineHash hash) { - u64 pipelineHash = ((u64)hash.colorFmt << 37) | ((u64)hash.depthFmt << 34) | ((u64)hash.blendEnabled << 33) | ((u64)hash.blendControl << 1) | (u64)hash.lightingEnabled; + u8 fragmentFunctionHash = ((u8)hash.lightingEnabled << 4) | hash.lightingNumLights; + u64 pipelineHash = ((u64)hash.colorFmt << 44) | ((u64)hash.depthFmt << 41) | ((u64)hash.blendEnabled << 40) | ((u64)hash.blendControl << 8) | fragmentFunctionHash; auto& pipeline = pipelineCache[pipelineHash]; if (!pipeline) { - u8 fragmentFunctionHash = (u8)hash.lightingEnabled; auto& fragmentFunction = fragmentFunctionCache[fragmentFunctionHash]; if (!fragmentFunction) { MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); constants->setConstantValue(&hash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); NS::Error* error = nullptr; fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index f07c85c5..a7fa208e 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -385,6 +385,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spanformat; } pipelineHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.lightingNumLights = (regs[0x01C2] & 0x7) + 1; // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 2f92ff27..e88b867e 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -383,6 +383,9 @@ float3 regToColor(uint reg) { return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8))); } +constant bool lightingEnabled [[function_constant(0)]]; +constant uint8_t lightingNumLights [[function_constant(1)]]; + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { // Quaternions describe a transformation from surface-local space to eye space. @@ -394,7 +397,6 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture float3 view = normalize(in.view); uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); - uint GPUREG_LIGHTING_NUM_LIGHTS = (picaRegs.read(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); primaryColor = float4(float3(0.0), 1.0); @@ -411,7 +413,7 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture bool errorUnimpl = false; - for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { + for (uint i = 0u; i < lightingNumLights; i++) { uint lightID = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + 0x10u * lightID); @@ -535,9 +537,6 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } -constant int LIGHTING_ENABLED_INDEX = 0; -constant bool lightingEnabled [[function_constant(LIGHTING_ENABLED_INDEX)]]; - fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture1d_array texLightingLut [[texture(3)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { From 5d5df1931f164b5be1bc4537dbd33e432993a7fd Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 6 Jul 2024 08:04:59 +0200 Subject: [PATCH 3/4] turn config1 into a spec constant --- include/renderer_mtl/mtl_draw_pipeline_cache.hpp | 8 +++++--- src/core/renderer_mtl/renderer_mtl.cpp | 1 + src/host_shaders/metal_shaders.metal | 8 ++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index a28caf1b..c4bf6e22 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -18,6 +18,7 @@ struct DrawPipelineHash { // Specialization constants bool lightingEnabled; u8 lightingNumLights; + u8 lightingConfig1; }; // Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices @@ -42,8 +43,8 @@ public: } MTL::RenderPipelineState* get(DrawPipelineHash hash) { - u8 fragmentFunctionHash = ((u8)hash.lightingEnabled << 4) | hash.lightingNumLights; - u64 pipelineHash = ((u64)hash.colorFmt << 44) | ((u64)hash.depthFmt << 41) | ((u64)hash.blendEnabled << 40) | ((u64)hash.blendControl << 8) | fragmentFunctionHash; + u16 fragmentFunctionHash = ((u8)hash.lightingEnabled << 12) | (hash.lightingNumLights << 8) | hash.lightingConfig1; + u64 pipelineHash = ((u64)hash.colorFmt << 52) | ((u64)hash.depthFmt << 49) | ((u64)hash.blendEnabled << 48) | ((u64)hash.blendControl << 16) | fragmentFunctionHash; auto& pipeline = pipelineCache[pipelineHash]; if (!pipeline) { auto& fragmentFunction = fragmentFunctionCache[fragmentFunctionHash]; @@ -51,6 +52,7 @@ public: MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); constants->setConstantValue(&hash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); constants->setConstantValue(&hash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.lightingConfig1, MTL::DataTypeUChar, NS::UInteger(2)); NS::Error* error = nullptr; fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); @@ -114,7 +116,7 @@ public: private: std::unordered_map pipelineCache; - std::unordered_map fragmentFunctionCache; + std::unordered_map fragmentFunctionCache; MTL::Device* device; MTL::Library* library; diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index a7fa208e..30a1fc72 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -386,6 +386,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span> 16; // Last 16 bits are unused, so skip them // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index e88b867e..85401a74 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -385,6 +385,7 @@ float3 regToColor(uint reg) { constant bool lightingEnabled [[function_constant(0)]]; constant uint8_t lightingNumLights [[function_constant(1)]]; +constant uint8_t lightingConfig1 [[function_constant(2)]]; // Implements the following algorthm: https://mathb.in/26766 void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { @@ -407,7 +408,6 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture uint GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u); uint GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u); uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u); - uint GPUREG_LIGHTING_CONFIG1 = picaRegs.read(0x01C4u); uint GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u); float d[7]; @@ -443,7 +443,7 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture } for (int c = 0; c < 7; c++) { - if (extract_bits(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { + if (extract_bits(lightingConfig1, c, 1) == 0u) { uint scaleID = extract_bits(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); float scale = float(1u << scaleID); if (scaleID >= 6u) scale /= 256.0; @@ -545,8 +545,8 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c if (lightingEnabled) { calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); } else { - globals.tevSources[1] = float4(0.0); - globals.tevSources[2] = float4(0.0); + globals.tevSources[1] = float4(1.0); + globals.tevSources[2] = float4(1.0); } uint textureConfig = picaRegs.read(0x80u); From 559d194cc7090f6e7bf79e8bb6b43ab91f8c2e29 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 6 Jul 2024 09:00:08 +0200 Subject: [PATCH 4/4] tightly pack the pipeline hash --- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 27 ++++++++++--------- src/core/renderer_mtl/renderer_mtl.cpp | 3 ++- src/host_shaders/metal_shaders.metal | 4 +-- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index c4bf6e22..fec36b8f 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -6,19 +6,21 @@ using namespace PICA; namespace Metal { -struct DrawPipelineHash { +struct DrawPipelineHash { // 62 bits // Formats - ColorFmt colorFmt; - DepthFmt depthFmt; + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits // Blending - bool blendEnabled; - u32 blendControl; + bool blendEnabled; // 1 bit + u32 blendControl; // 32 bits - // Specialization constants - bool lightingEnabled; - u8 lightingNumLights; - u8 lightingConfig1; + // Specialization constants (23 bits) + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u8 lightingConfig1; // 7 bits + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) }; // Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices @@ -43,8 +45,8 @@ public: } MTL::RenderPipelineState* get(DrawPipelineHash hash) { - u16 fragmentFunctionHash = ((u8)hash.lightingEnabled << 12) | (hash.lightingNumLights << 8) | hash.lightingConfig1; - u64 pipelineHash = ((u64)hash.colorFmt << 52) | ((u64)hash.depthFmt << 49) | ((u64)hash.blendEnabled << 48) | ((u64)hash.blendControl << 16) | fragmentFunctionHash; + u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); + u64 pipelineHash = ((u64)hash.colorFmt << 59) | ((u64)hash.depthFmt << 56) | ((u64)hash.blendEnabled << 55) | ((u64)hash.blendControl << 23) | fragmentFunctionHash; auto& pipeline = pipelineCache[pipelineHash]; if (!pipeline) { auto& fragmentFunction = fragmentFunctionCache[fragmentFunctionHash]; @@ -53,6 +55,7 @@ public: constants->setConstantValue(&hash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); constants->setConstantValue(&hash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); constants->setConstantValue(&hash.lightingConfig1, MTL::DataTypeUChar, NS::UInteger(2)); + constants->setConstantValue(&hash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); NS::Error* error = nullptr; fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); @@ -116,7 +119,7 @@ public: private: std::unordered_map pipelineCache; - std::unordered_map fragmentFunctionCache; + std::unordered_map fragmentFunctionCache; MTL::Device* device; MTL::Library* library; diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 30a1fc72..e2384a79 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -385,8 +385,9 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spanformat; } pipelineHash.lightingEnabled = regs[0x008F] & 1; - pipelineHash.lightingNumLights = (regs[0x01C2] & 0x7) + 1; + pipelineHash.lightingNumLights = regs[0x01C2] & 0x7; pipelineHash.lightingConfig1 = regs[0x01C4u] >> 16; // Last 16 bits are unused, so skip them + pipelineHash.alphaControl = regs[0x104]; // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 85401a74..1d4151c6 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -386,6 +386,7 @@ float3 regToColor(uint reg) { constant bool lightingEnabled [[function_constant(0)]]; constant uint8_t lightingNumLights [[function_constant(1)]]; constant uint8_t lightingConfig1 [[function_constant(2)]]; +constant uint16_t alphaControl [[function_constant(3)]]; // Implements the following algorthm: https://mathb.in/26766 void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { @@ -413,7 +414,7 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture bool errorUnimpl = false; - for (uint i = 0u; i < lightingNumLights; i++) { + for (uint i = 0u; i < lightingNumLights + 1; i++) { uint lightID = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + 0x10u * lightID); @@ -580,7 +581,6 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c float4 color = performLogicOp(logicOp, globals.tevSources[15], prevColor); // Perform alpha test - uint alphaControl = picaRegs.read(0x104u); if ((alphaControl & 1u) != 0u) { // Check if alpha test is on uint func = (alphaControl >> 4u) & 7u; float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;