From 4a9fb9bdc3d6bd86237c499bc42d4a63633fe391 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 24 Jul 2024 11:24:55 +0200 Subject: [PATCH] make lut texture 2D --- CMakeLists.txt | 5 +-- .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 12 +++++-- include/renderer_mtl/renderer_mtl.hpp | 2 +- src/core/renderer_mtl/renderer_mtl.cpp | 27 ++++++++------- .../metal_copy_to_lut_texture.metal | 4 +-- src/host_shaders/metal_shaders.metal | 34 +++++++++---------- 6 files changed, 46 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b46d087..31fdd9f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,7 @@ endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") -endif() +endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) @@ -442,9 +442,10 @@ if(ENABLE_METAL AND APPLE) set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal") set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir") set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib") + # TODO: only include sources in debug builds add_custom_command( OUTPUT ${SHADER_IR} - COMMAND xcrun -sdk macosx metal -o ${SHADER_IR} -c ${SHADER_SOURCE} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} DEPENDS ${SHADER_SOURCE} VERBATIM) add_custom_command( diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index 4af9a3e6..5129a446 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -19,13 +19,13 @@ public: VertexBufferCache() = default; ~VertexBufferCache() { - reset(); + endFrame(); + buffer->release(); } void set(MTL::Device* dev) { device = dev; - buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); - buffer->setLabel(toNSString("Shared vertex buffer")); + create(); } void endFrame() { @@ -59,6 +59,7 @@ public: void reset() { endFrame(); buffer->release(); + create(); } private: @@ -67,6 +68,11 @@ private: std::vector additionalAllocations; MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } }; } // namespace Metal diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 09442ae9..9ba0937a 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -57,7 +57,7 @@ class RendererMTL final : public Renderer { // Objects MTL::SamplerState* nearestSampler; MTL::SamplerState* linearSampler; - MTL::Texture* lightLUTTextureArray; + MTL::Texture* lutTexture; MTL::DepthStencilState* defaultDepthStencilState; // Pipelines diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index fdc1162f..e280e1af 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -130,15 +130,15 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); - textureDescriptor->setTextureType(MTL::TextureType1DArray); - textureDescriptor->setPixelFormat(MTL::PixelFormatRG32Float); + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); - textureDescriptor->setArrayLength(Lights::LUT_Count + 1); + textureDescriptor->setHeight(Lights::LUT_Count + 1); textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); textureDescriptor->setStorageMode(MTL::StorageModePrivate); - lightLUTTextureArray = device->newTexture(textureDescriptor); - lightLUTTextureArray->setLabel(toNSString("LUT texture")); + lutTexture = device->newTexture(textureDescriptor); + lutTexture->setLabel(toNSString("LUT texture")); textureDescriptor->release(); // Samplers @@ -580,7 +580,7 @@ void RendererMTL::deinitGraphicsContext() { copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); - lightLUTTextureArray->release(); + lutTexture->release(); linearSampler->release(); nearestSampler->release(); library->release(); @@ -694,13 +694,13 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { } // LUT texture - encoder->setFragmentTexture(lightLUTTextureArray, 3); + encoder->setFragmentTexture(lutTexture, 3); encoder->setFragmentSamplerState(linearSampler, 3); } void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { gpu.lightingLUTDirty = false; - std::array lightingLut; + std::array lightingLut = {0.0f}; for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; @@ -708,12 +708,12 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { } //for (int i = 0; i < Lights::LUT_Count; i++) { - // lightLUTTextureArray->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); + // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); //} renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0); + renderCommandEncoder->setVertexTexture(lutTexture, 0); Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); u32 arrayOffset = 0; @@ -724,7 +724,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut; + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -739,8 +739,9 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0); - renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); u32 arrayOffset = (u32)Lights::LUT_Count; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal index fef4362a..5eb87320 100644 --- a/src/host_shaders/metal_copy_to_lut_texture.metal +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -4,6 +4,6 @@ using namespace metal; constant ushort lutTextureWidth [[function_constant(0)]]; // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass -vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { - out.write(float4(data[vid], 0.0, 0.0), vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth); +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid].x, 0.0, 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index b776539b..f38d2958 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -410,11 +410,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { #define FOG_INDEX 24 -float lutLookup(texture1d_array texLightingLut, uint lut, uint index) { - return texLightingLut.read(index, lut).r; +float lutLookup(texture2d texLut, uint lut, uint index) { + return texLut.read(uint2(index, lut)).r; } -float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array texLightingLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { uint lut_index; int bit_in_config1; if (lut_id == SP_LUT) { @@ -500,12 +500,12 @@ float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant delta = abs(delta); } int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); - return lutLookup(texLightingLut, lut_index, index) * scale; + return lutLookup(texLut, lut_index, index) * scale; } else { // Range is [-1, 1] so we need to map it to [0, 1] int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); if (index < 0) index += 256; - return lutLookup(texLightingLut, lut_index, index) * scale; + return lutLookup(texLut, lut_index, index) * scale; } } @@ -517,7 +517,7 @@ float3 regToColor(uint reg) { } // Implements the following algorthm: https://mathb.in/26766 -void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { // Quaternions describe a transformation from surface-local space to eye space. // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). @@ -615,23 +615,23 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; delta = clamp(delta, 0.0, 1.0); int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); - distanceAttenuation = lutLookup(texLightingLut, 16u + lightId, index); + distanceAttenuation = lutLookup(texLut, 16u + lightId, index); } - float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, SP_LUT, lightId, lightVector, halfVector); - float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, D0_LUT, lightId, lightVector, halfVector); - float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); float3 reflectedColor; - reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); if (isSamplerEnabled(environmentId, RG_LUT)) { - reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); } else { reflectedColor.g = reflectedColor.r; } if (isSamplerEnabled(environmentId, RB_LUT)) { - reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, RB_LUT, lightId, lightVector, halfVector); + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector); } else { reflectedColor.b = reflectedColor.r; } @@ -657,7 +657,7 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float fresnelFactor; if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { - fresnelFactor = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); } if (fresnelOutput1 == 1u) { @@ -679,7 +679,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { } fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]], - texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture1d_array texLightingLut [[texture(3)]], + texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; @@ -691,7 +691,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c globals.tevSources[0] = in.color; if (lightingEnabled) { - calcLighting(globals, in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); } else { globals.tevSources[1] = float4(0.0); globals.tevSources[2] = float4(0.0); @@ -743,7 +743,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c fog_index *= 128.0; float clamped_index = clamp(floor(fog_index), 0.0, 127.0); float delta = fog_index - clamped_index; - float2 value = texLightingLut.read(uint(clamped_index), FOG_INDEX).rg; + float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);