Metal renderer: fill the lighting LUT texture on the GPU.

The lighting LUT texture array moves to private storage and is written by a
rasterization-disabled vertex-shader pass (metal_copy_to_lut_texture.metal)
encoded into the active render pass, replacing the CPU-side replaceRegion()
upload. The per-shader CMake rules are folded into an add_metal_shader() helper.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 21274bf4..0d98e728 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,26 +419,38 @@ if(ENABLE_METAL AND APPLE)
         src/core/renderer_mtl/mtl_etc1.cpp
         src/core/renderer_mtl/objc_helper.mm
         src/host_shaders/metal_shaders.metal
+        src/host_shaders/metal_copy_to_lut_texture.metal
     )
 
     set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES})
     source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES})
 
-    add_custom_command(
-        OUTPUT ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir
-        COMMAND xcrun -sdk macosx metal -o ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir -c ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metal
-        DEPENDS ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metal
-        VERBATIM)
+    set(RENDERER_MTL_HOST_SHADERS_SOURCES)
+    # Compiles src/host_shaders/<SHADER>.metal to .ir and then to .metallib, and appends
+    # the .metallib to RENDERER_MTL_HOST_SHADERS_SOURCES in the caller's scope.
+    function (add_metal_shader SHADER)
+        set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
+        set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
+        set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")
+        add_custom_command(
+            OUTPUT ${SHADER_IR}
+            COMMAND xcrun -sdk macosx metal -o ${SHADER_IR} -c ${SHADER_SOURCE}
+            DEPENDS ${SHADER_SOURCE}
+            VERBATIM)
+        add_custom_command(
+            OUTPUT ${SHADER_METALLIB}
+            COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR}
+            DEPENDS ${SHADER_IR}
+            VERBATIM)
+        set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB} PARENT_SCOPE)
+    endfunction()
 
-    add_custom_command(
-        OUTPUT ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metallib
-        COMMAND xcrun -sdk macosx metallib -o ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metallib ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir
-        DEPENDS ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir
-        VERBATIM)
+    add_metal_shader(metal_shaders)
+    add_metal_shader(metal_copy_to_lut_texture)
 
     add_custom_target(
-        compile_msl_shader
-        DEPENDS src/host_shaders/metal_shaders.metallib
+        compile_msl_shaders
+        DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES}
     )
 
     cmrc_add_resource_library(
@@ -446,8 +458,9 @@ if(ENABLE_METAL AND APPLE)
         NAMESPACE RendererMTL
         WHENCE "src/host_shaders/"
         "src/host_shaders/metal_shaders.metallib"
+        "src/host_shaders/metal_copy_to_lut_texture.metallib"
     )
-    add_dependencies(resources_renderer_mtl compile_msl_shader)
+    add_dependencies(resources_renderer_mtl compile_msl_shaders)
 
     target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES})
     target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index c3c2ad51..3ec77ace 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -51,9 +51,11 @@ class RendererMTL final : public Renderer {
 	MTL::SamplerState* nearestSampler;
 	MTL::SamplerState* linearSampler;
 	MTL::Texture* lightLUTTextureArray;
+	MTL::DepthStencilState* defaultDepthStencilState;
 
 	// Pipelines
 	MTL::RenderPipelineState* displayPipeline;
+	MTL::RenderPipelineState* copyToLutTexturePipeline;
 
 	// Active state
 	MTL::CommandBuffer* commandBuffer = nullptr;
@@ -103,5 +105,5 @@ class RendererMTL final : public Renderer {
 	Metal::Texture& getTexture(Metal::Texture& tex);
 	void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
 	void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder);
-	void updateLightingLUT();
+	void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
 };
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index e2384a79..6b9e5fcb 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -11,7 +11,7 @@ using namespace PICA;
 
 CMRC_DECLARE(RendererMTL);
 
-#define LIGHT_LUT_TEXTURE_WIDTH 256
+const u16 LIGHT_LUT_TEXTURE_WIDTH = 256;
 
 // HACK: redefinition...
 PICA::ColorFmt ToColorFormat(u32 format) {
@@ -22,3 +22,15 @@ PICA::ColorFmt ToColorFormat(u32 format) {
 	}
 }
 
+// Builds an MTL::Library from a precompiled .metallib embedded through cmrc.
+// Panics via Helpers::panic if Metal reports an error while loading it.
+MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
+	NS::Error* error = nullptr;
+	MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);
+	if (error) {
+		Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding));
+	}
+
+	return library;
+}
+
@@ -105,8 +117,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	textureDescriptor->setPixelFormat(MTL::PixelFormatR16Uint);
 	textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
 	textureDescriptor->setArrayLength(Lights::LUT_Count);
-	textureDescriptor->setUsage(MTL::TextureUsageShaderRead);
-	textureDescriptor->setStorageMode(MTL::StorageModeShared);
+	textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
+	textureDescriptor->setStorageMode(MTL::StorageModePrivate);
 
 	lightLUTTextureArray = device->newTexture(textureDescriptor);
 	textureDescriptor->release();
@@ -125,14 +137,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 
 	// Load shaders
 	auto mtlResources = cmrc::RendererMTL::get_filesystem();
-	auto shaderSource = mtlResources.open("metal_shaders.metallib");
-	//MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init();
-	NS::Error* error = nullptr;
-	MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);
-	//MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error);
-	if (error) {
-		Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding));
-	}
+	MTL::Library* library = loadLibrary(device, mtlResources.open("metal_shaders.metallib"));
+	MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
 
 	// Display
 	MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding));
@@ -144,7 +150,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0);
 	displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm);
 
-	error = nullptr;
+	NS::Error* error = nullptr;
 	displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error);
 	if (error) {
 		Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
@@ -217,11 +223,42 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 
 	drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
 
+	// Copy to LUT texture
+	MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
+	constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0));
+
+	error = nullptr;
+	MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error);
+	if (error) {
+		Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding));
+	}
+	constants->release();
+
+	MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
+	copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction);
+	// Disable rasterization
+	copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false);
+
+	error = nullptr;
+	copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error);
+	if (error) {
+		Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
+	}
+	// Only needed while building the pipeline state
+	copyToLutTexturePipelineDescriptor->release();
+	vertexCopyToLutTextureFunction->release();
+	copyToLutTextureLibrary->release();
+
 	// Depth stencil cache
 	depthStencilCache.set(device);
 
 	// Vertex buffer cache
 	vertexBufferCache.set(device);
+
+	// -------- Depth stencil state --------
+	MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
+	defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor);
+	depthStencilDescriptor->release();
 }
 
 void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
@@ -433,3 +470,9 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span
+
+	// Update the LUT texture if necessary
+	if (gpu.lightingLUTDirty) {
+		updateLightingLUT(renderCommandEncoder);
+	}
+
 	renderCommandEncoder->setRenderPipelineState(pipeline);
 	renderCommandEncoder->setDepthStencilState(depthStencilState);
 	// If size is < 4KB, use inline vertex data, otherwise use a buffer
@@ -443,8 +486,3 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span
-	// Update the LUT texture if necessary
-	if (gpu.lightingLUTDirty) {
-		updateLightingLUT();
-	}
-
 	// Bind resources
 	setupTextureEnvState(renderCommandEncoder);
 	bindTexturesToSlots(renderCommandEncoder);
@@ -580,5 +618,5 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
 	encoder->setFragmentSamplerState(linearSampler, 3);
 }
 
-void RendererMTL::updateLightingLUT() {
+void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
 	gpu.lightingLUTDirty = false;
@@ -589,7 +627,14 @@ void RendererMTL::updateLightingLUT() {
 		u16_lightinglut[i] = value * 65535 / 4095;
 	}
 
-	for (int i = 0; i < Lights::LUT_Count; i++) {
-		lightLUTTextureArray->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0);
-	}
+	// The LUT texture uses private storage, so it is filled on the GPU: the draw below runs
+	// vertexCopyToLutTexture once per vertex, and each invocation writes one texel.
+	// NOTE(review): setVertexBytes is documented for data under 4 KB - confirm sizeof(u16_lightinglut) fits, else use an MTL::Buffer.
+
+	encoder->setRenderPipelineState(copyToLutTexturePipeline);
+	encoder->setDepthStencilState(defaultDepthStencilState);
+	encoder->setVertexTexture(lightLUTTextureArray, 0);
+	encoder->setVertexBytes(u16_lightinglut.data(), sizeof(u16_lightinglut), 0);
+
+	encoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize);
 }
diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal
new file mode 100644
index 00000000..ef993b64
--- /dev/null
+++ b/src/host_shaders/metal_copy_to_lut_texture.metal
@@ -0,0 +1,9 @@
+#include <metal_stdlib>
+using namespace metal;
+
+constant ushort lutTextureWidth [[function_constant(0)]];
+
+// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
+vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<ushort, access::write> out [[texture(0)]], constant ushort* data [[buffer(0)]]) {
+	out.write(data[vid], vid % lutTextureWidth, vid / lutTextureWidth);
+}