diff --git a/src/Ryujinx.Graphics.Metal/HelperShader.cs b/src/Ryujinx.Graphics.Metal/HelperShader.cs index b4ddfe02c1..37f130c785 100644 --- a/src/Ryujinx.Graphics.Metal/HelperShader.cs +++ b/src/Ryujinx.Graphics.Metal/HelperShader.cs @@ -27,6 +27,7 @@ namespace Ryujinx.Graphics.Metal private readonly IProgram _programColorClearSI; private readonly IProgram _programColorClearUI; private readonly IProgram _programDepthStencilClear; + private readonly IProgram _programStrideChange; public HelperShader(MTLDevice device, Pipeline pipeline) { @@ -40,6 +41,12 @@ namespace Ryujinx.Graphics.Metal new ShaderSource(blitSource, ShaderStage.Vertex, TargetLanguage.Msl) ], device); + var strideChangeSource = ReadMsl("ChangeBufferStride.metal"); + _programStrideChange = new Program( + [ + new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl) + ], device); + // var colorClearFSource = ReadMsl("ColorClearF.metal"); // _programColorClearF = new Program( // [ diff --git a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj index d8da128340..834068bb5f 100644 --- a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj +++ b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj @@ -20,6 +20,7 @@ + diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal new file mode 100644 index 0000000000..c9c0713a6c --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal @@ -0,0 +1,52 @@ +#include + +using namespace metal; + +kernel void kernelMain(constant float4& stride_arguments [[buffer(0)]], + device uint8_t* in_data [[buffer(1)]], + device uint8_t* out_data [[buffer(2)]], + uint3 thread_position_in_grid [[thread_position_in_grid]], + uint3 threads_per_threadgroup [[threads_per_threadgroup]], + uint3 threadgroups_per_grid [[threads_per_grid]]) +{ + // Determine what slice of the stride copies this invocation will perform. + + int sourceStride = stride_arguments.x; + int targetStride = stride_arguments.y; + int bufferSize = stride_arguments.z; + int sourceOffset = stride_arguments.w; + + int strideRemainder = targetStride - sourceStride; + int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x); + + int copiesRequired = bufferSize / sourceStride; + + // Find the copies that this invocation should perform. + + // - Copies that all invocations perform. + int allInvocationCopies = copiesRequired / invocations; + + // - Extra remainder copy that this invocation performs. + int index = int(thread_position_in_grid.x); + int extra = (index < (copiesRequired % invocations)) ? 1 : 0; + + int copyCount = allInvocationCopies + extra; + + // Finally, get the starting offset. Make sure to count extra copies. + + int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index); + + int srcOffset = sourceOffset + startCopy * sourceStride; + int dstOffset = startCopy * targetStride; + + // Perform the copies for this region + for (int i=0; i