diff --git a/src/Ryujinx.Graphics.Metal/HelperShader.cs b/src/Ryujinx.Graphics.Metal/HelperShader.cs
index b4ddfe02c1..37f130c785 100644
--- a/src/Ryujinx.Graphics.Metal/HelperShader.cs
+++ b/src/Ryujinx.Graphics.Metal/HelperShader.cs
@@ -27,6 +27,7 @@ namespace Ryujinx.Graphics.Metal
private readonly IProgram _programColorClearSI;
private readonly IProgram _programColorClearUI;
private readonly IProgram _programDepthStencilClear;
+ private readonly IProgram _programStrideChange;
public HelperShader(MTLDevice device, Pipeline pipeline)
{
@@ -40,6 +41,12 @@ namespace Ryujinx.Graphics.Metal
new ShaderSource(blitSource, ShaderStage.Vertex, TargetLanguage.Msl)
], device);
+ var strideChangeSource = ReadMsl("ChangeBufferStride.metal");
+ _programStrideChange = new Program(
+ [
+ new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
+ ], device);
+
// var colorClearFSource = ReadMsl("ColorClearF.metal");
// _programColorClearF = new Program(
// [
diff --git a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj
index d8da128340..834068bb5f 100644
--- a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj
+++ b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj
@@ -20,6 +20,7 @@
+
diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal
new file mode 100644
index 0000000000..c9c0713a6c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal
@@ -0,0 +1,52 @@
+#include
+
+using namespace metal;
+
+kernel void kernelMain(constant float4& stride_arguments [[buffer(0)]],
+ device uint8_t* in_data [[buffer(1)]],
+ device uint8_t* out_data [[buffer(2)]],
+ uint3 thread_position_in_grid [[thread_position_in_grid]],
+ uint3 threads_per_threadgroup [[threads_per_threadgroup]],
+ uint3 threadgroups_per_grid [[threads_per_grid]])
+{
+ // Determine what slice of the stride copies this invocation will perform.
+
+ int sourceStride = stride_arguments.x;
+ int targetStride = stride_arguments.y;
+ int bufferSize = stride_arguments.z;
+ int sourceOffset = stride_arguments.w;
+
+ int strideRemainder = targetStride - sourceStride;
+ int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);
+
+ int copiesRequired = bufferSize / sourceStride;
+
+ // Find the copies that this invocation should perform.
+
+ // - Copies that all invocations perform.
+ int allInvocationCopies = copiesRequired / invocations;
+
+ // - One extra copy when this invocation's index is below the remainder (copiesRequired % invocations).
+ int index = int(thread_position_in_grid.x);
+ int extra = (index < (copiesRequired % invocations)) ? 1 : 0;
+
+ int copyCount = allInvocationCopies + extra;
+
+ // Finally, get the index of this invocation's first copy; each lower-indexed invocation that took an extra copy shifts it by one.
+
+ int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);
+
+ int srcOffset = sourceOffset + startCopy * sourceStride;
+ int dstOffset = startCopy * targetStride;
+
+ // Perform the copies for this region
+ for (int i=0; i