From 40e9247be260f7364c9b13af97a702171bd6aae7 Mon Sep 17 00:00:00 2001
From: jduncanator
Date: Wed, 5 Sep 2018 16:41:00 +1000
Subject: [PATCH] Audio: Optimize the write callback further

This dramatically reduces the audio buffer copy time. When the sample
size is one of the handled sample sizes (1, 2 or 4 bytes), the buffer
copy operation is almost 10x faster than CopyBlockUnaligned.

This works by copying full samples at a time, rather than the individual
bytes that make up the sample. This allows for 2x or 4x faster copy
operations depending on sample size.
---
 .../Renderers/SoundIo/SoundIoAudioTrack.cs    | 127 ++++++++----------
 1 file changed, 57 insertions(+), 70 deletions(-)

diff --git a/Ryujinx.Audio/Renderers/SoundIo/SoundIoAudioTrack.cs b/Ryujinx.Audio/Renderers/SoundIo/SoundIoAudioTrack.cs
index fa74999e70..427f3dcb94 100644
--- a/Ryujinx.Audio/Renderers/SoundIo/SoundIoAudioTrack.cs
+++ b/Ryujinx.Audio/Renderers/SoundIo/SoundIoAudioTrack.cs
@@ -140,13 +140,13 @@ namespace Ryujinx.Audio.SoundIo
             {
                 var area = areas.GetArea(0);
 
-                fixed (byte* buffPtr = &samples[0])
+                fixed (byte* srcptr = samples)
                 {
                     if (bytesPerSample == 1)
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
-                            *((byte*)area.Pointer) = *(buffPtr + (frame * bytesPerFrame));
+                            ((byte*)area.Pointer)[0] = srcptr[frame * bytesPerFrame];
 
                             area.Pointer += area.Step;
                         }
@@ -155,8 +155,7 @@ namespace Ryujinx.Audio.SoundIo
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
-                            *((byte*)area.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + 0);
-                            *((byte*)area.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + 1);
+                            ((short*)area.Pointer)[0] = ((short*)srcptr)[frame * bytesPerFrame >> 1];
 
                             area.Pointer += area.Step;
                         }
@@ -165,10 +164,7 @@ namespace Ryujinx.Audio.SoundIo
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
-                            *((byte*)area.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + 0);
-                            *((byte*)area.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + 1);
-                            *((byte*)area.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + 2);
-                            *((byte*)area.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + 3);
+                            ((int*)area.Pointer)[0] = ((int*)srcptr)[frame * bytesPerFrame >> 2];
 
                             area.Pointer += area.Step;
                         }
@@ -177,7 +173,7 @@ namespace Ryujinx.Audio.SoundIo
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
-                            Unsafe.CopyBlockUnaligned((byte*)area.Pointer, buffPtr + (frame * bytesPerFrame), bytesPerSample);
+                            Unsafe.CopyBlockUnaligned((byte*)area.Pointer, srcptr + (frame * bytesPerFrame), bytesPerSample);
 
                             area.Pointer += area.Step;
                         }
@@ -190,53 +186,45 @@ namespace Ryujinx.Audio.SoundIo
                 var area1 = areas.GetArea(0);
                 var area2 = areas.GetArea(1);
 
-                fixed (byte* buffPtr = &samples[0])
+                fixed (byte* srcptr = samples)
                 {
-                    if (bytesPerSample == 1)
+                    if (bytesPerSample == 1)
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
                             // Channel 1
-                            *((byte*)area1.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample));
+                            ((byte*)area1.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 0];
 
                             // Channel 2
-                            *((byte*)area2.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample));
+                            ((byte*)area2.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 1];
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
                         }
                     }
-                    else if (bytesPerSample == 2)
+                    else if (bytesPerSample == 2)
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
                             // Channel 1
-                            *((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
-                            *((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
+                            ((short*)area1.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 0];
 
                             // Channel 2
-                            *((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
-                            *((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
+                            ((short*)area2.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 1];
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
                         }
                     }
-                    else if (bytesPerSample == 4)
+                    else if (bytesPerSample == 4)
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
                             // Channel 1
-                            *((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
-                            *((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
-                            *((byte*)area1.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 2);
-                            *((byte*)area1.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 3);
+                            ((int*)area1.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 0];
 
                             // Channel 2
-                            *((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
-                            *((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
-                            *((byte*)area2.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 2);
-                            *((byte*)area2.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 3);
+                            ((int*)area2.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 1];
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
@@ -246,8 +234,11 @@ namespace Ryujinx.Audio.SoundIo
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
-                            Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
-                            Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
+                            // Channel 1
+                            Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, srcptr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
+
+                            // Channel 2
+                            Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, srcptr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
@@ -264,26 +255,26 @@ namespace Ryujinx.Audio.SoundIo
                 var area4 = areas.GetArea(3);
                 var area5 = areas.GetArea(4);
 
-                fixed (byte* buffPtr = &samples[0])
+                fixed (byte* srcptr = samples)
                 {
                     if (bytesPerSample == 1)
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
                             // Channel 1
-                            *((byte*)area1.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample));
+                            ((byte*)area1.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 0];
 
                             // Channel 2
-                            *((byte*)area2.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample));
+                            ((byte*)area2.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 1];
 
                             // Channel 3
-                            *((byte*)area3.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample));
+                            ((byte*)area3.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 2];
 
                             // Channel 4
-                            *((byte*)area4.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample));
+                            ((byte*)area4.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 3];
 
                             // Channel 5
-                            *((byte*)area5.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample));
+                            ((byte*)area5.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 4];
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
@@ -297,24 +288,19 @@ namespace Ryujinx.Audio.SoundIo
                         for (var frame = 0; frame < frameCount; frame++)
                         {
                             // Channel 1
-                            *((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
-                            *((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
+                            ((short*)area1.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 0];
 
                             // Channel 2
-                            *((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
-                            *((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
+                            ((short*)area2.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 1];
 
                             // Channel 3
-                            *((byte*)area3.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 0);
-                            *((byte*)area3.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 1);
+                            ((short*)area3.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 2];
 
                             // Channel 4
-                            *((byte*)area4.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 0);
-                            *((byte*)area4.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 1);
+                            ((short*)area4.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 3];
 
                             // Channel 5
-                            *((byte*)area5.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 0);
-                            *((byte*)area5.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 1);
+                            ((short*)area5.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 4];
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
@@ -328,34 +314,19 @@ namespace Ryujinx.Audio.SoundIo
                         for (var frame = 0; frame < frameCount; frame++)
                         {
                             // Channel 1
-                            *((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
-                            *((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
-                            *((byte*)area1.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 2);
-                            *((byte*)area1.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 3);
+                            ((int*)area1.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 0];
 
                             // Channel 2
-                            *((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
-                            *((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
-                            *((byte*)area2.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 2);
-                            *((byte*)area2.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 3);
+                            ((int*)area2.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 1];
 
                             // Channel 3
-                            *((byte*)area3.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 0);
-                            *((byte*)area3.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 1);
-                            *((byte*)area3.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 2);
-                            *((byte*)area3.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 3);
+                            ((int*)area3.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 2];
 
                             // Channel 4
-                            *((byte*)area4.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 0);
-                            *((byte*)area4.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 1);
-                            *((byte*)area4.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 2);
-                            *((byte*)area4.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 3);
+                            ((int*)area4.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 3];
 
                             // Channel 5
-                            *((byte*)area5.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 0);
-                            *((byte*)area5.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 1);
-                            *((byte*)area5.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 2);
-                            *((byte*)area5.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 3);
+                            ((int*)area5.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 4];
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
@@ -368,11 +339,26 @@ namespace Ryujinx.Audio.SoundIo
                     {
                         for (var frame = 0; frame < frameCount; frame++)
                         {
-                            Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
-                            Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
+                            // Channel 1
+                            Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, srcptr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
+
+                            // Channel 2
+                            Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, srcptr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
+
+                            // Channel 3
+                            Unsafe.CopyBlockUnaligned((byte*)area3.Pointer, srcptr + (frame * bytesPerFrame) + (2 * bytesPerSample), bytesPerSample);
+
+                            // Channel 4
+                            Unsafe.CopyBlockUnaligned((byte*)area4.Pointer, srcptr + (frame * bytesPerFrame) + (3 * bytesPerSample), bytesPerSample);
+
+                            // Channel 5
+                            Unsafe.CopyBlockUnaligned((byte*)area5.Pointer, srcptr + (frame * bytesPerFrame) + (4 * bytesPerSample), bytesPerSample);
 
                             area1.Pointer += area1.Step;
                             area2.Pointer += area2.Step;
+                            area3.Pointer += area3.Step;
+                            area4.Pointer += area4.Step;
+                            area5.Pointer += area5.Step;
                         }
                     }
                 }
@@ -386,13 +372,14 @@ namespace Ryujinx.Audio.SoundIo
                 for (var i = 0; i < channelCount; i++)
                     channels[i] = areas.GetArea(i);
 
-                fixed (byte* buffPtr = &samples[0])
+                fixed (byte* srcptr = samples)
                 {
                     for (var frame = 0; frame < frameCount; frame++)
                         for (var channel = 0; channel < areas.ChannelCount; channel++)
                         {
-                            // This is slow!
-                            Unsafe.CopyBlockUnaligned((byte*)channels[channel].Pointer, buffPtr + frame * bytesPerFrame + channel * bytesPerSample, bytesPerSample);
+                            // Copy channel by channel, frame by frame. This is slow!
+                            Unsafe.CopyBlockUnaligned((byte*)channels[channel].Pointer, srcptr + (frame * bytesPerFrame) + (channel * bytesPerSample), bytesPerSample);
+
                             channels[channel].Pointer += channels[channel].Step;
                         }
                 }