Audio: Optimize the write callback further

This dramatically reduces the audio buffer copy time. When the sample size is one of the handled sample sizes, the buffer copy operation is almost 10x faster than CopyBlockUnaligned.

This works by copying a full sample at a time, rather than the individual bytes that make up the sample. This allows for 2x or 4x faster copy operations, depending on the sample size.
This commit is contained in:
jduncanator 2018-09-05 16:41:00 +10:00
commit 40e9247be2

View file

@ -140,13 +140,13 @@ namespace Ryujinx.Audio.SoundIo
{ {
var area = areas.GetArea(0); var area = areas.GetArea(0);
fixed (byte* buffPtr = &samples[0]) fixed (byte* srcptr = samples)
{ {
if (bytesPerSample == 1) if (bytesPerSample == 1)
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
*((byte*)area.Pointer) = *(buffPtr + (frame * bytesPerFrame)); ((byte*)area.Pointer)[0] = srcptr[frame * bytesPerFrame];
area.Pointer += area.Step; area.Pointer += area.Step;
} }
@ -155,8 +155,7 @@ namespace Ryujinx.Audio.SoundIo
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
*((byte*)area.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + 0); ((short*)area.Pointer)[0] = ((short*)srcptr)[frame * bytesPerFrame >> 1];
*((byte*)area.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + 1);
area.Pointer += area.Step; area.Pointer += area.Step;
} }
@ -165,10 +164,7 @@ namespace Ryujinx.Audio.SoundIo
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
*((byte*)area.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + 0); ((int*)area.Pointer)[0] = ((int*)srcptr)[frame * bytesPerFrame >> 2];
*((byte*)area.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + 1);
*((byte*)area.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + 2);
*((byte*)area.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + 3);
area.Pointer += area.Step; area.Pointer += area.Step;
} }
@ -177,7 +173,7 @@ namespace Ryujinx.Audio.SoundIo
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
Unsafe.CopyBlockUnaligned((byte*)area.Pointer, buffPtr + (frame * bytesPerFrame), bytesPerSample); Unsafe.CopyBlockUnaligned((byte*)area.Pointer, srcptr + (frame * bytesPerFrame), bytesPerSample);
area.Pointer += area.Step; area.Pointer += area.Step;
} }
@ -190,53 +186,45 @@ namespace Ryujinx.Audio.SoundIo
var area1 = areas.GetArea(0); var area1 = areas.GetArea(0);
var area2 = areas.GetArea(1); var area2 = areas.GetArea(1);
fixed (byte* buffPtr = &samples[0]) fixed (byte* srcptr = samples)
{ {
if (bytesPerSample == 1) if (bytesPerSample == 1)
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
// Channel 1 // Channel 1
*((byte*)area1.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample)); ((byte*)area1.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 0];
// Channel 2 // Channel 2
*((byte*)area2.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample)); ((byte*)area2.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 1];
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
} }
} }
else if (bytesPerSample == 2) else if (bytesPerSample == 2)
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
// Channel 1 // Channel 1
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0); ((short*)area1.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 0];
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
// Channel 2 // Channel 2
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0); ((short*)area2.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 1];
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
} }
} }
else if (bytesPerSample == 4) else if (bytesPerSample == 4)
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
// Channel 1 // Channel 1
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0); ((int*)area1.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 0];
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
*((byte*)area1.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 2);
*((byte*)area1.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 3);
// Channel 2 // Channel 2
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0); ((int*)area2.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 1];
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
*((byte*)area2.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 2);
*((byte*)area2.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 3);
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
@ -246,8 +234,11 @@ namespace Ryujinx.Audio.SoundIo
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample); // Channel 1
Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample); Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, srcptr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
// Channel 2
Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, srcptr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
@ -264,26 +255,26 @@ namespace Ryujinx.Audio.SoundIo
var area4 = areas.GetArea(3); var area4 = areas.GetArea(3);
var area5 = areas.GetArea(4); var area5 = areas.GetArea(4);
fixed (byte* buffPtr = &samples[0]) fixed (byte* srcptr = samples)
{ {
if (bytesPerSample == 1) if (bytesPerSample == 1)
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
// Channel 1 // Channel 1
*((byte*)area1.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample)); ((byte*)area1.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 0];
// Channel 2 // Channel 2
*((byte*)area2.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample)); ((byte*)area2.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 1];
// Channel 3 // Channel 3
*((byte*)area3.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample)); ((byte*)area3.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 2];
// Channel 4 // Channel 4
*((byte*)area4.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample)); ((byte*)area4.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 3];
// Channel 5 // Channel 5
*((byte*)area5.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample)); ((byte*)area5.Pointer)[0] = srcptr[(frame * bytesPerFrame) + 4];
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
@ -297,24 +288,19 @@ namespace Ryujinx.Audio.SoundIo
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
// Channel 1 // Channel 1
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0); ((short*)area1.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 0];
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
// Channel 2 // Channel 2
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0); ((short*)area2.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 1];
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
// Channel 3 // Channel 3
*((byte*)area3.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 0); ((short*)area3.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 2];
*((byte*)area3.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 1);
// Channel 4 // Channel 4
*((byte*)area4.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 0); ((short*)area4.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 3];
*((byte*)area4.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 1);
// Channel 5 // Channel 5
*((byte*)area5.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 0); ((short*)area5.Pointer)[0] = ((short*)srcptr)[(frame * bytesPerFrame >> 1) + 4];
*((byte*)area5.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 1);
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
@ -328,34 +314,19 @@ namespace Ryujinx.Audio.SoundIo
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
// Channel 1 // Channel 1
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0); ((int*)area1.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 0];
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
*((byte*)area1.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 2);
*((byte*)area1.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 3);
// Channel 2 // Channel 2
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0); ((int*)area2.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 1];
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
*((byte*)area2.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 2);
*((byte*)area2.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 3);
// Channel 3 // Channel 3
*((byte*)area3.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 0); ((int*)area3.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 2];
*((byte*)area3.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 1);
*((byte*)area3.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 2);
*((byte*)area3.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 3);
// Channel 4 // Channel 4
*((byte*)area4.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 0); ((int*)area4.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 3];
*((byte*)area4.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 1);
*((byte*)area4.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 2);
*((byte*)area4.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 3);
// Channel 5 // Channel 5
*((byte*)area5.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 0); ((int*)area5.Pointer)[0] = ((int*)srcptr)[(frame * bytesPerFrame >> 2) + 4];
*((byte*)area5.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 1);
*((byte*)area5.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 2);
*((byte*)area5.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 3);
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
@ -368,11 +339,26 @@ namespace Ryujinx.Audio.SoundIo
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
{ {
Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample); // Channel 1
Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample); Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, srcptr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
// Channel 2
Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, srcptr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
// Channel 3
Unsafe.CopyBlockUnaligned((byte*)area3.Pointer, srcptr + (frame * bytesPerFrame) + (2 * bytesPerSample), bytesPerSample);
// Channel 4
Unsafe.CopyBlockUnaligned((byte*)area4.Pointer, srcptr + (frame * bytesPerFrame) + (3 * bytesPerSample), bytesPerSample);
// Channel 5
Unsafe.CopyBlockUnaligned((byte*)area5.Pointer, srcptr + (frame * bytesPerFrame) + (4 * bytesPerSample), bytesPerSample);
area1.Pointer += area1.Step; area1.Pointer += area1.Step;
area2.Pointer += area2.Step; area2.Pointer += area2.Step;
area3.Pointer += area3.Step;
area4.Pointer += area4.Step;
area5.Pointer += area5.Step;
} }
} }
} }
@ -386,13 +372,14 @@ namespace Ryujinx.Audio.SoundIo
for (var i = 0; i < channelCount; i++) for (var i = 0; i < channelCount; i++)
channels[i] = areas.GetArea(i); channels[i] = areas.GetArea(i);
fixed (byte* buffPtr = &samples[0]) fixed (byte* srcptr = samples)
{ {
for (var frame = 0; frame < frameCount; frame++) for (var frame = 0; frame < frameCount; frame++)
for (var channel = 0; channel < areas.ChannelCount; channel++) for (var channel = 0; channel < areas.ChannelCount; channel++)
{ {
// This is slow! // Copy channel by channel, frame by frame. This is slow!
Unsafe.CopyBlockUnaligned((byte*)channels[channel].Pointer, buffPtr + frame * bytesPerFrame + channel * bytesPerSample, bytesPerSample); Unsafe.CopyBlockUnaligned((byte*)channels[channel].Pointer, srcptr + (frame * bytesPerFrame) + (channel * bytesPerSample), bytesPerSample);
channels[channel].Pointer += channels[channel].Step; channels[channel].Pointer += channels[channel].Step;
} }
} }