Audio: Unroll and optimize the audio write callback
Copying one sample at a time is slow. This commit unrolls the most common audio channel layouts and manually copies the bytes between source and destination, which is over 2x faster than calling CopyBlockUnaligned for every sample.
This commit is contained in:
parent
9ee2afd547
commit
c2f0f174c6
1 changed files with 294 additions and 34 deletions
|
@ -114,7 +114,7 @@ namespace Ryujinx.Audio.SoundIo
|
|||
private unsafe void WriteCallback(int minFrameCount, int maxFrameCount)
|
||||
{
|
||||
var bytesPerFrame = AudioStream.BytesPerFrame;
|
||||
var bytesPerSample = AudioStream.BytesPerSample;
|
||||
var bytesPerSample = (uint)AudioStream.BytesPerSample;
|
||||
|
||||
var bufferedFrames = m_Buffer.Length / bytesPerFrame;
|
||||
var bufferedSamples = m_Buffer.Length / bytesPerSample;
|
||||
|
@ -128,12 +128,261 @@ namespace Ryujinx.Audio.SoundIo
|
|||
var channelCount = areas.ChannelCount;
|
||||
|
||||
var samples = new byte[frameCount * bytesPerFrame];
|
||||
var samplesLength = samples.Length;
|
||||
|
||||
m_Buffer.Read(samples, 0, samplesLength);
|
||||
m_Buffer.Read(samples, 0, samples.Length);
|
||||
|
||||
// This is a huge ugly block of code, but we save
|
||||
// a significant amount of time over the generic
|
||||
// loop that handles other channel counts.
|
||||
|
||||
// Mono
|
||||
if (channelCount == 1)
|
||||
{
|
||||
var area = areas.GetArea(0);
|
||||
|
||||
fixed (byte* buffPtr = &samples[0])
|
||||
{
|
||||
if (bytesPerSample == 1)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
*((byte*)area.Pointer) = *(buffPtr + (frame * bytesPerFrame));
|
||||
|
||||
area.Pointer += area.Step;
|
||||
}
|
||||
}
|
||||
else if (bytesPerSample == 2)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
*((byte*)area.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + 0);
|
||||
*((byte*)area.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + 1);
|
||||
|
||||
area.Pointer += area.Step;
|
||||
}
|
||||
}
|
||||
else if (bytesPerSample == 4)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
*((byte*)area.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + 0);
|
||||
*((byte*)area.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + 1);
|
||||
*((byte*)area.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + 2);
|
||||
*((byte*)area.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + 3);
|
||||
|
||||
area.Pointer += area.Step;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
Unsafe.CopyBlockUnaligned((byte*)area.Pointer, buffPtr + (frame * bytesPerFrame), bytesPerSample);
|
||||
|
||||
area.Pointer += area.Step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Stereo
|
||||
else if (channelCount == 2)
|
||||
{
|
||||
var area1 = areas.GetArea(0);
|
||||
var area2 = areas.GetArea(1);
|
||||
|
||||
fixed (byte* buffPtr = &samples[0])
|
||||
{
|
||||
if (bytesPerSample == 1)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
// Channel 1
|
||||
*((byte*)area1.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample));
|
||||
|
||||
// Channel 2
|
||||
*((byte*)area2.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample));
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
}
|
||||
}
|
||||
else if (bytesPerSample == 2)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
// Channel 1
|
||||
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
|
||||
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
|
||||
|
||||
// Channel 2
|
||||
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
|
||||
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
}
|
||||
}
|
||||
else if (bytesPerSample == 4)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
// Channel 1
|
||||
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
|
||||
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
|
||||
*((byte*)area1.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 2);
|
||||
*((byte*)area1.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 3);
|
||||
|
||||
// Channel 2
|
||||
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
|
||||
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
|
||||
*((byte*)area2.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 2);
|
||||
*((byte*)area2.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 3);
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
|
||||
Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Surround
|
||||
else if (channelCount == 5)
|
||||
{
|
||||
var area1 = areas.GetArea(0);
|
||||
var area2 = areas.GetArea(1);
|
||||
var area3 = areas.GetArea(2);
|
||||
var area4 = areas.GetArea(3);
|
||||
var area5 = areas.GetArea(4);
|
||||
|
||||
fixed (byte* buffPtr = &samples[0])
|
||||
{
|
||||
if (bytesPerSample == 1)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
// Channel 1
|
||||
*((byte*)area1.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample));
|
||||
|
||||
// Channel 2
|
||||
*((byte*)area2.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample));
|
||||
|
||||
// Channel 3
|
||||
*((byte*)area3.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample));
|
||||
|
||||
// Channel 4
|
||||
*((byte*)area4.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample));
|
||||
|
||||
// Channel 5
|
||||
*((byte*)area5.Pointer) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample));
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
area3.Pointer += area3.Step;
|
||||
area4.Pointer += area4.Step;
|
||||
area5.Pointer += area5.Step;
|
||||
}
|
||||
}
|
||||
else if (bytesPerSample == 2)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
// Channel 1
|
||||
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
|
||||
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
|
||||
|
||||
// Channel 2
|
||||
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
|
||||
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
|
||||
|
||||
// Channel 3
|
||||
*((byte*)area3.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 0);
|
||||
*((byte*)area3.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 1);
|
||||
|
||||
// Channel 4
|
||||
*((byte*)area4.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 0);
|
||||
*((byte*)area4.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 1);
|
||||
|
||||
// Channel 5
|
||||
*((byte*)area5.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 0);
|
||||
*((byte*)area5.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 1);
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
area3.Pointer += area3.Step;
|
||||
area4.Pointer += area4.Step;
|
||||
area5.Pointer += area5.Step;
|
||||
}
|
||||
}
|
||||
else if (bytesPerSample == 4)
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
// Channel 1
|
||||
*((byte*)area1.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 0);
|
||||
*((byte*)area1.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 1);
|
||||
*((byte*)area1.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 2);
|
||||
*((byte*)area1.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample) + 3);
|
||||
|
||||
// Channel 2
|
||||
*((byte*)area2.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 0);
|
||||
*((byte*)area2.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 1);
|
||||
*((byte*)area2.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 2);
|
||||
*((byte*)area2.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample) + 3);
|
||||
|
||||
// Channel 3
|
||||
*((byte*)area3.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 0);
|
||||
*((byte*)area3.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 1);
|
||||
*((byte*)area3.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 2);
|
||||
*((byte*)area3.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (2 * bytesPerSample) + 3);
|
||||
|
||||
// Channel 4
|
||||
*((byte*)area4.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 0);
|
||||
*((byte*)area4.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 1);
|
||||
*((byte*)area4.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 2);
|
||||
*((byte*)area4.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (3 * bytesPerSample) + 3);
|
||||
|
||||
// Channel 5
|
||||
*((byte*)area5.Pointer + 0) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 0);
|
||||
*((byte*)area5.Pointer + 1) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 1);
|
||||
*((byte*)area5.Pointer + 2) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 2);
|
||||
*((byte*)area5.Pointer + 3) = *(buffPtr + (frame * bytesPerFrame) + (4 * bytesPerSample) + 3);
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
area3.Pointer += area3.Step;
|
||||
area4.Pointer += area4.Step;
|
||||
area5.Pointer += area5.Step;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var frame = 0; frame < frameCount; frame++)
|
||||
{
|
||||
Unsafe.CopyBlockUnaligned((byte*)area1.Pointer, buffPtr + (frame * bytesPerFrame) + (0 * bytesPerSample), bytesPerSample);
|
||||
Unsafe.CopyBlockUnaligned((byte*)area2.Pointer, buffPtr + (frame * bytesPerFrame) + (1 * bytesPerSample), bytesPerSample);
|
||||
|
||||
area1.Pointer += area1.Step;
|
||||
area2.Pointer += area2.Step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Every other channel count
|
||||
else
|
||||
{
|
||||
var channels = new SoundIOChannelArea[channelCount];
|
||||
|
||||
// Obtain the channel area for each channel
|
||||
for (var i = 0; i < channelCount; i++)
|
||||
channels[i] = areas.GetArea(i);
|
||||
|
||||
|
@ -142,28 +391,41 @@ namespace Ryujinx.Audio.SoundIo
|
|||
for (var frame = 0; frame < frameCount; frame++)
|
||||
for (var channel = 0; channel < areas.ChannelCount; channel++)
|
||||
{
|
||||
Unsafe.CopyBlockUnaligned((byte*)channels[channel].Pointer, buffPtr + frame * bytesPerFrame + channel * bytesPerSample, (uint)bytesPerSample);
|
||||
// This is slow!
|
||||
Unsafe.CopyBlockUnaligned((byte*)channels[channel].Pointer, buffPtr + frame * bytesPerFrame + channel * bytesPerSample, bytesPerSample);
|
||||
channels[channel].Pointer += channels[channel].Step;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AudioStream.EndWrite();
|
||||
|
||||
UpdateReleasedBuffers(samples.Length);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Releases any buffers that have been fully written to the output device
|
||||
/// </summary>
|
||||
/// <param name="bytesRead">The amount of bytes written in the last device write</param>
|
||||
private void UpdateReleasedBuffers(int bytesRead)
|
||||
{
|
||||
bool bufferReleased = false;
|
||||
while (samplesLength > 0)
|
||||
while (bytesRead > 0)
|
||||
{
|
||||
if (m_ReservedBuffers.TryPeek(out SoundIoBuffer buffer))
|
||||
{
|
||||
if(buffer.Length > samplesLength)
|
||||
if (buffer.Length > bytesRead)
|
||||
{
|
||||
buffer.Length -= samplesLength;
|
||||
samplesLength = 0;
|
||||
buffer.Length -= bytesRead;
|
||||
bytesRead = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
samplesLength -= buffer.Length;
|
||||
m_ReservedBuffers.TryDequeue(out buffer);
|
||||
|
||||
ReleasedBuffers.Enqueue(buffer.Tag);
|
||||
bufferReleased = true;
|
||||
bytesRead -= buffer.Length;
|
||||
|
||||
m_ReservedBuffers.TryDequeue(out buffer);
|
||||
ReleasedBuffers.Enqueue(buffer.Tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -172,8 +434,6 @@ namespace Ryujinx.Audio.SoundIo
|
|||
{
|
||||
OnBufferReleased();
|
||||
}
|
||||
|
||||
AudioStream.EndWrite();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue