diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs index fa50f2395f..50a87893b5 100644 --- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs +++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs @@ -309,98 +309,31 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer { int[] MixBuffer = new int[MixBufferSamplesCount * AudioConsts.HostChannelsCount]; - fixed (int* mixptr = MixBuffer) + foreach (VoiceContext Voice in Voices) { - foreach (VoiceContext Voice in Voices) + if (!Voice.Playing) { - if (!Voice.Playing) + continue; + } + + int OutOffset = 0; + int PendingSamples = MixBufferSamplesCount; + float Volume = Voice.Volume; + + while (PendingSamples > 0) + { + int[] Samples = Voice.GetBufferData(Memory, PendingSamples, out int ReturnedSamples); + + if (ReturnedSamples == 0) { - continue; + break; } - int OutOffset = 0; - int PendingSamples = MixBufferSamplesCount; - float Volume = Voice.Volume; + PendingSamples -= ReturnedSamples; - while (PendingSamples > 0) + for (int Offset = 0; Offset < Samples.Length; Offset++) { - int[] Samples = Voice.GetBufferData(Memory, PendingSamples, out int ReturnedSamples); - - if (ReturnedSamples == 0) - { - break; - } - - PendingSamples -= ReturnedSamples; - - int Offset = 0; - - if(Avx2.IsSupported && Avx.IsSupported) - { - fixed (int* samptr = Samples) - { - // Load our scale factor as a scalar - Vector256 volume = Avx.SetAllVector256(Volume); - - for (; Offset + 32 <= Samples.Length; Offset += 32, OutOffset += 32) - { - // Convert our samples from ints to floats - Vector256 samples1 = Avx.ConvertToVector256Single(Avx.LoadVector256(samptr + Offset + 0)); - Vector256 samples2 = Avx.ConvertToVector256Single(Avx.LoadVector256(samptr + Offset + 8)); - Vector256 samples3 = Avx.ConvertToVector256Single(Avx.LoadVector256(samptr + Offset + 16)); - Vector256 samples4 = Avx.ConvertToVector256Single(Avx.LoadVector256(samptr + Offset + 24)); - - Vector256 mix1 = Avx.LoadVector256(mixptr + OutOffset + 0); - Vector256 mix2 = Avx.LoadVector256(mixptr + OutOffset + 8); - Vector256 mix3 = Avx.LoadVector256(mixptr + OutOffset + 16); - Vector256 mix4 = Avx.LoadVector256(mixptr + OutOffset + 24); - - // Scale by the volume and store back as ints - // TODO: Implement this as an FMA operation once Intrinsics - // gets support for FMA in their AVX2 implementation. - Avx.Store(mixptr + OutOffset + 0, Avx2.Add(mix1, Avx.ConvertToVector256Int32(Avx.Multiply(samples1, volume)))); - Avx.Store(mixptr + OutOffset + 8, Avx2.Add(mix2, Avx.ConvertToVector256Int32(Avx.Multiply(samples2, volume)))); - Avx.Store(mixptr + OutOffset + 16, Avx2.Add(mix3, Avx.ConvertToVector256Int32(Avx.Multiply(samples3, volume)))); - Avx.Store(mixptr + OutOffset + 24, Avx2.Add(mix4, Avx.ConvertToVector256Int32(Avx.Multiply(samples4, volume)))); - } - } - } - else if (Sse2.IsSupported && Sse.IsSupported) - { - fixed (int* samptr = Samples) - { - // Load our scale factor as a scalar - Vector128 volume = Sse.SetAllVector128(Volume); - - for (; Offset + 16 <= Samples.Length; Offset += 16, OutOffset += 16) - { - // Convert our samples from ints to floats - Vector128 samples1 = Sse2.ConvertToVector128Single(Sse2.LoadVector128(samptr + Offset + 0)); - Vector128 samples2 = Sse2.ConvertToVector128Single(Sse2.LoadVector128(samptr + Offset + 4)); - Vector128 samples3 = Sse2.ConvertToVector128Single(Sse2.LoadVector128(samptr + Offset + 8)); - Vector128 samples4 = Sse2.ConvertToVector128Single(Sse2.LoadVector128(samptr + Offset + 12)); - - Vector128 mix1 = Sse2.LoadVector128(mixptr + OutOffset + 0); - Vector128 mix2 = Sse2.LoadVector128(mixptr + OutOffset + 4); - Vector128 mix3 = Sse2.LoadVector128(mixptr + OutOffset + 8); - Vector128 mix4 = Sse2.LoadVector128(mixptr + OutOffset + 12); - - // Scale by the volume and store back as ints - Sse2.Store(mixptr + OutOffset + 0, Sse2.Add(mix1, Sse2.ConvertToVector128Int32(Sse.Multiply(samples1, volume)))); - Sse2.Store(mixptr + OutOffset + 4, Sse2.Add(mix2, Sse2.ConvertToVector128Int32(Sse.Multiply(samples2, volume)))); - Sse2.Store(mixptr + OutOffset + 8, Sse2.Add(mix3, Sse2.ConvertToVector128Int32(Sse.Multiply(samples3, volume)))); - Sse2.Store(mixptr + OutOffset + 12, Sse2.Add(mix4, Sse2.ConvertToVector128Int32(Sse.Multiply(samples4, volume)))); - } - } - } - - // Process left overs - for (; Offset < Samples.Length; Offset++) - { - int Sample = (int)(Samples[Offset] * Voice.Volume); - - MixBuffer[OutOffset++] += Sample; - } + MixBuffer[OutOffset++] += (int)(Samples[Offset] * Voice.Volume); } } } @@ -427,7 +360,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer Vector128 block1A = Sse2.LoadVector128(inptr + Offset + 0); Vector128 block1B = Sse2.LoadVector128(inptr + Offset + 4); - Vector128 block2A = Sse2.LoadVector128(inptr + Offset + 8); + Vector128 block2A = Sse2.LoadVector128(inptr + Offset + 8); Vector128 block2B = Sse2.LoadVector128(inptr + Offset + 12); Vector128 block3A = Sse2.LoadVector128(inptr + Offset + 16); @@ -441,8 +374,8 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer Vector128 output3 = Sse2.PackSignedSaturate(block3A, block3B); Vector128 output4 = Sse2.PackSignedSaturate(block4A, block4B); - Sse2.Store(outptr + Offset + 0, output1); - Sse2.Store(outptr + Offset + 8, output2); + Sse2.Store(outptr + Offset + 0, output1); + Sse2.Store(outptr + Offset + 8, output2); Sse2.Store(outptr + Offset + 16, output3); Sse2.Store(outptr + Offset + 24, output4); }