mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-08-13 11:39:09 +00:00
Merge 269675c116
into 72ef27c157
This commit is contained in:
commit
a88306ecea
6 changed files with 132 additions and 87 deletions
|
@ -478,9 +478,9 @@ void AXUCode::ProcessPBList(u32 pb_addr)
|
|||
{
|
||||
ApplyUpdatesForMs(curr_ms, pb, pb.updates.num_updates, updates);
|
||||
|
||||
ProcessVoice(static_cast<HLEAccelerator*>(m_accelerator.get()), pb, buffers, spms,
|
||||
ConvertMixerControl(pb.mixer_control),
|
||||
m_coeffs_checksum ? m_coeffs.data() : nullptr, false);
|
||||
ProcessVoice<spms>(static_cast<HLEAccelerator*>(m_accelerator.get()), pb, buffers,
|
||||
ConvertMixerControl(pb.mixer_control),
|
||||
m_coeffs_checksum ? m_coeffs.data() : nullptr, false);
|
||||
|
||||
// Forward the buffers
|
||||
for (auto& ptr : buffers.ptrs)
|
||||
|
|
|
@ -88,15 +88,15 @@ protected:
|
|||
static constexpr u32 MAIL_CMDLIST_MASK = 0xFFFF0000;
|
||||
|
||||
// 32 * 5 because 32 samples per millisecond, for max 5 milliseconds.
|
||||
int m_samples_main_left[32 * 5]{};
|
||||
int m_samples_main_right[32 * 5]{};
|
||||
int m_samples_main_surround[32 * 5]{};
|
||||
int m_samples_auxA_left[32 * 5]{};
|
||||
int m_samples_auxA_right[32 * 5]{};
|
||||
int m_samples_auxA_surround[32 * 5]{};
|
||||
int m_samples_auxB_left[32 * 5]{};
|
||||
int m_samples_auxB_right[32 * 5]{};
|
||||
int m_samples_auxB_surround[32 * 5]{};
|
||||
alignas(32) int m_samples_main_left[32 * 5]{};
|
||||
alignas(32) int m_samples_main_right[32 * 5]{};
|
||||
alignas(32) int m_samples_main_surround[32 * 5]{};
|
||||
alignas(32) int m_samples_auxA_left[32 * 5]{};
|
||||
alignas(32) int m_samples_auxA_right[32 * 5]{};
|
||||
alignas(32) int m_samples_auxA_surround[32 * 5]{};
|
||||
alignas(32) int m_samples_auxB_left[32 * 5]{};
|
||||
alignas(32) int m_samples_auxB_right[32 * 5]{};
|
||||
alignas(32) int m_samples_auxB_surround[32 * 5]{};
|
||||
|
||||
u16 m_cmdlist[512]{};
|
||||
u32 m_cmdlist_size = 0;
|
||||
|
|
|
@ -10,7 +10,7 @@ namespace DSP::HLE
|
|||
struct VolumeData
|
||||
{
|
||||
u16 volume;
|
||||
u16 volume_delta;
|
||||
s16 volume_delta;
|
||||
};
|
||||
|
||||
struct PBMixer
|
||||
|
@ -198,7 +198,7 @@ struct PBLowPassFilter
|
|||
u16 b0;
|
||||
};
|
||||
|
||||
struct AXPB
|
||||
struct alignas(32) AXPB
|
||||
{
|
||||
u16 next_pb_hi;
|
||||
u16 next_pb_lo;
|
||||
|
@ -255,7 +255,7 @@ union PBInfImpulseResponseWM
|
|||
PBBiquadFilter biquad;
|
||||
};
|
||||
|
||||
struct AXPBWii
|
||||
struct alignas(32) AXPBWii
|
||||
{
|
||||
u16 next_pb_hi;
|
||||
u16 next_pb_lo;
|
||||
|
|
|
@ -355,10 +355,11 @@ s16 ClampS16(s64 sample)
|
|||
}
|
||||
|
||||
// Add samples to an output buffer, with optional volume ramping.
|
||||
void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bool ramp)
|
||||
template <u32 count>
|
||||
static void MixAdd(int* out, const s16* input, VolumeData* vd, s16* dpop, bool ramp)
|
||||
{
|
||||
u16& volume = vd->volume;
|
||||
u16 volume_delta = vd->volume_delta;
|
||||
u16 volume = vd->volume;
|
||||
s16 volume_delta = vd->volume_delta;
|
||||
|
||||
// If volume ramping is disabled, set volume_delta to 0. That way, the
|
||||
// mixing loop can avoid testing if volume ramping is enabled at each step,
|
||||
|
@ -366,18 +367,61 @@ void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bo
|
|||
if (!ramp)
|
||||
volume_delta = 0;
|
||||
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
#ifdef __AVX2__
|
||||
if constexpr ((count & 15) == 0)
|
||||
{
|
||||
s64 sample = input[i];
|
||||
sample *= volume;
|
||||
sample >>= 15;
|
||||
s16 sample16 = ClampS16((s32)sample);
|
||||
out = std::assume_aligned<32>(out);
|
||||
input = std::assume_aligned<32>(input);
|
||||
|
||||
out[i] += sample16;
|
||||
volume += volume_delta;
|
||||
auto vol = _mm256_set1_epi16(volume);
|
||||
const auto delta = _mm256_set1_epi16(volume_delta);
|
||||
|
||||
*dpop = sample16;
|
||||
// Vectorize the volume.
|
||||
const auto iota = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
vol = _mm256_add_epi16(vol, _mm256_mullo_epi16(delta, iota));
|
||||
|
||||
// Each loop iteration processes 16 samples.
|
||||
const auto delta16 = _mm256_slli_epi16(delta, 4);
|
||||
|
||||
for (u32 i = 0; i < count; i += 16)
|
||||
{
|
||||
const auto val = *(__m256i*)&input[i];
|
||||
const auto dst = (__m256i*)&out[i];
|
||||
|
||||
// mulhrs is signed * signed but we need signed * unsigned,
|
||||
// so drop the top bit and adjust the product if it was set.
|
||||
const auto mul = _mm256_mulhrs_epi16(val, _mm256_and_si256(vol, _mm256_set1_epi16(0x7FFF)));
|
||||
const auto add = _mm256_adds_epi16(mul, _mm256_and_si256(val, _mm256_srai_epi16(vol, 15)));
|
||||
|
||||
// Sign-extend to 32-bit.
|
||||
const auto lo = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(add, 0));
|
||||
const auto hi = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(add, 1));
|
||||
|
||||
// Add to the output.
|
||||
dst[0] = _mm256_add_epi32(dst[0], lo);
|
||||
dst[1] = _mm256_add_epi32(dst[1], hi);
|
||||
|
||||
// Update the volume for the next iteration.
|
||||
vol = _mm256_add_epi16(vol, delta16);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
{
|
||||
s32 sample = input[i];
|
||||
sample *= volume;
|
||||
sample >>= 15;
|
||||
s16 sample16 = ClampS16(sample);
|
||||
|
||||
out[i] += (s16)sample16;
|
||||
volume += volume_delta;
|
||||
}
|
||||
}
|
||||
|
||||
vd->volume += volume_delta * count;
|
||||
*dpop = out[count - 1];
|
||||
}
|
||||
|
||||
// Execute a low pass filter on the samples using one history value.
|
||||
|
@ -418,7 +462,8 @@ static void BiquadFilter(s16* samples, u32 count, PBBiquadFilter& f)
|
|||
|
||||
// Process 1ms of audio (for AX GC) or 3ms of audio (for AX Wii) from a PB and
|
||||
// mix it to the output buffers.
|
||||
void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buffers, u16 count,
|
||||
template <size_t count>
|
||||
void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buffers,
|
||||
AXMixControl mctrl, const s16* coeffs, bool new_filter)
|
||||
{
|
||||
// If the voice is not running, nothing to do.
|
||||
|
@ -426,7 +471,7 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf
|
|||
return;
|
||||
|
||||
// Read input samples, performing sample rate conversion if needed.
|
||||
s16 samples[MAX_SAMPLES_PER_FRAME];
|
||||
alignas(32) s16 samples[MAX_SAMPLES_PER_FRAME];
|
||||
GetInputSamples(accelerator, pb, samples, count, coeffs);
|
||||
|
||||
// Apply a global volume ramp using the volume envelope parameters.
|
||||
|
@ -465,67 +510,67 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf
|
|||
|
||||
if (MIX_ON(MAIN_L))
|
||||
{
|
||||
MixAdd(buffers.main_left, samples, count, &pb.mixer.main_left, &pb.dpop.main_left,
|
||||
RAMP_ON(MAIN_L));
|
||||
MixAdd<count>(buffers.main_left, samples, &pb.mixer.main_left, &pb.dpop.main_left,
|
||||
RAMP_ON(MAIN_L));
|
||||
}
|
||||
if (MIX_ON(MAIN_R))
|
||||
{
|
||||
MixAdd(buffers.main_right, samples, count, &pb.mixer.main_right, &pb.dpop.main_right,
|
||||
RAMP_ON(MAIN_R));
|
||||
MixAdd<count>(buffers.main_right, samples, &pb.mixer.main_right, &pb.dpop.main_right,
|
||||
RAMP_ON(MAIN_R));
|
||||
}
|
||||
if (MIX_ON(MAIN_S))
|
||||
{
|
||||
MixAdd(buffers.main_surround, samples, count, &pb.mixer.main_surround, &pb.dpop.main_surround,
|
||||
RAMP_ON(MAIN_S));
|
||||
MixAdd<count>(buffers.main_surround, samples, &pb.mixer.main_surround, &pb.dpop.main_surround,
|
||||
RAMP_ON(MAIN_S));
|
||||
}
|
||||
|
||||
if (MIX_ON(AUXA_L))
|
||||
{
|
||||
MixAdd(buffers.auxA_left, samples, count, &pb.mixer.auxA_left, &pb.dpop.auxA_left,
|
||||
RAMP_ON(AUXA_L));
|
||||
MixAdd<count>(buffers.auxA_left, samples, &pb.mixer.auxA_left, &pb.dpop.auxA_left,
|
||||
RAMP_ON(AUXA_L));
|
||||
}
|
||||
if (MIX_ON(AUXA_R))
|
||||
{
|
||||
MixAdd(buffers.auxA_right, samples, count, &pb.mixer.auxA_right, &pb.dpop.auxA_right,
|
||||
RAMP_ON(AUXA_R));
|
||||
MixAdd<count>(buffers.auxA_right, samples, &pb.mixer.auxA_right, &pb.dpop.auxA_right,
|
||||
RAMP_ON(AUXA_R));
|
||||
}
|
||||
if (MIX_ON(AUXA_S))
|
||||
{
|
||||
MixAdd(buffers.auxA_surround, samples, count, &pb.mixer.auxA_surround, &pb.dpop.auxA_surround,
|
||||
RAMP_ON(AUXA_S));
|
||||
MixAdd<count>(buffers.auxA_surround, samples, &pb.mixer.auxA_surround, &pb.dpop.auxA_surround,
|
||||
RAMP_ON(AUXA_S));
|
||||
}
|
||||
|
||||
if (MIX_ON(AUXB_L))
|
||||
{
|
||||
MixAdd(buffers.auxB_left, samples, count, &pb.mixer.auxB_left, &pb.dpop.auxB_left,
|
||||
RAMP_ON(AUXB_L));
|
||||
MixAdd<count>(buffers.auxB_left, samples, &pb.mixer.auxB_left, &pb.dpop.auxB_left,
|
||||
RAMP_ON(AUXB_L));
|
||||
}
|
||||
if (MIX_ON(AUXB_R))
|
||||
{
|
||||
MixAdd(buffers.auxB_right, samples, count, &pb.mixer.auxB_right, &pb.dpop.auxB_right,
|
||||
RAMP_ON(AUXB_R));
|
||||
MixAdd<count>(buffers.auxB_right, samples, &pb.mixer.auxB_right, &pb.dpop.auxB_right,
|
||||
RAMP_ON(AUXB_R));
|
||||
}
|
||||
if (MIX_ON(AUXB_S))
|
||||
{
|
||||
MixAdd(buffers.auxB_surround, samples, count, &pb.mixer.auxB_surround, &pb.dpop.auxB_surround,
|
||||
RAMP_ON(AUXB_S));
|
||||
MixAdd<count>(buffers.auxB_surround, samples, &pb.mixer.auxB_surround, &pb.dpop.auxB_surround,
|
||||
RAMP_ON(AUXB_S));
|
||||
}
|
||||
|
||||
#ifdef AX_WII
|
||||
if (MIX_ON(AUXC_L))
|
||||
{
|
||||
MixAdd(buffers.auxC_left, samples, count, &pb.mixer.auxC_left, &pb.dpop.auxC_left,
|
||||
RAMP_ON(AUXC_L));
|
||||
MixAdd<count>(buffers.auxC_left, samples, &pb.mixer.auxC_left, &pb.dpop.auxC_left,
|
||||
RAMP_ON(AUXC_L));
|
||||
}
|
||||
if (MIX_ON(AUXC_R))
|
||||
{
|
||||
MixAdd(buffers.auxC_right, samples, count, &pb.mixer.auxC_right, &pb.dpop.auxC_right,
|
||||
RAMP_ON(AUXC_R));
|
||||
MixAdd<count>(buffers.auxC_right, samples, &pb.mixer.auxC_right, &pb.dpop.auxC_right,
|
||||
RAMP_ON(AUXC_R));
|
||||
}
|
||||
if (MIX_ON(AUXC_S))
|
||||
{
|
||||
MixAdd(buffers.auxC_surround, samples, count, &pb.mixer.auxC_surround, &pb.dpop.auxC_surround,
|
||||
RAMP_ON(AUXC_S));
|
||||
MixAdd<count>(buffers.auxC_surround, samples, &pb.mixer.auxC_surround, &pb.dpop.auxC_surround,
|
||||
RAMP_ON(AUXC_S));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -559,10 +604,10 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf
|
|||
}
|
||||
|
||||
// Old AXWii versions process ms per ms.
|
||||
u16 wm_count = count == 96 ? 18 : 6;
|
||||
constexpr u16 wm_count = count == 96 ? 18 : 6;
|
||||
|
||||
// Interpolate at most 18 samples from the 96 samples we read before.
|
||||
s16 wm_samples[18];
|
||||
alignas(32) s16 wm_samples[18];
|
||||
|
||||
// We use ratio 0x55555 == (5 * 65536 + 21845) / 65536 == 5.3333 which
|
||||
// is the nearest we can get to 96/18
|
||||
|
@ -576,29 +621,29 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf
|
|||
#define WMCHAN_MIX_RAMP(n) (0 != ((pb.remote_mixer_control >> (2 * n)) & 2))
|
||||
|
||||
if (WMCHAN_MIX_ON(0))
|
||||
MixAdd(buffers.wm_main0, wm_samples, wm_count, &pb.remote_mixer.main0, &pb.remote_dpop.main0,
|
||||
WMCHAN_MIX_RAMP(0));
|
||||
MixAdd<wm_count>(buffers.wm_main0, wm_samples, &pb.remote_mixer.main0, &pb.remote_dpop.main0,
|
||||
WMCHAN_MIX_RAMP(0));
|
||||
if (WMCHAN_MIX_ON(1))
|
||||
MixAdd(buffers.wm_aux0, wm_samples, wm_count, &pb.remote_mixer.aux0, &pb.remote_dpop.aux0,
|
||||
WMCHAN_MIX_RAMP(1));
|
||||
MixAdd<wm_count>(buffers.wm_aux0, wm_samples, &pb.remote_mixer.aux0, &pb.remote_dpop.aux0,
|
||||
WMCHAN_MIX_RAMP(1));
|
||||
if (WMCHAN_MIX_ON(2))
|
||||
MixAdd(buffers.wm_main1, wm_samples, wm_count, &pb.remote_mixer.main1, &pb.remote_dpop.main1,
|
||||
WMCHAN_MIX_RAMP(2));
|
||||
MixAdd<wm_count>(buffers.wm_main1, wm_samples, &pb.remote_mixer.main1, &pb.remote_dpop.main1,
|
||||
WMCHAN_MIX_RAMP(2));
|
||||
if (WMCHAN_MIX_ON(3))
|
||||
MixAdd(buffers.wm_aux1, wm_samples, wm_count, &pb.remote_mixer.aux1, &pb.remote_dpop.aux1,
|
||||
WMCHAN_MIX_RAMP(3));
|
||||
MixAdd<wm_count>(buffers.wm_aux1, wm_samples, &pb.remote_mixer.aux1, &pb.remote_dpop.aux1,
|
||||
WMCHAN_MIX_RAMP(3));
|
||||
if (WMCHAN_MIX_ON(4))
|
||||
MixAdd(buffers.wm_main2, wm_samples, wm_count, &pb.remote_mixer.main2, &pb.remote_dpop.main2,
|
||||
WMCHAN_MIX_RAMP(4));
|
||||
MixAdd<wm_count>(buffers.wm_main2, wm_samples, &pb.remote_mixer.main2, &pb.remote_dpop.main2,
|
||||
WMCHAN_MIX_RAMP(4));
|
||||
if (WMCHAN_MIX_ON(5))
|
||||
MixAdd(buffers.wm_aux2, wm_samples, wm_count, &pb.remote_mixer.aux2, &pb.remote_dpop.aux2,
|
||||
WMCHAN_MIX_RAMP(5));
|
||||
MixAdd<wm_count>(buffers.wm_aux2, wm_samples, &pb.remote_mixer.aux2, &pb.remote_dpop.aux2,
|
||||
WMCHAN_MIX_RAMP(5));
|
||||
if (WMCHAN_MIX_ON(6))
|
||||
MixAdd(buffers.wm_main3, wm_samples, wm_count, &pb.remote_mixer.main3, &pb.remote_dpop.main3,
|
||||
WMCHAN_MIX_RAMP(6));
|
||||
MixAdd<wm_count>(buffers.wm_main3, wm_samples, &pb.remote_mixer.main3, &pb.remote_dpop.main3,
|
||||
WMCHAN_MIX_RAMP(6));
|
||||
if (WMCHAN_MIX_ON(7))
|
||||
MixAdd(buffers.wm_aux3, wm_samples, wm_count, &pb.remote_mixer.aux3, &pb.remote_dpop.aux3,
|
||||
WMCHAN_MIX_RAMP(7));
|
||||
MixAdd<wm_count>(buffers.wm_aux3, wm_samples, &pb.remote_mixer.aux3, &pb.remote_dpop.aux3,
|
||||
WMCHAN_MIX_RAMP(7));
|
||||
}
|
||||
#undef WMCHAN_MIX_RAMP
|
||||
#undef WMCHAN_MIX_ON
|
||||
|
|
|
@ -459,9 +459,9 @@ void AXWiiUCode::ProcessPBList(u32 pb_addr)
|
|||
for (int curr_ms = 0; curr_ms < 3; ++curr_ms)
|
||||
{
|
||||
ApplyUpdatesForMs(curr_ms, pb, pb.updates.num_updates, updates);
|
||||
ProcessVoice(static_cast<HLEAccelerator*>(m_accelerator.get()), pb, buffers, spms,
|
||||
ConvertMixerControl(HILO_TO_32(pb.mixer_control)),
|
||||
m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter);
|
||||
ProcessVoice<spms>(static_cast<HLEAccelerator*>(m_accelerator.get()), pb, buffers,
|
||||
ConvertMixerControl(HILO_TO_32(pb.mixer_control)),
|
||||
m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter);
|
||||
|
||||
// Forward the buffers
|
||||
for (auto& ptr : buffers.regular_ptrs)
|
||||
|
@ -472,9 +472,9 @@ void AXWiiUCode::ProcessPBList(u32 pb_addr)
|
|||
}
|
||||
else
|
||||
{
|
||||
ProcessVoice(static_cast<HLEAccelerator*>(m_accelerator.get()), pb, buffers, 96,
|
||||
ConvertMixerControl(HILO_TO_32(pb.mixer_control)),
|
||||
m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter);
|
||||
ProcessVoice<96>(static_cast<HLEAccelerator*>(m_accelerator.get()), pb, buffers,
|
||||
ConvertMixerControl(HILO_TO_32(pb.mixer_control)),
|
||||
m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter);
|
||||
}
|
||||
|
||||
WritePB(memory, pb_addr, pb);
|
||||
|
|
|
@ -21,19 +21,19 @@ public:
|
|||
|
||||
protected:
|
||||
// Additional AUX buffers
|
||||
int m_samples_auxC_left[32 * 3]{};
|
||||
int m_samples_auxC_right[32 * 3]{};
|
||||
int m_samples_auxC_surround[32 * 3]{};
|
||||
alignas(32) int m_samples_auxC_left[32 * 3]{};
|
||||
alignas(32) int m_samples_auxC_right[32 * 3]{};
|
||||
alignas(32) int m_samples_auxC_surround[32 * 3]{};
|
||||
|
||||
// Wiimote buffers
|
||||
int m_samples_wm0[6 * 3]{};
|
||||
int m_samples_aux0[6 * 3]{};
|
||||
int m_samples_wm1[6 * 3]{};
|
||||
int m_samples_aux1[6 * 3]{};
|
||||
int m_samples_wm2[6 * 3]{};
|
||||
int m_samples_aux2[6 * 3]{};
|
||||
int m_samples_wm3[6 * 3]{};
|
||||
int m_samples_aux3[6 * 3]{};
|
||||
alignas(32) int m_samples_wm0[6 * 3]{};
|
||||
alignas(32) int m_samples_aux0[6 * 3]{};
|
||||
alignas(32) int m_samples_wm1[6 * 3]{};
|
||||
alignas(32) int m_samples_aux1[6 * 3]{};
|
||||
alignas(32) int m_samples_wm2[6 * 3]{};
|
||||
alignas(32) int m_samples_aux2[6 * 3]{};
|
||||
alignas(32) int m_samples_wm3[6 * 3]{};
|
||||
alignas(32) int m_samples_aux3[6 * 3]{};
|
||||
|
||||
// Are we implementing an old version of AXWii which still has updates?
|
||||
bool m_old_axwii = false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue