cellAdec: review fixes

This commit is contained in:
capriots 2024-12-08 16:03:28 +01:00
parent f113160688
commit e055bf2692
No known key found for this signature in database
3 changed files with 89 additions and 55 deletions

View file

@ -403,8 +403,8 @@ void LpcmDecContext::exec(ppu_thread& ppu)
const v128 f32_1 = gv_mulfs(gv_cvts32_tofs(s32_1), 1.f / static_cast<f32>(0x80000000u));
const v128 f32_2 = gv_mulfs(gv_cvts32_tofs(s32_2), 1.f / static_cast<f32>(0x80000000u));
*reinterpret_cast<v128*>(&_output[i]) = gv_to_be32(f32_1);
*reinterpret_cast<v128*>(&_output[i + 4]) = gv_to_be32(f32_2);
v128::storeu(gv_to_be32(f32_1), &_output[i]);
v128::storeu(gv_to_be32(f32_2), &_output[i + 4]);
}
for (; i < au_size_s16; i++)
@ -425,7 +425,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
// Convert to float and divide by INT32_MAX + 1
const v128 _f32 = gv_mulfs(gv_cvts32_tofs(_s32), 1.f / static_cast<f32>(0x80000000u));
*reinterpret_cast<v128*>(&_output[i]) = gv_to_be32(_f32);
v128::storeu(gv_to_be32(_f32), &_output[i]);
}
for (; i * 3 <= au_size_u8 - 3; i++)
@ -452,9 +452,9 @@ void LpcmDecContext::exec(ppu_thread& ppu)
case CELL_ADEC_CH_MONO:
for (s32 i = 0; i < sample_num / 2; i += 4)
{
const v128 tmp1 = *reinterpret_cast<v128*>(&_output[i * 2]);
const v128 tmp2 = *reinterpret_cast<v128*>(&_output[i * 2 + 4]);
*reinterpret_cast<v128*>(&_output[i]) = gv_shufflefs<0 << 0 | 2 << 2 | 0 << 4 | 2 << 6>(tmp1, tmp2); // Remove every other sample
const v128 tmp1 = v128::loadu(&_output[i * 2]);
const v128 tmp2 = v128::loadu(&_output[i * 2 + 4]);
v128::storeu(gv_shufflefs<0, 2, 0, 2>(tmp1, tmp2), &_output[i]); // Remove every other sample
}
break;
@ -466,7 +466,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
case CELL_ADEC_CH_3_0:
for (s32 i_in = 0, i_out = 0; i_in < sample_num; i_in += 4, i_out += 3)
{
const v128 tmp = gv_shuffle32<0 << 0 | 2 << 2 | 1 << 4 | 3 << 6>(*reinterpret_cast<v128*>(&_output[i_in])); // Swap Front Right and Center
const v128 tmp = gv_shuffle32<0, 2, 1, 3>(v128::loadu(&_output[i_in])); // Swap Front Right and Center
v128::storeu(tmp, &_output[i_out]);
}
break;
@ -474,7 +474,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
case CELL_ADEC_CH_2_1:
for (s32 i_in = 0, i_out = 0; i_in < sample_num; i_in += 4, i_out += 3)
{
v128::storeu(*reinterpret_cast<v128*>(&_output[i_in]), &_output[i_out]);
v128::storeu(v128::loadu(&_output[i_in]), &_output[i_out]);
}
break;
@ -490,7 +490,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
case CELL_ADEC_CH_3_2:
for (s32 i_in = 0, i_out = 0; i_in < sample_num; i_in += 6, i_out += 5)
{
const v128 tmp = gv_shuffle32<0 << 0 | 2 << 2 | 1 << 4 | 3 << 6>(v128::loadu(&_output[i_in])); // Swap Front Right and Center
const v128 tmp = gv_shuffle32<0, 2, 1, 3>(v128::loadu(&_output[i_in])); // Swap Front Right and Center
v128::storeu(tmp, &_output[i_out]);
_output[i_out + 4] = _output[i_in + 4];
}
@ -499,8 +499,8 @@ void LpcmDecContext::exec(ppu_thread& ppu)
case CELL_ADEC_CH_3_4:
for (s32 i_in = 0, i_out = 0; i_in < sample_num; i_in += 8, i_out += 7)
{
const v128 tmp1 = gv_shuffle32<0 << 0 | 2 << 2 | 1 << 4 | 3 << 6>(*reinterpret_cast<v128*>(&_output[i_in])); // Swap Front Right and Center
const v128 tmp2 = gv_shuffle32<2 << 0 | 0 << 2 | 1 << 4 | 3 << 6>(*reinterpret_cast<v128*>(&_output[i_in + 4])); // Reorder Rear Left, Rear Right, Side Right -> Side Right, Rear Left, Rear Right
const v128 tmp1 = gv_shuffle32<0, 2, 1, 3>(v128::loadu(&_output[i_in])); // Swap Front Right and Center
const v128 tmp2 = gv_shuffle32<2, 0, 1, 3>(v128::loadu(&_output[i_in + 4])); // Reorder Rear Left, Rear Right, Side Right -> Side Right, Rear Left, Rear Right
v128::storeu(tmp1, &_output[i_out]);
v128::storeu(tmp2, &_output[i_out + 4]);
}
@ -509,8 +509,8 @@ void LpcmDecContext::exec(ppu_thread& ppu)
case CELL_ADEC_CH_3_4_LFE:
for (s32 i = 0; i < sample_num; i += 8)
{
const v128 tmp1 = gv_shuffle32<3 << 0 | 2 << 2 | 0 << 4 | 1 << 6>(*reinterpret_cast<v128*>(&_output[i + 4])); // Reorder Rear Left, Rear Right, Side Right, LFE -> LFE, Side Right, Rear Left, Rear Right
*reinterpret_cast<v128*>(&_output[i + 4]) = tmp1;
const v128 tmp1 = gv_shuffle32<3, 2, 0, 1>(v128::loadu(&_output[i + 4])); // Reorder Rear Left, Rear Right, Side Right, LFE -> LFE, Side Right, Rear Left, Rear Right
v128::storeu(tmp1, &_output[i + 4]);
const u64 tmp2 = std::rotl(read_from_ptr<u64>(&_output[i + 3]), 0x20); // Swap Side Left and LFE
std::memcpy(&_output[i + 3], &tmp2, sizeof(u64));
}
@ -569,7 +569,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
// Convert to float and divide by INT32_MAX + 1
const v128 _f32 = gv_mulfs(gv_cvts32_tofs(_s32), 1.f / static_cast<f32>(0x80000000u));
*reinterpret_cast<v128*>(&_output[i_out]) = gv_to_be32(_f32);
v128::storeu(gv_to_be32(_f32), &_output[i_out]);
}
for (; i_in <= au_size_s16 - 2; i_in += channel_num, i_out += 2)
@ -602,7 +602,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
? v128::normal_array_t<s8>{ -1, 8, 1, 0, -1, 8, 3, 2, -1, 10, 5, 4, -1, 11, 7, 6 }
: v128::normal_array_t<s8>{ 0, 1, 8, -1, 2, 3, 8, -1, 4, 5, 10, -1, 6, 7, 11, -1 };
const v128 shuffle_ctrl = channel_num & 1 ? shuffle_ctrl_different_offset : shuffle_ctrl_same_offset;
const v128 shuffle_ctrl = channel_num & 1 ? v128::loadu(&shuffle_ctrl_different_offset) : v128::loadu(&shuffle_ctrl_same_offset);
alignas(alignof(v128)) static constexpr auto low_bits_mask_same_offset = std::endian::native == std::endian::little
? v128::normal_array_t<u8>{ 0x00, 0xf0, 0xff, 0xff, 0x00, 0x0f, 0xff, 0xff, 0x00, 0xf0, 0xff, 0xff, 0x00, 0x0f, 0xff, 0xff }
@ -612,7 +612,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
? v128::normal_array_t<u8>{ 0x00, 0xf0, 0xff, 0xff, 0x00, 0x0f, 0xff, 0xff, 0x00, 0x0f, 0xff, 0xff, 0x00, 0xf0, 0xff, 0xff }
: v128::normal_array_t<u8>{ 0xff, 0xff, 0xf0, 0x00, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0xf0, 0x00 };
const v128 low_bits_mask = channel_num & 1 ? low_bits_mask_different_offset : low_bits_mask_same_offset;
const v128 low_bits_mask = channel_num & 1 ? v128::loadu(&low_bits_mask_different_offset) : v128::loadu(&low_bits_mask_same_offset);
for (s64 i_in = 0, i_out = 0; i_in <= au_size_u8 - low_bits_3_4_offset - (channel_num & 1); i_in += next_samples_offset, i_out += 4)
{
@ -634,7 +634,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
// Convert to float and divide by INT32_MAX + 1
const v128 _f32 = gv_mulfs(gv_cvts32_tofs(_s32), 1.f / static_cast<f32>(0x80000000u));
*reinterpret_cast<v128*>(&_output[i_out]) = gv_to_be32(_f32);
v128::storeu(gv_to_be32(_f32), &_output[i_out]);
}
break;
}
@ -662,7 +662,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
// Convert to float and divide by INT32_MAX + 1
const v128 _f32 = gv_mulfs(gv_cvts32_tofs(_s32), 1.f / static_cast<f32>(0x80000000u));
*reinterpret_cast<v128*>(&_output[i_out]) = gv_to_be32(_f32);
v128::storeu(gv_to_be32(_f32), &_output[i_out]);
}
}
}
@ -820,16 +820,18 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext>
const vm::var<sys_mutex_attribute_t> queue_mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem06"_u64 } }};
const vm::var<sys_cond_attribute_t> cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec03"_u64 } }};
if (error_code ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); ret != CELL_OK
|| (ret = sys_cond_create(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr)) != CELL_OK
|| (ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr)) != CELL_OK
|| (ret = sys_cond_create(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr)) != CELL_OK
|| (ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr)) != CELL_OK
|| (ret = sys_cond_create(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr)) != CELL_OK
|| (ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr)) != CELL_OK
|| (ret = handle->release_output(ppu)) != CELL_OK
|| (ret = handle->cmd_available.init(ppu, handle.ptr(&LpcmDecContext::cmd_available), 0)) != CELL_OK
|| (ret = handle->reserved2.init(ppu, handle.ptr(&LpcmDecContext::reserved2), 0)) != CELL_OK)
error_code ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr);
ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr);
ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr);
ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr);
ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr);
ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr);
ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr);
ret = ret ? ret : handle->release_output(ppu);
ret = ret ? ret : handle->cmd_available.init(ppu, handle.ptr(&LpcmDecContext::cmd_available), 0);
ret = ret ? ret : handle->reserved2.init(ppu, handle.ptr(&LpcmDecContext::reserved2), 0);
if (ret != CELL_OK)
{
return ret;
}
@ -841,14 +843,11 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext>
const vm::var<char[]> _name = vm::make_str("HLE LPCM decoder");
const auto entry = g_fxo->get<ppu_function_manager>().func_addr(FIND_FUNC(lpcmDecEntry));
if (error_code ret = ppu_execute<&sys_ppu_thread_create>(ppu, handle.ptr(&LpcmDecContext::thread_id), entry, handle.addr(), +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_JOINABLE, +_name); ret != CELL_OK
|| (ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr)) != CELL_OK
|| (ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr)) != CELL_OK)
{
return ret;
}
ret = ppu_execute<&sys_ppu_thread_create>(ppu, handle.ptr(&LpcmDecContext::thread_id), entry, handle.addr(), +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_JOINABLE, +_name);
ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr);
ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr);
return CELL_OK;
return ret;
}
error_code _CellAdecCoreOpOpen_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext> handle, vm::ptr<AdecNotifyAuDone> notifyAuDone, vm::ptr<void> notifyAuDoneArg, vm::ptr<AdecNotifyPcmOut> notifyPcmOut, vm::ptr<void> notifyPcmOutArg,
@ -900,27 +899,25 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext> ha
}
}
error_code ret = sys_mutex_unlock(ppu, handle->queue_size_mutex);
ret = ret ? ret : handle->release_output(ppu);
vm::var<u64> thread_ret;
ret = ret ? ret : sys_ppu_thread_join(ppu, static_cast<u32>(handle->thread_id), +thread_ret);
if (error_code ret = sys_mutex_unlock(ppu, handle->queue_size_mutex); ret != CELL_OK
|| (ret = handle->release_output(ppu)) != CELL_OK
|| (ret = sys_ppu_thread_join(ppu, static_cast<u32>(handle->thread_id), +thread_ret)) != CELL_OK
|| (ret = sys_cond_destroy(ppu, handle->queue_size_cond)) != CELL_OK
|| (ret = sys_cond_destroy(ppu, handle->unk_cond)) != CELL_OK
|| (ret = sys_cond_destroy(ppu, handle->output_consumed)) != CELL_OK
|| (ret = sys_mutex_destroy(ppu, handle->queue_mutex)) != CELL_OK
|| (ret = sys_mutex_destroy(ppu, handle->queue_size_mutex)) != CELL_OK
|| (ret = sys_mutex_destroy(ppu, handle->unk_mutex)) != CELL_OK
|| (ret = sys_mutex_destroy(ppu, handle->output_mutex)) != CELL_OK
|| (ret = handle->cmd_available.finalize(ppu)) != CELL_OK
|| (ret = handle->reserved2.finalize(ppu)) != CELL_OK
|| (ret = sys_mutex_destroy(ppu, handle->spurs_queue_pop_mutex)) != CELL_OK
|| (ret = sys_mutex_destroy(ppu, handle->spurs_queue_push_mutex)) != CELL_OK)
{
return ret;
}
ret = ret ? ret : sys_cond_destroy(ppu, handle->queue_size_cond);
ret = ret ? ret : sys_cond_destroy(ppu, handle->unk_cond);
ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed);
ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex);
ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_size_mutex);
ret = ret ? ret : sys_mutex_destroy(ppu, handle->unk_mutex);
ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex);
ret = ret ? ret : handle->cmd_available.finalize(ppu);
ret = ret ? ret : handle->reserved2.finalize(ppu);
ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_pop_mutex);
ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_push_mutex);
return CELL_OK;
return ret;
}
error_code _CellAdecCoreOpStartSeq_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext> handle, vm::ptr<CellAdecParamLpcm> lpcmParam)

View file

@ -253,7 +253,7 @@ enum CellAdecSampleRate : s32
CELL_ADEC_FS_8kHz,
};
enum CellAdecBitLength : s32
enum CellAdecBitLength : u32
{
CELL_ADEC_BIT_LENGTH_RESERVED1,
CELL_ADEC_BIT_LENGTH_16,
@ -762,7 +762,7 @@ public:
return {};
}
if (value == 0)
if (value == 0u)
{
savestate = lpcm_dec_state::waiting_for_cmd_cond_wait;
cond_wait:

View file

@ -3192,6 +3192,24 @@ inline v128 gv_shuffle32(const v128& vec)
#endif
}
// Permute the four 32-bit elements of vec: result element N is vec[IndexN & 3]
// (each index is masked to two bits, so out-of-range values wrap instead of failing)
template <u8 Index0, u8 Index1, u8 Index2, u8 Index3>
inline v128 gv_shuffle32(const v128& vec)
{
#if defined(ARCH_X64)
	// Pack the four 2-bit selectors into one immediate, selector 0 in the lowest bits
	constexpr int imm8 = ((Index3 & 3) << 6) | ((Index2 & 3) << 4) | ((Index1 & 3) << 2) | (Index0 & 3);
	return _mm_shuffle_epi32(vec, imm8);
#elif defined(ARCH_ARM64)
	// Byte offset at which each selected 32-bit element starts inside vec
	constexpr u8 first0 = sizeof(s32) * (Index0 & 3);
	constexpr u8 first1 = sizeof(s32) * (Index1 & 3);
	constexpr u8 first2 = sizeof(s32) * (Index2 & 3);
	constexpr u8 first3 = sizeof(s32) * (Index3 & 3);

	// Byte-wise table lookup control: four consecutive bytes per selected element
	constexpr uint8x16_t byte_sel =
	{
		first0, first0 + 1, first0 + 2, first0 + 3,
		first1, first1 + 1, first1 + 2, first1 + 3,
		first2, first2 + 1, first2 + 2, first2 + 3,
		first3, first3 + 1, first3 + 2, first3 + 3
	};
	return vqtbl1q_s8(vec, byte_sel);
#endif
}
// For the first two 2-bit indices in Control, r = a[index],
// for the last two indices, r = b[index]
template <u8 Control>
@ -3211,6 +3229,25 @@ inline v128 gv_shufflefs(const v128& a, const v128& b)
#endif
}
// Build a vector from two sources: result elements 0 and 1 are a[Index0 & 3] and a[Index1 & 3],
// result elements 2 and 3 are b[Index2 & 3] and b[Index3 & 3]
// (each index is masked to two bits, so out-of-range values wrap instead of failing)
template <u8 Index0, u8 Index1, u8 Index2, u8 Index3>
inline v128 gv_shufflefs(const v128& a, const v128& b)
{
#if defined(ARCH_X64)
	// Pack the four 2-bit selectors into one immediate, selector 0 in the lowest bits
	constexpr int imm8 = ((Index3 & 3) << 6) | ((Index2 & 3) << 4) | ((Index1 & 3) << 2) | (Index0 & 3);
	return _mm_shuffle_ps(a, b, imm8);
#elif defined(ARCH_ARM64)
	// Byte offsets into the 32-byte table { a, b }: the first two selectors address a,
	// the last two are shifted by sizeof(v128) so that they address b
	constexpr u8 first0 = sizeof(s32) * (Index0 & 3);
	constexpr u8 first1 = sizeof(s32) * (Index1 & 3);
	constexpr u8 first2 = sizeof(v128) + sizeof(s32) * (Index2 & 3);
	constexpr u8 first3 = sizeof(v128) + sizeof(s32) * (Index3 & 3);

	// Byte-wise table lookup control: four consecutive bytes per selected element
	constexpr uint8x16_t byte_sel =
	{
		first0, first0 + 1, first0 + 2, first0 + 3,
		first1, first1 + 1, first1 + 2, first1 + 3,
		first2, first2 + 1, first2 + 2, first2 + 3,
		first3, first3 + 1, first3 + 2, first3 + 3
	};
	return vqtbl2q_s8({ a, b }, byte_sel);
#endif
}
// For each 32-bit element, reverse byte order
inline v128 gv_rev32(const v128& vec)
{