Review note (free text ahead of the first `diff --git` header; it belongs to no hunk):
The removed utils::cntlz32/cntlz64 helpers returned unsigned u32/u64, whereas std::countl_zero returns int.
In the SPUASMJITRecompiler.cpp hunk the count is cast back to u32 before `~` is applied, so that `~x % 4 * 4` keeps evaluating to 0, 4, 8 or 12 instead of a negative offset.
diff --git a/Utilities/asm.h b/Utilities/asm.h index 6a0148681d..b3741ba52e 100644 --- a/Utilities/asm.h +++ b/Utilities/asm.h @@ -4,30 +4,6 @@ namespace utils { - inline u32 cntlz32(u32 arg, bool nonzero = false) - { -#ifdef _MSC_VER - ulong res; - return _BitScanReverse(&res, arg) || nonzero ? res ^ 31 : 32; -#elif __LZCNT__ - return _lzcnt_u32(arg); -#else - return arg || nonzero ? __builtin_clz(arg) : 32; -#endif - } - - inline u64 cntlz64(u64 arg, bool nonzero = false) - { -#ifdef _MSC_VER - ulong res; - return _BitScanReverse64(&res, arg) || nonzero ? res ^ 63 : 64; -#elif __LZCNT__ - return _lzcnt_u64(arg); -#else - return arg || nonzero ? __builtin_clzll(arg) : 64; -#endif - } - inline u8 popcnt32(u32 arg) { #ifdef _MSC_VER diff --git a/Utilities/cfmt.h b/Utilities/cfmt.h index e65fadc293..04d826b7b1 100644 --- a/Utilities/cfmt.h +++ b/Utilities/cfmt.h @@ -59,7 +59,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) const auto write_octal = [&](u64 value, u64 min_num) { - out.resize(out.size() + std::max(min_num, 66 / 3 - (utils::cntlz64(value | 1, true) + 2) / 3), '0'); + out.resize(out.size() + std::max(min_num, 66 / 3 - (std::countl_zero(value | 1) + 2) / 3), '0'); // Write in reversed order for (auto i = out.rbegin(); value; i++, value /= 8) @@ -70,7 +70,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) const auto write_hex = [&](u64 value, bool upper, u64 min_num) { - out.resize(out.size() + std::max(min_num, 64 / 4 - utils::cntlz64(value | 1, true) / 4), '0'); + out.resize(out.size() + std::max(min_num, 64 / 4 - std::countl_zero(value | 1) / 4), '0'); // Write in reversed order for (auto i = out.rbegin(); value; i++, value /= 16) diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index b55324f68e..d81eebf8ad 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -2114,7 +2114,7 @@ s32 _spurs::add_workload(vm::ptr spurs, vm::ptr wid, 
vm::cptrflags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed spurs->wklEnabled.atomic_op([spurs, wmax, &wnum](be_t& value) { - wnum = utils::cntlz32(~value); // found empty position + wnum = std::countl_one(value); // found empty position if (wnum < wmax) { value |= (0x80000000 >> wnum); // set workload bit @@ -2237,7 +2237,7 @@ s32 _spurs::add_workload(vm::ptr spurs, vm::ptr wid, vm::cptr> current->uniqueId; - res_wkl = utils::cntlz32(~k); + res_wkl = std::countl_one(k); } } } diff --git a/rpcs3/Emu/Cell/Modules/cellSync.cpp b/rpcs3/Emu/Cell/Modules/cellSync.cpp index 087952229c..58e50c600a 100644 --- a/rpcs3/Emu/Cell/Modules/cellSync.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSync.cpp @@ -1017,7 +1017,7 @@ error_code _cellSyncLFQueueCompletePushPointer(ppu_thread& ppu, vm::ptr(~(var9_ | push3.m_h6))) - 16; // count leading zeros in u16 + s32 var9 = std::countl_zero(static_cast(~(var9_ | push3.m_h6))) - 16; // count leading zeros in u16 s32 var5 = push3.m_h6 | var9_; if (var9 & 0x30) @@ -1317,7 +1317,8 @@ error_code _cellSyncLFQueueCompletePopPointer(ppu_thread& ppu, vm::ptr(~(var9_ | pop3.m_h2))) - 16; // count leading zeros in u16 + + s32 var9 = std::countl_zero(static_cast(~(var9_ | pop3.m_h2))) - 16; // count leading zeros in u16 s32 var5 = pop3.m_h2 | var9_; if (var9 & 0x30) diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 006b46f50e..784580d189 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -1238,7 +1238,7 @@ struct ppu_acontext if (min < max) { // Inverted constant MSB mask - const u64 mix = ~0ull >> utils::cntlz64(min ^ max, true); + const u64 mix = ~0ull >> std::countl_zero(min ^ max); r.bmin |= min & ~mix; r.bmax &= max | mix; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index ff6e69f6f4..47519bb4f2 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -3250,7 +3250,7 @@ bool 
ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op) if (op.l11) { // MFOCRF - const u32 n = utils::cntlz32(op.crm) & 7; + const u32 n = std::countl_zero(op.crm) & 7; const u32 p = n * 4; const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0; @@ -3299,7 +3299,7 @@ bool ppu_interpreter::SLW(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::CNTLZW(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = utils::cntlz32(static_cast(ppu.gpr[op.rs])); + ppu.gpr[op.ra] = std::countl_zero(static_cast(ppu.gpr[op.rs])); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3377,7 +3377,7 @@ bool ppu_interpreter::LWZUX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::CNTLZD(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = utils::cntlz64(ppu.gpr[op.rs]); + ppu.gpr[op.ra] = std::countl_zero(ppu.gpr[op.rs]); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3537,7 +3537,7 @@ bool ppu_interpreter::MTOCRF(ppu_thread& ppu, ppu_opcode_t op) { // MTOCRF - const u32 n = utils::cntlz32(op.crm) & 7; + const u32 n = std::countl_zero(op.crm) & 7; const u64 v = (s >> ((n * 4) ^ 0x1c)) & 0xf; ppu.cr.fields[n] = *reinterpret_cast(s_table + v); } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index e859de38d8..149a37a203 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -675,7 +675,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) } // Determine which value will be duplicated at hole positions - const u32 w3 = func.data.at((j - start + ~utils::cntlz32(cmask, true) % 4 * 4) / 4); + const u32 w3 = func.data.at((j - start + ~static_cast<u32>(std::countl_zero(cmask)) % 4 * 4) / 4); words.push_back(cmask & 1 ? func.data[(j - start + 0) / 4] : w3); words.push_back(cmask & 2 ? func.data[(j - start + 4) / 4] : w3); words.push_back(cmask & 4 ? 
func.data[(j - start + 8) / 4] : w3); diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index bc9c002e11..2f26a73253 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -903,7 +903,7 @@ bool spu_interpreter::CLZ(spu_thread& spu, spu_opcode_t op) { for (u32 i = 0; i < 4; i++) { - spu.gpr[op.rt]._u32[i] = utils::cntlz32(spu.gpr[op.ra]._u32[i]); + spu.gpr[op.rt]._u32[i] = std::countl_zero(spu.gpr[op.ra]._u32[i]); } return true; } diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 271f7e5f8e..938de9405c 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -697,7 +697,7 @@ namespace vm const u32 size = ::align(orig_size, min_page_size) + (flags & 0x10 ? 0x2000 : 0); // Check alignment (it's page allocation, so passing small values there is just silly) - if (align < min_page_size || align != (0x80000000u >> utils::cntlz32(align, true))) + if (align < min_page_size || align != (0x80000000u >> std::countl_zero(align))) { fmt::throw_exception("Invalid alignment (size=0x%x, align=0x%x)" HERE, size, align); } @@ -992,7 +992,7 @@ namespace vm const u32 size = ::align(orig_size, 0x10000); // Check alignment - if (align < 0x10000 || align != (0x80000000u >> utils::cntlz32(align, true))) + if (align < 0x10000 || align != (0x80000000u >> std::countl_zero(align))) { fmt::throw_exception("Invalid alignment (size=0x%x, align=0x%x)" HERE, size, align); } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 0382cf54ea..b75fe3798a 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -3,7 +3,6 @@ #include "../system_config.h" #include "Utilities/address_range.h" #include "Utilities/geometry.h" -#include "Utilities/asm.h" #include "gcm_enums.h" #include @@ -239,19 +238,19 @@ namespace rsx // static inline u32 floor_log2(u32 value) { - return value <= 1 ? 0 : utils::cntlz32(value, true) ^ 31; + return value <= 1 ? 
0 : std::countl_zero(value) ^ 31; } static inline u32 ceil_log2(u32 value) { - return value <= 1 ? 0 : utils::cntlz32((value - 1) << 1, true) ^ 31; + return value <= 1 ? 0 : std::countl_zero((value - 1) << 1) ^ 31; } static inline u32 next_pow2(u32 x) { if (x <= 2) return x; - return static_cast((1ULL << 32) >> utils::cntlz32(x - 1, true)); + return static_cast((1ULL << 32) >> std::countl_zero(x - 1)); } static inline bool fcmp(float a, float b, float epsilon = 0.000001f) diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp index 0498911811..b382d91254 100644 --- a/rpcs3/util/atomic.cpp +++ b/rpcs3/util/atomic.cpp @@ -5,7 +5,6 @@ #endif #include "Utilities/sync.h" -#include "Utilities/asm.h" #ifdef USE_POSIX #include @@ -141,7 +140,7 @@ static sync_var* slot_get(std::uintptr_t iptr, sync_var* loc, u64 lv = 0) } // Get the number of leading equal bits to determine subslot - const u64 eq_bits = utils::cntlz64((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true); + const u64 eq_bits = std::countl_zero((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16); // Proceed recursively, increment level return slot_get(iptr, s_slot_list[(value & s_slot_mask) / one_v].branch + eq_bits, eq_bits + 1); @@ -166,7 +165,7 @@ static void slot_free(std::uintptr_t iptr, sync_var* loc, u64 lv = 0) } // Get the number of leading equal bits to determine subslot - const u64 eq_bits = utils::cntlz64((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true); + const u64 eq_bits = std::countl_zero((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16); // Proceed recursively, to deallocate deepest branch first slot_free(iptr, s_slot_list[(value & s_slot_mask) / one_v].branch + eq_bits, eq_bits + 1); @@ -445,7 +444,7 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu } // Get the number of leading equal bits (between iptr and slot owner) - const u64 eq_bits = utils::cntlz64((((iptr ^ ok) & 
(s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true); + const u64 eq_bits = std::countl_zero((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16); // Collision; need to go deeper ptr = s_slot_list[(ok & s_slot_mask) / one_v].branch + eq_bits;