diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp
index af4d9128d2..c67e720b6f 100644
--- a/Utilities/Thread.cpp
+++ b/Utilities/Thread.cpp
@@ -2490,7 +2490,7 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 	if (alert)
 	{
 		list.set<0>(_this->m_sync, 0);
-		list.set<1>(utils::bless<atomic_t<u32>>(&_this->m_taskq)[1], 0);
+		list.template set<1>(_this->m_taskq);
 	}
 	else
 	{
diff --git a/Utilities/lockless.h b/Utilities/lockless.h
index 48142de96a..63c2774bc8 100644
--- a/Utilities/lockless.h
+++ b/Utilities/lockless.h
@@ -49,7 +49,7 @@ public:
 			if (!next)
 			{
-				// Do not allow access beyond many element more at a time
+				// Do not allow access beyond many elements more at a time
 				ensure(!installed && index - i < N * 2);
 
 				installed = true;
@@ -384,17 +384,26 @@ public:
 template <typename T>
 class lf_queue final
 {
-	atomic_t<u64> m_head{0};
-
-	lf_queue_item<T>* load(u64 value) const noexcept
+public:
+	struct fat_ptr
 	{
-		return reinterpret_cast<lf_queue_item<T>*>(value >> 16);
+		u64 ptr{};
+		u32 is_non_null{};
+		u32 reserved{};
+	};
+
+private:
+	atomic_t<fat_ptr> m_head{fat_ptr{}};
+
+	lf_queue_item<T>* load(fat_ptr value) const noexcept
+	{
+		return reinterpret_cast<lf_queue_item<T>*>(value.ptr);
 	}
 
 	// Extract all elements and reverse element order (FILO to FIFO)
 	lf_queue_item<T>* reverse() noexcept
 	{
-		if (auto* head = load(m_head) ? load(m_head.exchange(0)) : nullptr)
+		if (auto* head = load(m_head) ? load(m_head.exchange(fat_ptr{})) : nullptr)
 		{
 			if (auto* prev = head->m_link)
 			{
@@ -420,7 +429,7 @@ public:
 
 	lf_queue(lf_queue&& other) noexcept
 	{
-		m_head.release(other.m_head.exchange(0));
+		m_head.release(other.m_head.exchange(fat_ptr{}));
 	}
 
 	lf_queue& operator=(lf_queue&& other) noexcept
@@ -431,7 +440,7 @@ public:
 		}
 
 		delete load(m_head);
-		m_head.release(other.m_head.exchange(0));
+		m_head.release(other.m_head.exchange(fat_ptr{}));
 		return *this;
 	}
@@ -442,9 +451,9 @@ public:
 
 	void wait(std::nullptr_t /*null*/ = nullptr) noexcept
 	{
-		if (m_head == 0)
+		if (!operator bool())
 		{
-			utils::bless<atomic_t<u32>>(&m_head)[1].wait(0);
+			utils::bless<atomic_t<u32>>(&m_head.raw().is_non_null)->wait(0);
 		}
 	}
@@ -455,7 +464,7 @@ public:
 
 	explicit operator bool() const noexcept
 	{
-		return m_head != 0;
+		return observe() != nullptr;
 	}
 
 	template <bool Notify = true, typename... Args>
@@ -464,25 +473,25 @@ public:
 		auto oldv = m_head.load();
 		auto item = new lf_queue_item<T>(load(oldv), std::forward<Args>(args)...);
 
-		while (!m_head.compare_exchange(oldv, reinterpret_cast<u64>(item) << 16))
+		while (!m_head.compare_exchange(oldv, fat_ptr{reinterpret_cast<u64>(item), item != nullptr, 0}))
 		{
 			item->m_link = load(oldv);
 		}
 
-		if (!oldv && Notify)
+		if (!oldv.ptr && Notify)
 		{
 			// Notify only if queue was empty
 			notify(true);
 		}
 
-		return !oldv;
+		return !oldv.ptr;
 	}
 
 	void notify(bool force = false)
 	{
 		if (force || operator bool())
 		{
-			utils::bless<atomic_t<u32>>(&m_head)[1].notify_one();
+			utils::bless<atomic_t<u32>>(&m_head.raw().is_non_null)->notify_one();
 		}
 	}
@@ -498,7 +507,7 @@ public:
 	lf_queue_slice<T> pop_all_reversed()
 	{
 		lf_queue_slice<T> result;
-		result.m_head = load(m_head.exchange(0));
+		result.m_head = load(m_head.exchange(fat_ptr{}));
 		return result;
 	}
diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp
index cd9a6e37a3..c8e76cbf17 100644
--- a/rpcs3/util/atomic.cpp
+++ b/rpcs3/util/atomic.cpp
@@ -57,8 +57,8 @@ static bool has_waitv()
 // Total number of entries.
 static constexpr usz s_hashtable_size = 1u << 17;
 
-// Reference counter combined with shifted pointer (which is assumed to be 48 bit)
-static constexpr uptr s_ref_mask = 0xffff;
+// Reference counter mask
+static constexpr uptr s_ref_mask = 0xffff'ffff;
 
 // Fix for silly on-first-use initializer
 static bool s_null_wait_cb(const void*, u64, u64){ return true; };
@@ -153,8 +153,16 @@ namespace
 	// Essentially a fat semaphore
 	struct alignas(64) cond_handle
 	{
-		// Combined pointer (most significant 48 bits) and ref counter (16 least significant bits)
-		atomic_t<u64> ptr_ref;
+		struct fat_ptr
+		{
+			u64 ptr{};
+			u32 reserved{};
+			u32 ref_ctr{};
+
+			auto operator<=>(const fat_ptr& other) const = default;
+		};
+
+		atomic_t<fat_ptr> ptr_ref;
 		u64 tid;
 		u32 oldv;
@@ -183,7 +191,7 @@ namespace
 			mtx.init(mtx);
 #endif
-			ensure(!ptr_ref.exchange((iptr << 16) | 1));
+			ensure(ptr_ref.exchange(fat_ptr{iptr, 0, 1}) == fat_ptr{});
 		}
 
 		void destroy()
@@ -370,7 +378,7 @@ namespace
 			if (cond_id)
 			{
 				// Set fake refctr
-				s_cond_list[cond_id].ptr_ref.release(1);
+				s_cond_list[cond_id].ptr_ref.release(cond_handle::fat_ptr{0, 0, 1});
 				cond_free(cond_id, -1);
 			}
 		}
@@ -390,7 +398,7 @@ static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
 	{
 		// Fast reinitialize
 		const u32 id = std::exchange(*ptls, 0);
-		s_cond_list[id].ptr_ref.release((iptr << 16) | 1);
+		s_cond_list[id].ptr_ref.release(cond_handle::fat_ptr{iptr, 0, 1});
 		return id;
 	}
@@ -461,15 +469,15 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
 	const auto cond = s_cond_list + cond_id;
 
 	// Dereference, destroy on last ref
-	const bool last = cond->ptr_ref.atomic_op([](u64& val)
+	const bool last = cond->ptr_ref.atomic_op([](cond_handle::fat_ptr& val)
 	{
-		ensure(val & s_ref_mask);
+		ensure(val.ref_ctr);
 
-		val--;
+		val.ref_ctr--;
 
-		if ((val & s_ref_mask) == 0)
+		if (val.ref_ctr == 0)
 		{
-			val = 0;
+			val = cond_handle::fat_ptr{};
 			return true;
 		}
@@ -525,15 +533,15 @@ static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
 		while (true)
 		{
-			const auto [old, ok] = cond->ptr_ref.fetch_op([&](u64& val)
+			const auto [old, ok] = cond->ptr_ref.fetch_op([&](cond_handle::fat_ptr& val)
 			{
-				if (!val || (val & s_ref_mask) == s_ref_mask)
+				if (val == cond_handle::fat_ptr{} || val.ref_ctr == s_ref_mask)
 				{
 					// Don't reference already deallocated semaphore
 					return false;
 				}
 
-				if (iptr && (val >> 16) != iptr)
+				if (iptr && val.ptr != iptr)
 				{
 					// Pointer mismatch
 					return false;
 				}
@@ -548,7 +556,7 @@ static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
 
 				if (!did_ref)
 				{
-					val++;
+					val.ref_ctr++;
 				}
 
 				return true;
@@ -566,7 +574,7 @@ static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
 			return cond;
 		}
 
-		if ((old & s_ref_mask) == s_ref_mask)
+		if (old.ref_ctr == s_ref_mask)
 		{
 			fmt::throw_exception("Reference count limit (%u) reached in an atomic notifier.", s_ref_mask);
 		}
@@ -589,12 +597,14 @@ namespace
 		u64 maxc: 5; // Collision counter
 		u64 maxd: 11; // Distance counter
 		u64 bits: 24; // Allocated bits
-		u64 prio: 24; // Reserved
+		u64 prio: 8; // Reserved
 		u64 ref : 16; // Ref counter
-		u64 iptr: 48; // First pointer to use slot (to count used slots)
+		u64 iptr: 64; // First pointer to use slot (to count used slots)
 	};
 
+	static_assert(sizeof(slot_allocator) == 16);
+
 	// Need to spare 16 bits for ref counter
 	static constexpr u64 max_threads = 24;
@@ -935,7 +945,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 
 	const auto stamp0 = utils::get_unique_tsc();
 
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+	const uptr iptr = reinterpret_cast<uptr>(data);
 
 	uptr iptr_ext[atomic_wait::max_list - 1]{};
@@ -956,7 +966,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 			}
 		}
 
-		iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data) & (~s_ref_mask >> 16);
+		iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data);
 		ext_size++;
 	}
 }
@@ -1266,7 +1276,7 @@ void atomic_wait_engine::notify_one(const void* data)
 		return;
 	}
 #endif
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+	const uptr iptr = reinterpret_cast<uptr>(data);
 
 	root_info::slot_search(iptr, [&](u32 cond_id)
 	{
@@ -1289,7 +1299,7 @@ atomic_wait_engine::notify_all(const void* data)
 		return;
 	}
 #endif
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+	const uptr iptr = reinterpret_cast<uptr>(data);
 
 	// Array count for batch notification
 	u32 count = 0;
diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp
index 593b7a51f1..85c8b10482 100644
--- a/rpcs3/util/atomic.hpp
+++ b/rpcs3/util/atomic.hpp
@@ -205,9 +205,9 @@ namespace atomic_wait
 		constexpr void set(lf_queue<T2>& var, std::nullptr_t = nullptr)
 		{
 			static_assert(Index < Max);
-			static_assert(sizeof(var) == sizeof(uptr));
+			static_assert(sizeof(var) == sizeof(uptr) * 2);
 
-			m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
+			m_info[Index].data = reinterpret_cast<char*>(&var) + offsetof(typename lf_queue<T2>::fat_ptr, is_non_null);
 			m_info[Index].old = 0;
 		}
@@ -215,9 +215,9 @@ namespace atomic_wait
 		constexpr void set(stx::atomic_ptr<T2>& var, std::nullptr_t = nullptr)
 		{
 			static_assert(Index < Max);
-			static_assert(sizeof(var) == sizeof(uptr));
+			static_assert(sizeof(var) == sizeof(uptr) * 2);
 
-			m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
+			m_info[Index].data = reinterpret_cast<char*>(&var) + offsetof(typename stx::atomic_ptr<T2>::fat_ptr, is_non_null);
 			m_info[Index].old = 0;
 		}
diff --git a/rpcs3/util/shared_ptr.hpp b/rpcs3/util/shared_ptr.hpp
index d6d1697030..375784e7dd 100644
--- a/rpcs3/util/shared_ptr.hpp
+++ b/rpcs3/util/shared_ptr.hpp
@@ -19,14 +19,8 @@ namespace stx
 	template <typename T>
 	class atomic_ptr;
 
-	// Basic assumption of userspace pointer size
-	constexpr uint c_ptr_size = 48;
-
-	// Use lower 16 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
-	constexpr uint c_ref_mask = 0xffff, c_ref_size = 16;
-
-	// Remaining pointer bits
-	constexpr uptr c_ptr_mask = static_cast<uptr>(-1) << c_ref_size;
+	// Use 16 bits as atomic_ptr internal counter of borrowed refs
+	constexpr uint c_ref_mask = 0xffff;
 
 	struct shared_counter
 	{
@@ -582,11 +576,21 @@ namespace stx
 	template <typename T>
 	class atomic_ptr
 	{
-		mutable atomic_t<uptr> m_val{0};
-
-		static shared_counter* d(uptr val) noexcept
+	public:
+		struct fat_ptr
 		{
-			return std::launder(reinterpret_cast<shared_counter*>((val >> c_ref_size) - sizeof(shared_counter)));
+			uptr ptr{};
+			u32 is_non_null{};
+			u32 ref_ctr{};
+		};
+
+	private:
+
+		mutable atomic_t<fat_ptr> m_val{fat_ptr{}};
+
+		static shared_counter* d(fat_ptr val) noexcept
+		{
+			return std::launder(reinterpret_cast<shared_counter*>(val.ptr - sizeof(shared_counter)));
 		}
 
 		shared_counter* d() const noexcept
 		{
 			return d(m_val);
 		}
 
-		static uptr to_val(const volatile std::remove_extent_t<T>* ptr) noexcept
+		static fat_ptr to_val(const volatile std::remove_extent_t<T>* ptr) noexcept
 		{
-			return (reinterpret_cast<uptr>(ptr) << c_ref_size);
+			return fat_ptr{reinterpret_cast<uptr>(ptr), ptr != nullptr, 0};
 		}
 
-		static std::remove_extent_t<T>* ptr_to(uptr val) noexcept
+		static fat_ptr to_val(uptr ptr) noexcept
 		{
-			return reinterpret_cast<std::remove_extent_t<T>*>(val >> c_ref_size);
+			return fat_ptr{ptr, ptr != 0, 0};
+		}
+
+		static std::remove_extent_t<T>* ptr_to(fat_ptr val) noexcept
+		{
+			return reinterpret_cast<std::remove_extent_t<T>*>(val.ptr);
 		}
 
 		template <typename U>
@@ -644,7 +653,7 @@ namespace stx
 		atomic_ptr(const shared_ptr<U>& r) noexcept
 		{
 			// Obtain a ref + as many refs as an atomic_ptr can additionally reference
-			if (uptr rval = to_val(r.m_ptr))
+			if (fat_ptr rval = to_val(r.m_ptr); rval.ptr != 0)
 			{
 				m_val.raw() = rval;
 				d(rval)->refs += c_ref_mask + 1;
@@ -654,7 +663,7 @@ namespace stx
 		template <typename U> requires same_ptr_implicit_v<T, U>
 		atomic_ptr(shared_ptr<U>&& r) noexcept
 		{
-			if (uptr rval = to_val(r.m_ptr))
+			if (fat_ptr rval = to_val(r.m_ptr); rval.ptr != 0)
 			{
 				m_val.raw() = rval;
 				d(rval)->refs += c_ref_mask;
@@ -666,7 +675,7 @@ namespace stx
 		template <typename U> requires same_ptr_implicit_v<T, U>
 		atomic_ptr(single_ptr<U>&& r) noexcept
 		{
-			if (uptr rval = to_val(r.m_ptr))
+			if (fat_ptr rval = to_val(r.m_ptr); rval.ptr != 0)
 			{
 				m_val.raw() = rval;
 				d(rval)->refs += c_ref_mask;
@@ -677,13 +686,13 @@ namespace stx
 
 		~atomic_ptr() noexcept
 		{
-			const uptr v = m_val.raw();
+			const fat_ptr v = m_val.raw();
 
-			if (v >> c_ref_size)
+			if (v.ptr)
 			{
 				const auto o = d(v);
 
-				if (!o->refs.sub_fetch(c_ref_mask + 1 - (v & c_ref_mask)))
+				if (!o->refs.sub_fetch(c_ref_mask + 1 - (v.ref_ctr & c_ref_mask)))
 				{
 					o->destroy.load()(o);
 				}
@@ -732,11 +741,11 @@ namespace stx
 			shared_type r;
 
 			// Add reference
-			const auto [prev, did_ref] = m_val.fetch_op([](uptr& val)
+			const auto [prev, did_ref] = m_val.fetch_op([](fat_ptr& val)
 			{
-				if (val >> c_ref_size)
+				if (val.ptr)
 				{
-					val++;
+					val.ref_ctr++;
 					return true;
 				}
@@ -754,11 +763,11 @@ namespace stx
 			r.d()->refs++;
 
 			// Dereference if still the same pointer
-			const auto [_, did_deref] = m_val.fetch_op([prev = prev](uptr& val)
+			const auto [_, did_deref] = m_val.fetch_op([prev = prev](fat_ptr& val)
 			{
-				if (val >> c_ref_size == prev >> c_ref_size)
+				if (val.ptr == prev.ptr)
 				{
-					val--;
+					val.ref_ctr--;
 					return true;
 				}
@@ -781,11 +790,11 @@ namespace stx
 			shared_type r;
 
 			// Add reference
-			const auto [prev, did_ref] = m_val.fetch_op([](uptr& val)
+			const auto [prev, did_ref] = m_val.fetch_op([](fat_ptr& val)
 			{
-				if (val >> c_ref_size)
+				if (val.ptr)
 				{
-					val++;
+					val.ref_ctr++;
 					return true;
 				}
@@ -822,11 +831,11 @@ namespace stx
 			}
 
 			// Dereference if still the same pointer
-			const auto [_, did_deref] = m_val.fetch_op([prev = prev](uptr& val)
+			const auto [_, did_deref] = m_val.fetch_op([prev = prev](fat_ptr& val)
 			{
-				if (val >> c_ref_size == prev >> c_ref_size)
+				if (val.ptr == prev.ptr)
 				{
-					val--;
+					val.ref_ctr--;
 					return true;
 				}
@@ -887,7 +896,7 @@ namespace stx
 
 			atomic_ptr old;
 			old.m_val.raw() = m_val.exchange(to_val(r.m_ptr));
-			old.m_val.raw() += 1;
+			old.m_val.raw().ref_ctr += 1;
 
 			r.m_ptr = std::launder(ptr_to(old.m_val));
 			return r;
@@ -903,7 +912,7 @@ namespace stx
 
 			atomic_ptr old;
 			old.m_val.raw() = m_val.exchange(to_val(value.m_ptr));
-			old.m_val.raw() += 1;
+			old.m_val.raw().ref_ctr += 1;
 
 			value.m_ptr = std::launder(ptr_to(old.m_val));
 			return value;
@@ -922,21 +931,21 @@ namespace stx
 
 			atomic_ptr old;
 
-			const uptr _val = m_val.fetch_op([&](uptr& val)
+			const fat_ptr _val = m_val.fetch_op([&](fat_ptr& val)
 			{
-				if (val >> c_ref_size == _old)
+				if (val.ptr == _old)
 				{
 					// Set new value
-					val = _new << c_ref_size;
+					val = to_val(_new);
 				}
-				else if (val)
+				else if (val.ptr != 0)
 				{
 					// Reference previous value
-					val++;
+					val.ref_ctr++;
 				}
 			});
 
-			if (_val >> c_ref_size == _old)
+			if (_val.ptr == _old)
 			{
 				// Success (exch is consumed, cmp_and_old is unchanged)
 				if (exch.m_ptr)
 				{
@@ -953,9 +962,10 @@ namespace stx
 				old_exch.m_val.raw() = to_val(std::exchange(exch.m_ptr, nullptr));
 
 				// Set to reset old cmp_and_old value
-				old.m_val.raw() = to_val(cmp_and_old.m_ptr) | c_ref_mask;
+				old.m_val.raw() = to_val(cmp_and_old.m_ptr);
+				old.m_val.raw().ref_ctr |= c_ref_mask;
 
-				if (!_val)
+				if (!_val.ptr)
 				{
 					return false;
 				}
@@ -965,11 +975,11 @@ namespace stx
 			cmp_and_old.d()->refs++;
 
 			// Dereference if still the same pointer
-			const auto [_, did_deref] = m_val.fetch_op([_val](uptr& val)
+			const auto [_, did_deref] = m_val.fetch_op([_val](fat_ptr& val)
 			{
-				if (val >> c_ref_size == _val >> c_ref_size)
+				if (val.ptr == _val.ptr)
 				{
-					val--;
+					val.ref_ctr--;
 					return true;
 				}
@@ -1008,12 +1018,12 @@ namespace stx
 
 			atomic_ptr old;
 
-			const auto [_val, ok] = m_val.fetch_op([&](uptr& val)
+			const auto [_val, ok] = m_val.fetch_op([&](fat_ptr& val)
 			{
-				if (val >> c_ref_size == _old)
+				if (val.ptr == _old)
 				{
 					// Set new value
-					val = _new << c_ref_size;
+					val = to_val(_new);
 					return true;
 				}
@@ -1080,7 +1090,7 @@ namespace stx
 			if (next.m_ptr)
 			{
 				// Compensation for `next` assignment
-				old.m_val.raw() += 1;
+				old.m_val.raw().ref_ctr += 1;
 			}
 		}
@@ -1092,7 +1102,7 @@ namespace stx
 
 		explicit constexpr operator bool() const noexcept
 		{
-			return m_val != 0;
+			return observe() != nullptr;
 		}
 
 		template <typename U> requires same_ptr_implicit_v<T, U>
@@ -1109,17 +1119,17 @@ namespace stx
 
 		void wait(std::nullptr_t, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
 		{
-			utils::bless<atomic_t<u32>>(&m_val)[1].wait(0, timeout);
+			utils::bless<atomic_t<u32>>(&m_val.raw().is_non_null)->wait(0, timeout);
 		}
 
 		void notify_one()
 		{
-			utils::bless<atomic_t<u32>>(&m_val)[1].notify_one();
+			utils::bless<atomic_t<u32>>(&m_val.raw().is_non_null)->notify_one();
 		}
 
 		void notify_all()
 		{
-			utils::bless<atomic_t<u32>>(&m_val)[1].notify_all();
+			utils::bless<atomic_t<u32>>(&m_val.raw().is_non_null)->notify_all();
 		}
 	};
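
Note on the patch: the core change is replacing a single packed 64-bit word (pointer shifted into the upper 48 bits, 16-bit borrowed-ref counter in the lower bits) with an explicit 16-byte fat_ptr that is updated through double-width atomics, so the full 64-bit pointer is stored untruncated and the 48-bit userspace-pointer assumption (c_ptr_size) can be dropped. A minimal illustrative sketch of the two encodings follows; the names pack_old, unpack_old_ptr and fat_ptr_example are made up for this note and are not the rpcs3 types:

// Illustrative only: contrasts the old packed encoding with the new 16-byte layout.
#include <cstdint>

// Old scheme: one 64-bit word, pointer assumed to fit in 48 bits.
//   bits [63:16] = pointer, bits [15:0] = borrowed reference counter
inline std::uint64_t pack_old(const void* ptr, std::uint16_t refs)
{
	return (reinterpret_cast<std::uint64_t>(ptr) << 16) | refs;
}

inline void* unpack_old_ptr(std::uint64_t val)
{
	return reinterpret_cast<void*>(val >> 16); // loses information for pointers wider than 48 bits
}

// New scheme: an explicit 16-byte struct updated as one unit with double-width
// atomics (e.g. cmpxchg16b on x86-64), so the pointer is kept as-is.
struct fat_ptr_example
{
	std::uint64_t ptr{};         // untruncated pointer value
	std::uint32_t is_non_null{}; // separately addressable flag for futex-style waits
	std::uint32_t ref_ctr{};     // borrowed-ref counter (atomic_ptr) or reserved (lf_queue)
};

static_assert(sizeof(fat_ptr_example) == 16, "two pointers wide, matching the new sizeof(uptr) * 2 asserts");

The field order above mirrors atomic_ptr::fat_ptr; lf_queue and cond_handle order their trailing u32 fields slightly differently, as the diff shows.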
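The waiter-side addressing changes accordingly: instead of reinterpreting the packed u64 as two u32 halves and waiting on index [1], the waiters now target the dedicated is_non_null field, either via &m_val.raw().is_non_null or via offsetof(fat_ptr, is_non_null) in atomic_wait::list::set. A hedged sketch of that pattern, with std::atomic_ref standing in for utils::bless<atomic_t<u32>> and with illustrative names (wait_until_non_null, publish) that do not exist in the codebase:

// Sketch only: shows waiting on a dedicated 32-bit flag inside a 16-byte value.
#include <atomic>
#include <cstdint>

struct fat_ptr_example
{
	std::uint64_t ptr{};
	std::uint32_t is_non_null{};
	std::uint32_t ref_ctr{};
};

inline void wait_until_non_null(fat_ptr_example& head)
{
	// Wait directly on the flag field; returns once a producer stores non-zero and notifies.
	std::atomic_ref<std::uint32_t> flag{head.is_non_null};
	flag.wait(0);
}

inline void publish(fat_ptr_example& head, std::uint64_t p)
{
	// Illustrative only: the real code replaces the whole 16-byte value with a CAS loop.
	head.ptr = p;
	std::atomic_ref<std::uint32_t> flag{head.is_non_null};
	flag.store(1);
	flag.notify_one();
}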