From 73aaff1b29bda87be63ee9f8ff730102c0e05f0a Mon Sep 17 00:00:00 2001 From: Eladash Date: Mon, 25 Jul 2022 18:57:47 +0300 Subject: [PATCH] LV2: allocation-free synchronization syscalls * Show waiters' ID in kernel explorer. * Remove deque dependency from sys_sync.h --- rpcs3/Emu/CPU/CPUThread.cpp | 18 +++ rpcs3/Emu/CPU/CPUThread.h | 1 + rpcs3/Emu/Cell/PPUThread.h | 4 + rpcs3/Emu/Cell/SPUThread.cpp | 2 +- rpcs3/Emu/Cell/SPUThread.h | 2 + rpcs3/Emu/Cell/lv2/lv2.cpp | 154 ++++++++++++++++++-------- rpcs3/Emu/Cell/lv2/sys_cond.cpp | 55 +++++---- rpcs3/Emu/Cell/lv2/sys_cond.h | 3 +- rpcs3/Emu/Cell/lv2/sys_event.cpp | 59 ++++++---- rpcs3/Emu/Cell/lv2/sys_event.h | 6 +- rpcs3/Emu/Cell/lv2/sys_event_flag.cpp | 114 +++++++++++-------- rpcs3/Emu/Cell/lv2/sys_event_flag.h | 3 +- rpcs3/Emu/Cell/lv2/sys_lwcond.cpp | 71 ++++++++---- rpcs3/Emu/Cell/lv2/sys_lwcond.h | 3 +- rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp | 15 ++- rpcs3/Emu/Cell/lv2/sys_lwmutex.h | 7 +- rpcs3/Emu/Cell/lv2/sys_mutex.cpp | 11 +- rpcs3/Emu/Cell/lv2/sys_mutex.h | 9 +- rpcs3/Emu/Cell/lv2/sys_rwlock.cpp | 66 ++++++----- rpcs3/Emu/Cell/lv2/sys_rwlock.h | 4 +- rpcs3/Emu/Cell/lv2/sys_semaphore.cpp | 15 ++- rpcs3/Emu/Cell/lv2/sys_semaphore.h | 2 +- rpcs3/Emu/Cell/lv2/sys_sync.h | 110 ++++++++++++------ rpcs3/Emu/Cell/lv2/sys_timer.cpp | 16 ++- rpcs3/Emu/Cell/lv2/sys_usbd.cpp | 21 ++-- rpcs3/rpcs3qt/kernel_explorer.cpp | 51 ++++++--- 26 files changed, 547 insertions(+), 275 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 625c6c08be..a3d69b4ac4 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -946,6 +946,24 @@ u32* cpu_thread::get_pc2() return nullptr; } +cpu_thread* cpu_thread::get_next_cpu() +{ + switch (id_type()) + { + case 1: + { + return static_cast(this)->next_cpu; + } + case 2: + { + return static_cast(this)->next_cpu; + } + default: break; + } + + return nullptr; +} + std::shared_ptr make_disasm(const cpu_thread* cpu); void cpu_thread::dump_all(std::string& ret) const diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 39cb3a3eca..cbfe0994ce 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -140,6 +140,7 @@ public: u32 get_pc() const; u32* get_pc2(); // Last PC before stepping for the debugger (may be null) + cpu_thread* get_next_cpu(); // Access next_cpu member if the is one void notify(); cpu_thread& operator=(thread_state); diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index a748c315c7..16e41c51e8 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -323,6 +323,10 @@ public: std::shared_ptr optional_savestate_state; bool interrupt_thread_executing = false; + atomic_t next_cpu{}; // LV2 sleep queues' node link + atomic_t next_ppu{}; // LV2 PPU running queue's node link + bool ack_suspend = false; + be_t* get_stack_arg(s32 i, u64 align = alignof(u64)); void exec_task(); void fast_call(u32 addr, u64 rtoc); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 0118ff85b4..1c56e4cd56 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -4832,7 +4832,7 @@ bool spu_thread::stop_and_signal(u32 code) if (queue->events.empty()) { - queue->sq.emplace_back(this); + lv2_obj::emplace(queue->sq, this); group->run_state = SPU_THREAD_GROUP_STATUS_WAITING; group->waiter_spu_index = index; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 618fe16821..a06abed34e 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -815,6 +815,8 @@ public: const u32 option; // sys_spu_thread_initialize option const u32 lv2_id; // The actual id that is used by syscalls + atomic_t next_cpu{}; // LV2 thread queues' node link + // Thread name atomic_ptr spu_tname; diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 8e4bcc2ae2..1c0b933a5c 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -49,6 +49,7 @@ #include "sys_crypto_engine.h" #include +#include extern std::string ppu_get_syscall_name(u64 code); @@ -1187,14 +1188,18 @@ std::string ppu_get_syscall_name(u64 code) } DECLARE(lv2_obj::g_mutex); -DECLARE(lv2_obj::g_ppu); -DECLARE(lv2_obj::g_pending); -DECLARE(lv2_obj::g_waiting); -DECLARE(lv2_obj::g_to_sleep); +DECLARE(lv2_obj::g_ppu){}; +DECLARE(lv2_obj::g_pending){}; thread_local DECLARE(lv2_obj::g_to_notify){}; thread_local DECLARE(lv2_obj::g_to_awake); +// Scheduler queue for timeouts (wait until -> thread) +static std::deque> g_waiting; + +// Threads which must call lv2_obj::sleep before the scheduler starts +static std::deque g_to_sleep; + namespace cpu_counter { void remove(cpu_thread*) noexcept; @@ -1260,7 +1265,7 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, bool notify_later) if (auto ppu = thread.try_get()) { - ppu_log.trace("sleep() - waiting (%zu)", g_pending.size()); + ppu_log.trace("sleep() - waiting (%zu)", g_pending); const auto [_, ok] = ppu->state.fetch_op([&](bs_t& val) { @@ -1280,10 +1285,11 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, bool notify_later) } // Find and remove the thread - if (!unqueue(g_ppu, ppu)) + if (!unqueue(g_ppu, ppu, &ppu_thread::next_ppu)) { - if (unqueue(g_to_sleep, ppu)) + if (auto it = std::find(g_to_sleep.begin(), g_to_sleep.end(), ppu); it != g_to_sleep.end()) { + g_to_sleep.erase(it); ppu->start_time = start_time; on_to_sleep_update(); } @@ -1293,15 +1299,19 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, bool notify_later) return; } - unqueue(g_pending, ppu); + if (std::exchange(ppu->ack_suspend, false)) + { + g_pending--; + } ppu->raddr = 0; // Clear reservation ppu->start_time = start_time; } else if (auto spu = thread.try_get()) { - if (unqueue(g_to_sleep, spu)) + if (auto it = std::find(g_to_sleep.begin(), g_to_sleep.end(), spu); it != g_to_sleep.end()) { + g_to_sleep.erase(it); on_to_sleep_update(); } @@ -1344,7 +1354,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, bool notify_later, s32 prio) default: { // Priority set - if (static_cast(cpu)->prio.exchange(prio) == prio || !unqueue(g_ppu, cpu)) + if (static_cast(cpu)->prio.exchange(prio) == prio || !unqueue(g_ppu, static_cast(cpu), &ppu_thread::next_ppu)) { return true; } @@ -1353,36 +1363,66 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, bool notify_later, s32 prio) } case yield_cmd: { + usz i = 0; + // Yield command - for (usz i = 0;; i++) + for (auto ppu_next = &g_ppu;; i++) { - if (i + 1 >= g_ppu.size()) + const auto ppu = +*ppu_next; + + if (!ppu) { return false; } - if (const auto ppu = g_ppu[i]; ppu == cpu) + if (ppu == cpu) { - usz j = i + 1; + auto ppu2_next = &ppu->next_ppu; - for (; j < g_ppu.size(); j++) + if (auto next = +*ppu2_next; !next || next->prio != ppu->prio) { - if (g_ppu[j]->prio != ppu->prio) + return false; + } + + for (;; i++) + { + const auto next = +*ppu2_next; + + if (auto next2 = +next->next_ppu; !next2 || next2->prio != ppu->prio) { break; } + + ppu2_next = &next->next_ppu; } - if (j == i + 1) + if (ppu2_next == &ppu->next_ppu) { // Empty 'same prio' threads list return false; } - // Rotate current thread to the last position of the 'same prio' threads list - std::rotate(g_ppu.begin() + i, g_ppu.begin() + i + 1, g_ppu.begin() + j); + auto ppu2 = +*ppu2_next; - if (j <= g_cfg.core.ppu_threads + 0u) + // Rotate current thread to the last position of the 'same prio' threads list + ppu_next->release(ppu2); + + // Exchange forward pointers + if (ppu->next_ppu != ppu2) + { + auto ppu2_val = +ppu2->next_ppu; + ppu2->next_ppu.release(+ppu->next_ppu); + ppu->next_ppu.release(ppu2_val); + ppu2_next->release(ppu); + } + else + { + auto ppu2_val = +ppu2->next_ppu; + ppu2->next_ppu.release(ppu); + ppu->next_ppu.release(ppu2_val); + } + + if (i <= g_cfg.core.ppu_threads + 0u) { // Threads were rotated, but no context switch was made return false; @@ -1392,7 +1432,10 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, bool notify_later, s32 prio) cpu = nullptr; // Disable current thread enqueing, also enable threads list enqueing break; } + + ppu_next = &ppu->next_ppu; } + break; } case enqueue_cmd: @@ -1403,20 +1446,25 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, bool notify_later, s32 prio) const auto emplace_thread = [](cpu_thread* const cpu) { - for (auto it = g_ppu.cbegin(), end = g_ppu.cend();; it++) + for (auto it = &g_ppu;;) { - if (it != end && *it == cpu) + const auto next = +*it; + + if (next == cpu) { - ppu_log.trace("sleep() - suspended (p=%zu)", g_pending.size()); + ppu_log.trace("sleep() - suspended (p=%zu)", g_pending); return false; } // Use priority, also preserve FIFO order - if (it == end || (*it)->prio > static_cast(cpu)->prio) + if (!next || next->prio > static_cast(cpu)->prio) { - g_ppu.insert(it, static_cast(cpu)); + it->release(static_cast(cpu)); + static_cast(cpu)->next_ppu.release(next); break; } + + it = &next->next_ppu; } // Unregister timeout if necessary @@ -1448,20 +1496,28 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, bool notify_later, s32 prio) } // Remove pending if necessary - if (!g_pending.empty() && ((cpu && cpu == get_current_cpu_thread()) || prio == yield_cmd)) + if (g_pending && ((cpu && cpu == get_current_cpu_thread()) || prio == yield_cmd)) { - unqueue(g_pending, get_current_cpu_thread()); + if (auto cur = cpu_thread::get_current()) + { + if (std::exchange(cur->ack_suspend, false)) + { + g_pending--; + } + } } - // Suspend threads if necessary - for (usz i = g_cfg.core.ppu_threads; changed_queue && i < g_ppu.size(); i++) - { - const auto target = g_ppu[i]; + auto target = +g_ppu; - if (!target->state.test_and_set(cpu_flag::suspend)) + // Suspend threads if necessary + for (usz i = 0, thread_count = g_cfg.core.ppu_threads; changed_queue && target; target = target->next_ppu, i++) + { + if (i >= thread_count && cpu_flag::suspend - target->state) { ppu_log.trace("suspend(): %s", target->id); - g_pending.emplace_back(target); + target->ack_suspend = true; + g_pending++; + ensure(!target->state.test_and_set(cpu_flag::suspend)); if (is_paused(target->state - cpu_flag::suspend)) { @@ -1476,23 +1532,23 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, bool notify_later, s32 prio) void lv2_obj::cleanup() { - g_ppu.clear(); - g_pending.clear(); - g_waiting.clear(); + g_ppu = nullptr; g_to_sleep.clear(); + g_waiting.clear(); + g_pending = 0; } void lv2_obj::schedule_all(bool notify_later) { - if (g_pending.empty() && g_to_sleep.empty()) + if (!g_pending && g_to_sleep.empty()) { usz notify_later_idx = notify_later ? 0 : std::size(g_to_notify) - 1; - // Wake up threads - for (usz i = 0, x = std::min(g_cfg.core.ppu_threads, g_ppu.size()); i < x; i++) - { - const auto target = g_ppu[i]; + auto target = +g_ppu; + // Wake up threads + for (usz x = g_cfg.core.ppu_threads; target && x; target = target->next_ppu, x--) + { if (target->state & cpu_flag::suspend) { ppu_log.trace("schedule(): %s", target->id); @@ -1557,9 +1613,19 @@ ppu_thread_status lv2_obj::ppu_state(ppu_thread* ppu, bool lock_idm, bool lock_l opt_lock[1].emplace(lv2_obj::g_mutex); } - const auto it = std::find(g_ppu.begin(), g_ppu.end(), ppu); + usz pos = umax; + usz i = 0; - if (it == g_ppu.end()) + for (auto target = +g_ppu; target; target = target->next_ppu, i++) + { + if (target == ppu) + { + pos = i; + break; + } + } + + if (pos == umax) { if (!ppu->interrupt_thread_executing) { @@ -1569,7 +1635,7 @@ ppu_thread_status lv2_obj::ppu_state(ppu_thread* ppu, bool lock_idm, bool lock_l return PPU_THREAD_STATUS_SLEEP; } - if (it - g_ppu.begin() >= g_cfg.core.ppu_threads) + if (pos >= g_cfg.core.ppu_threads + 0u) { return PPU_THREAD_STATUS_RUNNABLE; } diff --git a/rpcs3/Emu/Cell/lv2/sys_cond.cpp b/rpcs3/Emu/Cell/lv2/sys_cond.cpp index 393e11ada3..a78d150a5a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_cond.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_cond.cpp @@ -112,7 +112,7 @@ error_code sys_cond_destroy(ppu_thread& ppu, u32 cond_id) { std::lock_guard lock(cond.mutex->mutex); - if (cond.waiters) + if (cond.sq) { return CELL_EBUSY; } @@ -143,7 +143,7 @@ error_code sys_cond_signal(ppu_thread& ppu, u32 cond_id) const auto cond = idm::check(cond_id, [&](lv2_cond& cond) { - if (cond.waiters) + if (cond.sq) { lv2_obj::notify_all_t notify; @@ -158,7 +158,6 @@ error_code sys_cond_signal(ppu_thread& ppu, u32 cond_id) } // TODO: Is EBUSY returned after reqeueing, on sys_cond_destroy? - cond.waiters--; if (cond.mutex->try_own(*cpu, cpu->id)) { @@ -184,13 +183,15 @@ error_code sys_cond_signal_all(ppu_thread& ppu, u32 cond_id) const auto cond = idm::check(cond_id, [&](lv2_cond& cond) { - if (cond.waiters) + if (cond.sq) { + lv2_obj::notify_all_t notify; + std::lock_guard lock(cond.mutex->mutex); - for (auto cpu : cond.sq) + for (auto cpu = +cond.sq; cpu; cpu = cpu->next_cpu) { - if (static_cast(cpu)->state & cpu_flag::again) + if (cpu->state & cpu_flag::again) { ppu.state += cpu_flag::again; return; @@ -198,9 +199,10 @@ error_code sys_cond_signal_all(ppu_thread& ppu, u32 cond_id) } cpu_thread* result = nullptr; - cond.waiters -= ::size32(cond.sq); + decltype(cond.sq) sq{+cond.sq}; + cond.sq.release(nullptr); - while (const auto cpu = cond.schedule(cond.sq, SYS_SYNC_PRIORITY)) + while (const auto cpu = cond.schedule(sq, SYS_SYNC_PRIORITY)) { if (cond.mutex->try_own(*cpu, cpu->id)) { @@ -210,7 +212,7 @@ error_code sys_cond_signal_all(ppu_thread& ppu, u32 cond_id) if (result) { - lv2_obj::awake(result); + cond.awake(result, true); } } }); @@ -236,13 +238,13 @@ error_code sys_cond_signal_to(ppu_thread& ppu, u32 cond_id, u32 thread_id) return -1; } - if (cond.waiters) + if (cond.sq) { lv2_obj::notify_all_t notify; std::lock_guard lock(cond.mutex->mutex); - for (auto cpu : cond.sq) + for (auto cpu = +cond.sq; cpu; cpu = cpu->next_cpu) { if (cpu->id == thread_id) { @@ -254,8 +256,6 @@ error_code sys_cond_signal_to(ppu_thread& ppu, u32 cond_id, u32 thread_id) ensure(cond.unqueue(cond.sq, cpu)); - cond.waiters--; - if (cond.mutex->try_own(*cpu, cpu->id)) { cond.awake(cpu, true); @@ -315,9 +315,8 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout) else { // Register waiter - cond.sq.emplace_back(&ppu); - cond.waiters++; - } + lv2_obj::emplace(cond.sq, &ppu); + } if (ppu.loaded_from_savestate) { @@ -361,8 +360,26 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout) { std::lock_guard lock(cond->mutex->mutex); - const bool cond_sleep = std::find(cond->sq.begin(), cond->sq.end(), &ppu) != cond->sq.end(); - const bool mutex_sleep = std::find(cond->mutex->sq.begin(), cond->mutex->sq.end(), &ppu) != cond->mutex->sq.end(); + bool mutex_sleep = false; + bool cond_sleep = false; + + for (auto cpu = +cond->sq; cpu; cpu = cpu->next_cpu) + { + if (cpu == &ppu) + { + cond_sleep = true; + break; + } + } + + for (auto cpu = +cond->mutex->sq; cpu; cpu = cpu->next_cpu) + { + if (cpu == &ppu) + { + mutex_sleep = true; + break; + } + } if (!cond_sleep && !mutex_sleep) { @@ -402,8 +419,6 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout) if (cond->unqueue(cond->sq, &ppu)) { // TODO: Is EBUSY returned after reqeueing, on sys_cond_destroy? - cond->waiters--; - ppu.gpr[3] = CELL_ETIMEDOUT; // Own or requeue diff --git a/rpcs3/Emu/Cell/lv2/sys_cond.h b/rpcs3/Emu/Cell/lv2/sys_cond.h index 91fb35ec7c..0b4f4123ec 100644 --- a/rpcs3/Emu/Cell/lv2/sys_cond.h +++ b/rpcs3/Emu/Cell/lv2/sys_cond.h @@ -27,8 +27,7 @@ struct lv2_cond final : lv2_obj const u32 mtx_id; std::shared_ptr mutex; // Associated Mutex - atomic_t waiters{0}; - std::deque sq; + atomic_t sq{}; lv2_cond(u64 key, u64 name, u32 mtx_id, std::shared_ptr mutex) : key(key) diff --git a/rpcs3/Emu/Cell/lv2/sys_event.cpp b/rpcs3/Emu/Cell/lv2/sys_event.cpp index cee800eca3..2442b06970 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_event.cpp @@ -120,7 +120,7 @@ CellError lv2_event_queue::send(lv2_event event, bool notify_later) return CELL_ENOTCONN; } - if (sq.empty()) + if (!pq && !sq) { if (events.size() < this->size + 0u) { @@ -135,7 +135,7 @@ CellError lv2_event_queue::send(lv2_event event, bool notify_later) if (type == SYS_PPU_QUEUE) { // Store event in registers - auto& ppu = static_cast(*schedule(sq, protocol)); + auto& ppu = static_cast(*schedule(pq, protocol)); if (ppu.state & cpu_flag::again) { @@ -241,11 +241,15 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) std::unique_lock qlock; + cpu_thread* head{}; + const auto queue = idm::withdraw(equeue_id, [&](lv2_event_queue& queue) -> CellError { qlock = std::unique_lock{queue.mutex}; - if (!mode && !queue.sq.empty()) + head = queue.type == SYS_PPU_QUEUE ? static_cast(+queue.pq) : +queue.sq; + + if (!mode && head) { return CELL_EBUSY; } @@ -258,13 +262,13 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) lv2_obj::on_id_destroy(queue, queue.key); - if (queue.sq.empty()) + if (!head) { qlock.unlock(); } else { - for (auto cpu : queue.sq) + for (auto cpu = head; cpu; cpu = cpu->get_next_cpu()) { if (cpu->state & cpu_flag::again) { @@ -296,15 +300,18 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) if (qlock.owns_lock()) { - std::deque sq; - - sq = std::move(queue->sq); - if (sys_event.warning) { - fmt::append(lost_data, "Forcefully awaken waiters (%u):\n", sq.size()); + u32 size = 0; - for (auto cpu : sq) + for (auto cpu = head; cpu; cpu = cpu->get_next_cpu()) + { + size++; + } + + fmt::append(lost_data, "Forcefully awaken waiters (%u):\n", size); + + for (auto cpu = head; cpu; cpu = cpu->get_next_cpu()) { lost_data += cpu->get_name(); lost_data += '\n'; @@ -313,21 +320,24 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) if (queue->type == SYS_PPU_QUEUE) { - for (auto cpu : sq) + for (auto cpu = +queue->pq; cpu; cpu = cpu->next_cpu) { - static_cast(*cpu).gpr[3] = CELL_ECANCELED; + cpu->gpr[3] = CELL_ECANCELED; queue->append(cpu); } + atomic_storage::release(queue->pq, nullptr); lv2_obj::awake_all(); } else { - for (auto cpu : sq) + for (auto cpu = +queue->sq; cpu; cpu = cpu->next_cpu) { - static_cast(*cpu).ch_in_mbox.set_values(1, CELL_ECANCELED); - resume_spu_thread_group_from_waiting(static_cast(*cpu)); + cpu->ch_in_mbox.set_values(1, CELL_ECANCELED); + resume_spu_thread_group_from_waiting(*cpu); } + + atomic_storage::release(queue->sq, nullptr); } qlock.unlock(); @@ -382,7 +392,7 @@ error_code sys_event_queue_tryreceive(ppu_thread& ppu, u32 equeue_id, vm::ptrsq.empty() && count < size && !queue->events.empty()) + while (count < size && !queue->events.empty()) { auto& dest = event_array[count++]; const auto event = queue->events.front(); @@ -425,8 +435,8 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptrmutex); - if (std::find(queue->sq.begin(), queue->sq.end(), &ppu) == queue->sq.end()) + for (auto cpu = +queue->pq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; - return {}; + break; } for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) @@ -495,7 +508,7 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptrmutex); - if (!queue->unqueue(queue->sq, &ppu)) + if (!queue->unqueue(queue->pq, &ppu)) { break; } diff --git a/rpcs3/Emu/Cell/lv2/sys_event.h b/rpcs3/Emu/Cell/lv2/sys_event.h index f6207df4ff..a464508763 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event.h +++ b/rpcs3/Emu/Cell/lv2/sys_event.h @@ -4,7 +4,10 @@ #include "Emu/Memory/vm_ptr.h" +#include + class cpu_thread; +class spu_thrread; // Event Queue Type enum : u32 @@ -89,7 +92,8 @@ struct lv2_event_queue final : public lv2_obj shared_mutex mutex; std::deque events; - std::deque sq; + atomic_t sq{}; + atomic_t pq{}; lv2_event_queue(u32 protocol, s32 type, s32 size, u64 name, u64 ipc_key) noexcept; diff --git a/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp b/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp index 078f3e5d50..f8fcb8ce9a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp @@ -89,7 +89,7 @@ error_code sys_event_flag_destroy(ppu_thread& ppu, u32 id) const auto flag = idm::withdraw(id, [&](lv2_event_flag& flag) -> CellError { - if (flag.waiters) + if (flag.sq) { return CELL_EBUSY; } @@ -164,14 +164,13 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm return {}; } - if (flag.type == SYS_SYNC_WAITER_SINGLE && !flag.sq.empty()) + if (flag.type == SYS_SYNC_WAITER_SINGLE && flag.sq) { return CELL_EPERM; } - flag.waiters++; - flag.sq.emplace_back(&ppu); flag.sleep(ppu, timeout, true); + lv2_obj::emplace(flag.sq, &ppu); return CELL_EBUSY; }); @@ -204,13 +203,16 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm { std::lock_guard lock(flag->mutex); - if (std::find(flag->sq.begin(), flag->sq.end(), &ppu) == flag->sq.end()) + for (auto cpu = +flag->sq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; - return {}; + break; } for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) @@ -240,7 +242,6 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm break; } - flag->waiters--; ppu.gpr[3] = CELL_ETIMEDOUT; ppu.gpr[6] = flag->pattern; break; @@ -317,7 +318,7 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn) { std::lock_guard lock(flag->mutex); - for (auto ppu : flag->sq) + for (auto ppu = +flag->sq; ppu; ppu = ppu->next_cpu) { if (ppu->state & cpu_flag::again) { @@ -328,24 +329,55 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn) } } - // Sort sleep queue in required order - if (flag->protocol != SYS_SYNC_FIFO) - { - std::stable_sort(flag->sq.begin(), flag->sq.end(), [](cpu_thread* a, cpu_thread* b) - { - return static_cast(a)->prio < static_cast(b)->prio; - }); - } - // Process all waiters in single atomic op const u32 count = flag->pattern.atomic_op([&](u64& value) { value |= bitptn; u32 count = 0; - for (auto cpu : flag->sq) + if (!flag->sq) { - auto& ppu = static_cast(*cpu); + return count; + } + + for (auto ppu = +flag->sq; ppu; ppu = ppu->next_cpu) + { + ppu->gpr[7] = 0; + } + + auto first = +flag->sq; + + auto get_next = [&]() -> ppu_thread* + { + if (flag->protocol != SYS_SYNC_PRIORITY) + { + return std::exchange(first, first ? +first->next_cpu : nullptr); + } + + s32 prio = smax; + ppu_thread* it{}; + + for (auto ppu = first; ppu; ppu = ppu->next_cpu) + { + if (!ppu->gpr[7] && ppu->prio < prio) + { + it = ppu; + prio = ppu->prio; + } + } + + if (it) + { + // Mark it so it won't reappear + it->gpr[7] = 1; + } + + return it; + }; + + while (auto it = get_next()) + { + auto& ppu = *it; const u64 pattern = ppu.gpr[4]; const u64 mode = ppu.gpr[5]; @@ -370,25 +402,22 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn) } // Remove waiters - const auto tail = std::remove_if(flag->sq.begin(), flag->sq.end(), [&](cpu_thread* cpu) + for (auto next_cpu = &flag->sq; *next_cpu;) { - auto& ppu = static_cast(*cpu); + auto& ppu = *+*next_cpu; if (ppu.gpr[3] == CELL_OK) { - flag->waiters--; - flag->append(cpu); - return true; + next_cpu->release(+ppu.next_cpu); + ppu.next_cpu.release(nullptr); + flag->append(&ppu); + continue; } - return false; - }); + next_cpu = &ppu.next_cpu; + }; - if (tail != flag->sq.end()) - { - flag->sq.erase(tail, flag->sq.end()); - lv2_obj::awake_all(); - } + lv2_obj::awake_all(); } return CELL_OK; @@ -432,7 +461,7 @@ error_code sys_event_flag_cancel(ppu_thread& ppu, u32 id, vm::ptr num) { std::lock_guard lock(flag->mutex); - for (auto cpu : flag->sq) + for (auto cpu = +flag->sq; cpu; cpu = cpu->next_cpu) { if (cpu->state & cpu_flag::again) { @@ -444,21 +473,18 @@ error_code sys_event_flag_cancel(ppu_thread& ppu, u32 id, vm::ptr num) // Get current pattern const u64 pattern = flag->pattern; - // Set count - value = ::size32(flag->sq); - // Signal all threads to return CELL_ECANCELED (protocol does not matter) - for (auto thread : ::as_rvalue(std::move(flag->sq))) + for (auto ppu = +flag->sq; ppu; ppu = ppu->next_cpu) { - auto& ppu = static_cast(*thread); + ppu->gpr[3] = CELL_ECANCELED; + ppu->gpr[6] = pattern; - ppu.gpr[3] = CELL_ECANCELED; - ppu.gpr[6] = pattern; - - flag->waiters--; - flag->append(thread); + value++; + flag->append(ppu); } + flag->sq.release(nullptr); + if (value) { lv2_obj::awake_all(); diff --git a/rpcs3/Emu/Cell/lv2/sys_event_flag.h b/rpcs3/Emu/Cell/lv2/sys_event_flag.h index 3f761a3ddc..090d47b03c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event_flag.h +++ b/rpcs3/Emu/Cell/lv2/sys_event_flag.h @@ -41,9 +41,8 @@ struct lv2_event_flag final : lv2_obj const u64 name; shared_mutex mutex; - atomic_t waiters{0}; atomic_t pattern; - std::deque sq; + atomic_t sq{}; lv2_event_flag(u32 protocol, u64 key, s32 type, u64 name, u64 pattern) noexcept : protocol{static_cast(protocol)} diff --git a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp index 65dd839f37..d0271a68ca 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp @@ -65,7 +65,7 @@ error_code _sys_lwcond_destroy(ppu_thread& ppu, u32 lwcond_id) const auto cond = idm::withdraw(lwcond_id, [&](lv2_lwcond& cond) -> CellError { - if (cond.waiters) + if (cond.sq) { return CELL_EBUSY; } @@ -127,7 +127,7 @@ error_code _sys_lwcond_signal(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id, u6 } } - if (cond.waiters) + if (cond.sq) { lv2_obj::notify_all_t notify; @@ -153,8 +153,6 @@ error_code _sys_lwcond_signal(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id, u6 return 0; } - cond.waiters--; - if (mode == 2) { static_cast(result)->gpr[3] = CELL_EBUSY; @@ -165,11 +163,11 @@ error_code _sys_lwcond_signal(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id, u6 ensure(!mutex->signaled); std::lock_guard lock(mutex->mutex); - if (mode == 3 && !mutex->sq.empty()) [[unlikely]] + if (mode == 3 && mutex->sq) [[unlikely]] { // Respect ordering of the sleep queue - mutex->sq.emplace_back(result); - auto result2 = mutex->schedule(mutex->sq, mutex->protocol); + lv2_obj::emplace(mutex->sq, result); + result = mutex->schedule(mutex->sq, mutex->protocol); if (static_cast(result2)->state & cpu_flag::again) { @@ -241,8 +239,6 @@ error_code _sys_lwcond_signal_all(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id fmt::throw_exception("Unknown mode (%d)", mode); } - bool need_awake = false; - const auto cond = idm::check(lwcond_id, [&](lv2_lwcond& cond) -> s32 { lv2_lwmutex* mutex{}; @@ -257,7 +253,7 @@ error_code _sys_lwcond_signal_all(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id } } - if (cond.waiters) + if (cond.sq) { lv2_obj::notify_all_t notify; @@ -265,18 +261,19 @@ error_code _sys_lwcond_signal_all(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id u32 result = 0; - for (auto cpu : cond.sq) + for (auto cpu = +cond.sq; cpu; cpu = cpu->next_cpu) { - if (static_cast(cpu)->state & cpu_flag::again) + if (cpu->state & cpu_flag::again) { ppu.state += cpu_flag::again; return 0; } } - cond.waiters = 0; - - while (const auto cpu = cond.schedule(cond.sq, cond.protocol)) + decltype(cond.sq) sq{+cond.sq}; + cond.sq.release(nullptr); + + while (const auto cpu = cond.schedule(sq, cond.protocol)) { if (mode == 2) { @@ -292,13 +289,12 @@ error_code _sys_lwcond_signal_all(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id else { lv2_obj::append(cpu); - need_awake = true; } result++; } - if (need_awake) + if (result && mode == 2) { lv2_obj::awake_all(true); } @@ -358,13 +354,12 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id { // Special: loading state from the point of waiting on lwmutex sleep queue std::lock_guard lock2(mutex->mutex); - mutex->sq.emplace_back(&ppu); + lv2_obj::emplace(mutex->sq, &ppu); } else { // Add a waiter - cond.waiters++; - cond.sq.emplace_back(&ppu); + lv2_obj::emplace(cond.sq, &ppu); } if (!ppu.loaded_from_savestate) @@ -414,8 +409,26 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id reader_lock lock(cond->mutex); reader_lock lock2(mutex->mutex); - const bool cond_sleep = std::find(cond->sq.begin(), cond->sq.end(), &ppu) != cond->sq.end(); - const bool mutex_sleep = std::find(mutex->sq.begin(), mutex->sq.end(), &ppu) != mutex->sq.end(); + bool mutex_sleep = false; + bool cond_sleep = false; + + for (auto cpu = +mutex->sq; cpu; cpu = cpu->next_cpu) + { + if (cpu == &ppu) + { + mutex_sleep = true; + break; + } + } + + for (auto cpu = +mutex->sq; cpu; cpu = cpu->next_cpu) + { + if (cpu == &ppu) + { + cond_sleep = true; + break; + } + } if (!cond_sleep && !mutex_sleep) { @@ -451,14 +464,24 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id if (cond->unqueue(cond->sq, &ppu)) { - cond->waiters--; ppu.gpr[3] = CELL_ETIMEDOUT; break; } reader_lock lock2(mutex->mutex); - if (std::find(mutex->sq.cbegin(), mutex->sq.cend(), &ppu) == mutex->sq.cend()) + bool mutex_sleep = false; + + for (auto cpu = +mutex->sq; cpu; cpu = cpu->next_cpu) + { + if (cpu == &ppu) + { + mutex_sleep = true; + break; + } + } + + if (!mutex_sleep) { break; } diff --git a/rpcs3/Emu/Cell/lv2/sys_lwcond.h b/rpcs3/Emu/Cell/lv2/sys_lwcond.h index d600c15825..a52f87efaf 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwcond.h +++ b/rpcs3/Emu/Cell/lv2/sys_lwcond.h @@ -31,8 +31,7 @@ struct lv2_lwcond final : lv2_obj vm::ptr control; shared_mutex mutex; - atomic_t waiters{0}; - std::deque sq; + atomic_t sq{}; lv2_lwcond(u64 name, u32 lwid, u32 protocol, vm::ptr control) noexcept : name(std::bit_cast>(name)) diff --git a/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp b/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp index 2b3f11018a..206fbdb73b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp @@ -72,7 +72,7 @@ error_code _sys_lwmutex_destroy(ppu_thread& ppu, u32 lwmutex_id) std::lock_guard lock(mutex.mutex); - if (!mutex.sq.empty()) + if (mutex.sq) { return CELL_EBUSY; } @@ -171,8 +171,8 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout) return true; } - mutex.add_waiter(&ppu); mutex.sleep(ppu, timeout, true); + mutex.add_waiter(&ppu); return false; }); @@ -197,13 +197,16 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout) { std::lock_guard lock(mutex->mutex); - if (std::find(mutex->sq.begin(), mutex->sq.end(), &ppu) == mutex->sq.end()) + for (auto cpu = +mutex->sq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; - return {}; + break; } for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) diff --git a/rpcs3/Emu/Cell/lv2/sys_lwmutex.h b/rpcs3/Emu/Cell/lv2/sys_lwmutex.h index 8aec9241d5..ae31fa2fb4 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwmutex.h +++ b/rpcs3/Emu/Cell/lv2/sys_lwmutex.h @@ -61,7 +61,7 @@ struct lv2_lwmutex final : lv2_obj shared_mutex mutex; atomic_t signaled{0}; - std::deque sq; + atomic_t sq{}; atomic_t lwcond_waiters{0}; lv2_lwmutex(u32 protocol, vm::ptr control, u64 name) noexcept @@ -75,7 +75,8 @@ struct lv2_lwmutex final : lv2_obj void save(utils::serial& ar); // Add a waiter - void add_waiter(cpu_thread* cpu) + template + void add_waiter(T* cpu) { const bool notify = lwcond_waiters.fetch_op([](s32& val) { @@ -91,7 +92,7 @@ struct lv2_lwmutex final : lv2_obj return true; }).second; - sq.emplace_back(cpu); + lv2_obj::emplace(sq, cpu); if (notify) { diff --git a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp index 4f76f0f589..547d3ccbbe 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp @@ -188,13 +188,16 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout) { std::lock_guard lock(mutex->mutex); - if (std::find(mutex->sq.begin(), mutex->sq.end(), &ppu) == mutex->sq.end()) + for (auto cpu = +mutex->sq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; - return {}; + break; } for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) diff --git a/rpcs3/Emu/Cell/lv2/sys_mutex.h b/rpcs3/Emu/Cell/lv2/sys_mutex.h index 20909acef8..f12ea35405 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mutex.h +++ b/rpcs3/Emu/Cell/lv2/sys_mutex.h @@ -35,7 +35,7 @@ struct lv2_mutex final : lv2_obj shared_mutex mutex; atomic_t owner{0}; atomic_t lock_count{0}; // Recursive Locks - std::deque sq; + atomic_t sq{}; lv2_mutex(u32 protocol, u32 recursive,u32 adaptive, u64 key, u64 name) noexcept : protocol{static_cast(protocol)} @@ -82,7 +82,8 @@ struct lv2_mutex final : lv2_obj return CELL_EBUSY; } - bool try_own(cpu_thread& cpu, u32 id) + template + bool try_own(T& cpu, u32 id) { if (owner.fetch_op([&](u32& val) { @@ -96,7 +97,7 @@ struct lv2_mutex final : lv2_obj } })) { - sq.emplace_back(&cpu); + lv2_obj::emplace(sq, &cpu); return false; } @@ -139,7 +140,7 @@ struct lv2_mutex final : lv2_obj return static_cast(cpu); } - owner = cpu->id << 1 | !sq.empty(); + owner = cpu->id << 1 | !!sq; return static_cast(cpu); } else diff --git a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp index f09cb9be8d..0b669a49f1 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp @@ -132,8 +132,8 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) if (_old > 0 || _old & 1) { - rwlock.rq.emplace_back(&ppu); rwlock.sleep(ppu, timeout, true); + lv2_obj::emplace(rwlock.rq, &ppu); return false; } @@ -163,12 +163,15 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) { std::lock_guard lock(rwlock->mutex); - if (std::find(rwlock->rq.begin(), rwlock->rq.end(), &ppu) == rwlock->rq.end()) + for (auto cpu = +rwlock->rq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; break; } @@ -305,7 +308,7 @@ error_code sys_rwlock_runlock(ppu_thread& ppu, u32 rw_lock_id) return {}; } - rwlock->owner = cpu->id << 1 | !rwlock->wq.empty() | !rwlock->rq.empty(); + rwlock->owner = cpu->id << 1 | !!rwlock->wq | !!rwlock->rq; rwlock->awake(cpu); } @@ -313,7 +316,7 @@ error_code sys_rwlock_runlock(ppu_thread& ppu, u32 rw_lock_id) { rwlock->owner = 0; - ensure(rwlock->rq.empty()); + ensure(!rwlock->rq); } } } @@ -361,8 +364,8 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) if (_old != 0) { - rwlock.wq.emplace_back(&ppu); rwlock.sleep(ppu, timeout, true); + lv2_obj::emplace(rwlock.wq, &ppu); } return _old; @@ -396,12 +399,15 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) { std::lock_guard lock(rwlock->mutex); - if (std::find(rwlock->wq.begin(), rwlock->wq.end(), &ppu) == rwlock->wq.end()) + for (auto cpu = +rwlock->wq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; break; } @@ -433,23 +439,28 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) } // If the last waiter quit the writer sleep queue, wake blocked readers - if (!rwlock->rq.empty() && rwlock->wq.empty() && rwlock->owner < 0) + if (rwlock->rq && !rwlock->wq && rwlock->owner < 0) { - rwlock->owner.atomic_op([&](s64& owner) - { - owner -= 2 * static_cast(rwlock->rq.size()); // Add readers to value - owner &= -2; // Clear wait bit - }); + s64 size = 0; // Protocol doesn't matter here since they are all enqueued anyways - for (auto cpu : ::as_rvalue(std::move(rwlock->rq))) + for (auto cpu = +rwlock->rq; cpu; cpu = cpu->next_cpu) { + size++; rwlock->append(cpu); } + rwlock->rq.release(nullptr); + + rwlock->owner.atomic_op([&](s64& owner) + { + owner -= 2 * size; // Add readers to value + owner &= -2; // Clear wait bit + }); + lv2_obj::awake_all(); } - else if (rwlock->rq.empty() && rwlock->wq.empty()) + else if (!rwlock->rq && !rwlock->wq) { rwlock->owner &= -2; } @@ -535,27 +546,32 @@ error_code sys_rwlock_wunlock(ppu_thread& ppu, u32 rw_lock_id) return {}; } - rwlock->owner = cpu->id << 1 | !rwlock->wq.empty() | !rwlock->rq.empty(); + rwlock->owner = cpu->id << 1 | !!rwlock->wq | !!rwlock->rq; rwlock->awake(cpu); } - else if (auto readers = rwlock->rq.size()) + else if (rwlock->rq) { - for (auto cpu : rwlock->rq) + for (auto cpu = +rwlock->rq; cpu; cpu = cpu->next_cpu) { - if (static_cast(cpu)->state & cpu_flag::again) + if (cpu->state & cpu_flag::again) { ppu.state += cpu_flag::again; return {}; } } - - for (auto cpu : ::as_rvalue(std::move(rwlock->rq))) + + s64 size = 0; + + // Protocol doesn't matter here since they are all enqueued anyways + for (auto cpu = +rwlock->rq; cpu; cpu = cpu->next_cpu) { + size++; rwlock->append(cpu); } - rwlock->owner.release(-2 * static_cast(readers)); + rwlock->rq.release(nullptr); + rwlock->owner.release(-2 * static_cast(size)); lv2_obj::awake_all(); } else diff --git a/rpcs3/Emu/Cell/lv2/sys_rwlock.h b/rpcs3/Emu/Cell/lv2/sys_rwlock.h index 63ef36d79a..e16f036e14 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rwlock.h +++ b/rpcs3/Emu/Cell/lv2/sys_rwlock.h @@ -29,8 +29,8 @@ struct lv2_rwlock final : lv2_obj shared_mutex mutex; atomic_t owner{0}; - std::deque rq; - std::deque wq; + atomic_t rq{}; + atomic_t wq{}; lv2_rwlock(u32 protocol, u64 key, u64 name) noexcept : protocol{static_cast(protocol)} diff --git a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp index 29f2508bc2..7fcb67952c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp @@ -129,8 +129,8 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout) if (sema.val-- <= 0) { - sema.sq.emplace_back(&ppu); sema.sleep(ppu, timeout, true); + lv2_obj::emplace(sema.sq, &ppu); return false; } @@ -160,13 +160,16 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout) { std::lock_guard lock(sem->mutex); - if (std::find(sem->sq.begin(), sem->sq.end(), &ppu) == sem->sq.end()) + for (auto cpu = +sem->sq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + return {}; + } } - ppu.state += cpu_flag::again; - return {}; + break; } for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) @@ -280,7 +283,7 @@ error_code sys_semaphore_post(ppu_thread& ppu, u32 sem_id, s32 count) { std::lock_guard lock(sem->mutex); - for (auto cpu : sem->sq) + for (auto cpu = +sem->sq; cpu; cpu = cpu->next_cpu) { if (static_cast(cpu)->state & cpu_flag::again) { diff --git a/rpcs3/Emu/Cell/lv2/sys_semaphore.h b/rpcs3/Emu/Cell/lv2/sys_semaphore.h index 9c2b39b877..e12f20d3f3 100644 --- a/rpcs3/Emu/Cell/lv2/sys_semaphore.h +++ b/rpcs3/Emu/Cell/lv2/sys_semaphore.h @@ -30,7 +30,7 @@ struct lv2_sema final : lv2_obj shared_mutex mutex; atomic_t val; - std::deque sq; + atomic_t sq{}; lv2_sema(u32 protocol, u64 key, u64 name, s32 max, s32 value) noexcept : protocol{static_cast(protocol)} diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index fa7497a7bc..cd1bc70ffe 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -10,7 +10,6 @@ #include "Emu/IPC.h" #include "Emu/system_config.h" -#include #include // attr_protocol (waiting scheduling policy) @@ -107,60 +106,106 @@ public: return str; } - // Find and remove the object from the deque container - template - static T unqueue(std::deque& queue, E object) + // Find and remove the object from the linked list + template + static T* unqueue(atomic_t& first, T* object, atomic_t T::* mem_ptr = &T::next_cpu) { - for (auto found = queue.cbegin(), end = queue.cend(); found != end; found++) + auto it = +first; + + if (it == object) { - if (*found == object) + first.release(+it->*mem_ptr); + (it->*mem_ptr).release(nullptr); + return it; + } + + for (; it;) + { + const auto next = it->*mem_ptr + 0; + + if (next == object) { - queue.erase(found); - return static_cast(object); + (it->*mem_ptr).release(+next->*mem_ptr); + (next->*mem_ptr).release(nullptr); + return next; } + + it = next; } return {}; } template - static T* schedule(std::deque& queue, u32 protocol) + static E* schedule(atomic_t& first, u32 protocol) { - if (queue.empty()) + auto it = static_cast(first); + + if (!it) { - return nullptr; + return it; } if (protocol == SYS_SYNC_FIFO) { - const auto res = queue.front(); + if (it && cpu_flag::again - it->state) + { + first.release(+it->next_cpu); + it->next_cpu.release(nullptr); + } - if (res->state.none_of(cpu_flag::again)) - queue.pop_front(); - - return res; + return it; } - s32 prio = 3071; - auto it = queue.cbegin(); + s32 prio = it->prio; + auto found = it; + auto parent_found = &first; - for (auto found = it, end = queue.cend(); found != end; found++) + while (true) { - const s32 _prio = static_cast(*found)->prio; + auto& node = it->next_cpu; + const auto next = static_cast(node); + + if (!next) + { + break; + } + + const s32 _prio = static_cast(next)->prio; if (_prio < prio) { - it = found; + found = next; + parent_found = &node; prio = _prio; } + + it = next; } - const auto res = *it; + if (cpu_flag::again - found->state) + { + parent_found->release(+found->next_cpu); + found->next_cpu.release(nullptr); + } - if (res->state.none_of(cpu_flag::again)) - queue.erase(it); + return found; + } - return res; + template + static auto emplace(atomic_t& first, T object) + { + auto it = &first; + + while (auto ptr = static_cast(+*it)) + { + it = &ptr->next_cpu; + } + + it->release(object); + + // Return parent + return it; } private: @@ -443,7 +488,10 @@ public: return; } - cpu->state.notify_one(cpu_flag::suspend + cpu_flag::signal); + if (cpu->state & cpu_flag::signal) + { + cpu->state.notify_one(cpu_flag::suspend + cpu_flag::signal); + } } } @@ -463,16 +511,10 @@ private: static thread_local std::vector g_to_awake; // Scheduler queue for active PPU threads - static std::deque g_ppu; + static atomic_t g_ppu; // Waiting for the response from - static std::deque g_pending; - - // Scheduler queue for timeouts (wait until -> thread) - static std::deque> g_waiting; - - // Threads which must call lv2_obj::sleep before the scheduler starts - static std::deque g_to_sleep; + static u32 g_pending; // Pending list of threads to notify static thread_local std::add_pointer_t g_to_notify[4]; diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp index 648d9d0597..99aafe9c19 100644 --- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp @@ -11,6 +11,7 @@ #include "sys_process.h" #include +#include LOG_CHANNEL(sys_timer); @@ -152,8 +153,12 @@ error_code sys_timer_create(ppu_thread& ppu, vm::ptr timer_id) auto& thread = g_fxo->get>(); { std::lock_guard lock(thread.mutex); - lv2_obj::unqueue(thread.timers, ptr); - thread.timers.emplace_back(std::move(ptr)); + + // Theoretically could have been destroyed by sys_timer_destroy by now + if (auto it = std::find(thread.timers.begin(), thread.timers.end(), ptr); it == thread.timers.end()) + { + thread.timers.emplace_back(std::move(ptr)); + } } *timer_id = idm::last_id(); @@ -192,7 +197,12 @@ error_code sys_timer_destroy(ppu_thread& ppu, u32 timer_id) auto& thread = g_fxo->get>(); std::lock_guard lock(thread.mutex); - lv2_obj::unqueue(thread.timers, std::move(timer.ptr)); + + if (auto it = std::find(thread.timers.begin(), thread.timers.end(), timer.ptr); it != thread.timers.end()) + { + thread.timers.erase(it); + } + return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_usbd.cpp b/rpcs3/Emu/Cell/lv2/sys_usbd.cpp index 5ea62ccb8e..c2eddd79c9 100644 --- a/rpcs3/Emu/Cell/lv2/sys_usbd.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_usbd.cpp @@ -109,7 +109,7 @@ public: // sys_usbd_receive_event PPU Threads shared_mutex mutex_sq; - std::deque sq; + atomic_t sq{}; static constexpr auto thread_name = "Usb Manager Thread"sv; @@ -642,7 +642,7 @@ error_code sys_usbd_finalize(ppu_thread& ppu, u32 handle) usbh.is_init = false; // Forcefully awake all waiters - for (auto& cpu : ::as_rvalue(std::move(usbh.sq))) + for (auto cpu = +usbh.sq; cpu; cpu = cpu->next_cpu) { // Special ternimation signal value cpu->gpr[4] = 4; @@ -651,6 +651,8 @@ error_code sys_usbd_finalize(ppu_thread& ppu, u32 handle) lv2_obj::awake(cpu); } + usbh.sq.release(nullptr); + // TODO return CELL_OK; } @@ -857,7 +859,7 @@ error_code sys_usbd_receive_event(ppu_thread& ppu, u32 handle, vm::ptr arg1 } lv2_obj::sleep(ppu); - usbh.sq.emplace_back(&ppu); + lv2_obj::emplace(usbh.sq, &ppu); } while (auto state = +ppu.state) @@ -872,14 +874,17 @@ error_code sys_usbd_receive_event(ppu_thread& ppu, u32 handle, vm::ptr arg1 { std::lock_guard lock(usbh.mutex); - if (std::find(usbh.sq.begin(), usbh.sq.end(), &ppu) == usbh.sq.end()) + for (auto cpu = +usbh.sq; cpu; cpu = cpu->next_cpu) { - break; + if (cpu == &ppu) + { + ppu.state += cpu_flag::again; + sys_usbd.trace("sys_usbd_receive_event: aborting"); + return {}; + } } - ppu.state += cpu_flag::again; - sys_usbd.trace("sys_usbd_receive_event: aborting"); - return {}; + break; } thread_ctrl::wait_on(ppu.state, state); diff --git a/rpcs3/rpcs3qt/kernel_explorer.cpp b/rpcs3/rpcs3qt/kernel_explorer.cpp index b8b93d8b15..72d2623e7c 100644 --- a/rpcs3/rpcs3qt/kernel_explorer.cpp +++ b/rpcs3/rpcs3qt/kernel_explorer.cpp @@ -337,6 +337,14 @@ void kernel_explorer::update() return; } + auto show_waiters = [&](QTreeWidgetItem* tree, cpu_thread* cpu) + { + for (; cpu; cpu = cpu->get_next_cpu()) + { + add_leaf(tree, qstr(fmt::format("Waiter: ID: 0x%x", cpu->id_type() == 2 ? static_cast(cpu)->lv2_id : cpu->id))); + } + }; + switch (id >> 24) { case SYS_MEM_OBJECT: @@ -356,22 +364,33 @@ void kernel_explorer::update() case SYS_MUTEX_OBJECT: { auto& mutex = static_cast(obj); - add_leaf(node, qstr(fmt::format(u8"Mutex 0x%08x: “%s”, %s,%s Owner: %#x, Locks: %u, Key: %#llx, Conds: %u, Wq: %zu", id, lv2_obj::name64(mutex.name), mutex.protocol, - mutex.recursive == SYS_SYNC_RECURSIVE ? " Recursive," : "", mutex.owner >> 1, +mutex.lock_count, mutex.key, mutex.cond_count, mutex.sq.size()))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"Mutex 0x%08x: “%s”, %s,%s Owner: %#x, Locks: %u, Key: %#llx, Conds: %u", id, lv2_obj::name64(mutex.name), mutex.protocol, + mutex.recursive == SYS_SYNC_RECURSIVE ? " Recursive," : "", mutex.owner >> 1, +mutex.lock_count, mutex.key, mutex.cond_count))), mutex.sq); break; } case SYS_COND_OBJECT: { auto& cond = static_cast(obj); - add_leaf(node, qstr(fmt::format(u8"Cond 0x%08x: “%s”, %s, Mutex: 0x%08x, Key: %#llx, Wq: %u", id, lv2_obj::name64(cond.name), cond.mutex->protocol, cond.mtx_id, cond.key, +cond.waiters))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"Cond 0x%08x: “%s”, %s, Mutex: 0x%08x, Key: %#llx", id, lv2_obj::name64(cond.name), cond.mutex->protocol, cond.mtx_id, cond.key))), cond.sq); break; } case SYS_RWLOCK_OBJECT: { auto& rw = static_cast(obj); const s64 val = rw.owner; - add_leaf(node, qstr(fmt::format(u8"RW Lock 0x%08x: “%s”, %s, Owner: %#x(%d), Key: %#llx, Rq: %zu, Wq: %zu", id, lv2_obj::name64(rw.name), rw.protocol, - std::max(0, val >> 1), -std::min(0, val >> 1), rw.key, rw.rq.size(), rw.wq.size()))); + auto tree = add_solid_node(node, qstr(fmt::format(u8"RW Lock 0x%08x: “%s”, %s, Owner: %#x(%d), Key: %#llx", id, lv2_obj::name64(rw.name), rw.protocol, + std::max(0, val >> 1), -std::min(0, val >> 1), rw.key))); + + if (auto rq = +rw.rq) + { + show_waiters(add_solid_node(tree, "Reader Waiters"), rq); + } + + if (auto wq = +rw.wq) + { + show_waiters(add_solid_node(tree, "Writer Waiters"), wq); + } + break; } case SYS_INTR_TAG_OBJECT: @@ -397,8 +416,8 @@ void kernel_explorer::update() case SYS_EVENT_QUEUE_OBJECT: { auto& eq = static_cast(obj); - add_leaf(node, qstr(fmt::format(u8"Event Queue 0x%08x: “%s”, %s, %s, Key: %#llx, Events: %zu/%d, Wq: %zu", id, lv2_obj::name64(eq.name), eq.protocol, - eq.type == SYS_SPU_QUEUE ? "SPU" : "PPU", eq.key, eq.events.size(), eq.size, eq.sq.size()))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"Event Queue 0x%08x: “%s”, %s, %s, Key: %#llx, Events: %zu/%d", id, lv2_obj::name64(eq.name), eq.protocol, + eq.type == SYS_SPU_QUEUE ? "SPU" : "PPU", eq.key, eq.events.size(), eq.size))), eq.type == SYS_SPU_QUEUE ? static_cast(+eq.sq) : +eq.pq); break; } case SYS_EVENT_PORT_OBJECT: @@ -494,12 +513,12 @@ void kernel_explorer::update() } else { - add_leaf(node, qstr(fmt::format(u8"LWMutex 0x%08x: “%s”, %s, Signal: %#x, Wq: %zu (unmapped/invalid control data at *0x%x)", id, lv2_obj::name64(lwm.name), lwm.protocol, +lwm.signaled, lwm.sq.size(), lwm.control))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"LWMutex 0x%08x: “%s”, %s, Signal: %#x (unmapped/invalid control data at *0x%x)", id, lv2_obj::name64(lwm.name), lwm.protocol, +lwm.signaled, lwm.control))), lwm.sq); break; } - add_leaf(node, qstr(fmt::format(u8"LWMutex 0x%08x: “%s”, %s,%s Owner: %s, Locks: %u, Signal: %#x, Control: *0x%x, Wq: %zu", id, lv2_obj::name64(lwm.name), lwm.protocol, - (lwm_data.attribute & SYS_SYNC_RECURSIVE) ? " Recursive," : "", owner_str, lwm_data.recursive_count, +lwm.signaled, lwm.control, lwm.sq.size()))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"LWMutex 0x%08x: “%s”, %s,%s Owner: %s, Locks: %u, Signal: %#x, Control: *0x%x", id, lv2_obj::name64(lwm.name), lwm.protocol, + (lwm_data.attribute & SYS_SYNC_RECURSIVE) ? " Recursive," : "", owner_str, lwm_data.recursive_count, +lwm.signaled, lwm.control))), lwm.sq); break; } case SYS_TIMER_OBJECT: @@ -517,21 +536,21 @@ void kernel_explorer::update() { auto& sema = static_cast(obj); const auto val = +sema.val; - add_leaf(node, qstr(fmt::format(u8"Sema 0x%08x: “%s”, %s, Count: %d/%d, Key: %#llx, Wq: %zu", id, lv2_obj::name64(sema.name), sema.protocol, - std::max(val, 0), sema.max, sema.key, -std::min(val, 0)))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"Sema 0x%08x: “%s”, %s, Count: %d/%d, Key: %#llx", id, lv2_obj::name64(sema.name), sema.protocol, + std::max(val, 0), sema.max, sema.key, -std::min(val, 0)))), sema.sq); break; } case SYS_LWCOND_OBJECT: { auto& lwc = static_cast(obj); - add_leaf(node, qstr(fmt::format(u8"LWCond 0x%08x: “%s”, %s, OG LWMutex: 0x%08x, Control: *0x%x, Wq: %zu", id, lv2_obj::name64(lwc.name), lwc.protocol, lwc.lwid, lwc.control, +lwc.waiters))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"LWCond 0x%08x: “%s”, %s, OG LWMutex: 0x%08x, Control: *0x%x", id, lv2_obj::name64(lwc.name), lwc.protocol, lwc.lwid, lwc.control))), lwc.sq); break; } case SYS_EVENT_FLAG_OBJECT: { auto& ef = static_cast(obj); - add_leaf(node, qstr(fmt::format(u8"Event Flag 0x%08x: “%s”, %s, Type: 0x%x, Key: %#llx, Pattern: 0x%llx, Wq: %zu", id, lv2_obj::name64(ef.name), ef.protocol, - ef.type, ef.key, ef.pattern.load(), +ef.waiters))); + show_waiters(add_solid_node(node, qstr(fmt::format(u8"Event Flag 0x%08x: “%s”, %s, Type: 0x%x, Key: %#llx, Pattern: 0x%llx", id, lv2_obj::name64(ef.name), ef.protocol, + ef.type, ef.key, ef.pattern.load()))), ef.sq); break; } case SYS_RSXAUDIO_OBJECT: @@ -543,7 +562,7 @@ void kernel_explorer::update() break; } - QTreeWidgetItem* rao_obj = add_leaf(node, qstr(fmt::format(u8"RSXAudio 0x%08x: Shmem: 0x%08x", id, u32{rao.shmem}))); + QTreeWidgetItem* rao_obj = add_solid_node(node, qstr(fmt::format(u8"RSXAudio 0x%08x: Shmem: 0x%08x", id, u32{rao.shmem}))); for (u64 q_idx = 0; q_idx < rao.event_queue.size(); q_idx++) { if (const auto eq = rao.event_queue[q_idx].lock())