From 64d6b88abd56530403b56c9757f01ac3230c8daa Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 30 Dec 2024 09:11:20 +0200 Subject: [PATCH] LV2: Introduce Dynamic Timer signals --- rpcs3/Emu/Cell/lv2/lv2.cpp | 137 ++++++++++++++++++++++++++---- rpcs3/Emu/Cell/lv2/sys_sync.h | 4 +- rpcs3/Emu/RSX/RSXThread.cpp | 2 + rpcs3/Emu/system_config.h | 6 +- rpcs3/Emu/system_config_types.cpp | 1 + rpcs3/Emu/system_config_types.h | 1 + rpcs3/rpcs3qt/emu_settings.cpp | 1 + rpcs3/rpcs3qt/emu_settings_type.h | 2 +- 8 files changed, 131 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 6349bd60c6..c6d69d8827 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1307,6 +1307,8 @@ static std::deque g_to_sleep; static atomic_t g_scheduler_ready = false; static atomic_t s_yield_frequency = 0; static atomic_t s_max_allowed_yield_tsc = 0; +static atomic_t s_lv2_timers_sum_of_ten_delay_in_us = 5000; +static atomic_t s_lv2_timers_min_timer_in_us = 0; static u64 s_last_yield_tsc = 0; atomic_t g_lv2_preempts_taken = 0; @@ -1432,7 +1434,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio) if (!g_postpone_notify_barrier) { - notify_all(); + notify_all(thread); } return result; @@ -1573,6 +1575,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time) { if (it == end || it->first > wait_until) { + if (it == g_waiting.cbegin()) + { + s_lv2_timers_min_timer_in_us.release(wait_until); + } + g_waiting.emplace(it, wait_until, &thread); break; } @@ -1835,6 +1842,8 @@ void lv2_obj::cleanup() g_waiting.clear(); g_pending = 0; s_yield_frequency = 0; + s_lv2_timers_sum_of_ten_delay_in_us = 5000; + s_lv2_timers_min_timer_in_us = 0; } void lv2_obj::schedule_all(u64 current_time) @@ -1876,7 +1885,7 @@ void lv2_obj::schedule_all(u64 current_time) } // Check registered timeouts - while (!g_waiting.empty()) + while (!g_waiting.empty() && it != 
std::end(g_to_notify)) { const auto pair = &g_waiting.front(); @@ -1896,15 +1905,7 @@ void lv2_obj::schedule_all(u64 current_time) ensure(!target->state.test_and_set(cpu_flag::notify)); // Otherwise notify it to wake itself - if (it == std::end(g_to_notify)) - { - // Out of notification slots, notify locally (resizable container is not worth it) - target->state.notify_one(); - } - else - { - *it++ = &target->state; - } + *it++ = &target->state; } } else @@ -2171,7 +2172,35 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep #endif // TODO: Tune for other non windows operating sytems - if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers)) + const sleep_timers_accuracy_level accuracy_type = g_cfg.core.sleep_timers_accuracy; + const u64 avg_delay = get_avg_timer_reponse_delay(); + + static atomic_t g_success = 0; + static atomic_t g_fails = 0; + + if (accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2) + { + wait_for(remaining); + + if (remaining < host_min_quantum) + { + g_success += remaining; + //g_success++; + } + + passed = get_system_time() - start_time; + continue; + } + else + { + if (remaining < host_min_quantum) + { + g_fails += remaining; + //g_fails++; + } + } + + if (accuracy_type < (is_usleep ? 
sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers)) { wait_for(remaining); } @@ -2222,7 +2251,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu) cpu_counter::remove(&cpu); } -void lv2_obj::notify_all() noexcept +void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept { for (auto cpu : g_to_notify) { @@ -2258,13 +2287,11 @@ void lv2_obj::notify_all() noexcept return; } - if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend)) + if (cpu->get_class() == thread_class::ppu && cpu->state.none_of(cpu_flag::suspend + cpu_flag::signal)) { return; } - std::optional lock; - constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value); u32 notifies[total_waiters]{}; @@ -2346,4 +2373,82 @@ void lv2_obj::notify_all() noexcept vm::reservation_notifier_notify(addr); } } + + if (woke_thread == cpu) + { + return; + } + + const u64 min_timer = s_lv2_timers_min_timer_in_us; + const u64 current_time = get_guest_system_time(); + + if (current_time < min_timer) + { + return; + } + + atomic_bs_t* notifies_cpus[16]; + usz count_notifies_cpus = 0; + + std::unique_lock lock(g_mutex, std::try_to_lock); + + if (!lock) + { + // Not only is this method an opportunistic optimization, + // but if the mutex is already locked then it is likely that another thread will soon do this check instead + return; + } + + // Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself + // This 'redundancy' is what allows proper measurements + if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2) + { + const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe(); + s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10); + } + + // Check registered timeouts + while (!g_waiting.empty() && count_notifies_cpus < std::size(notifies_cpus)) + { + const auto pair = &g_waiting.front(); + + if (pair->first <= current_time) + 
{ + const auto target = pair->second; + g_waiting.pop_front(); + + if (target != cpu) + { + // Change cpu_thread::state for the lightweight notification to work + ensure(!target->state.test_and_set(cpu_flag::notify)); + notifies_cpus[count_notifies_cpus++] = &target->state; + } + } + else + { + // The list is sorted so assume no more timeouts + break; + } + } + + if (g_waiting.empty()) + { + s_lv2_timers_min_timer_in_us.release(u64{umax}); + } + else + { + s_lv2_timers_min_timer_in_us.release(g_waiting.front().first); + } + + lock.unlock(); + + for (usz i = count_notifies_cpus - 1; i != umax; i--) + { + atomic_wait_engine::notify_one(notifies_cpus[i]); + } +} + +u64 lv2_obj::get_avg_timer_reponse_delay() +{ + return s_lv2_timers_sum_of_ten_delay_in_us / 10; } diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index bd6004dfaa..244b029004 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -454,11 +454,13 @@ public: static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false); - static void notify_all() noexcept; + static void notify_all(cpu_thread* woke_thread = nullptr) noexcept; // Can be called before the actual sleep call in order to move it out of mutex scope static void prepare_for_sleep(cpu_thread& cpu); + static u64 get_avg_timer_reponse_delay(); + struct notify_all_t { notify_all_t() noexcept diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 29c2d8e865..1942f8cefb 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -870,11 +870,13 @@ namespace rsx { // Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays. 
do_local_task(rsx::FIFO::state::paused); + lv2_obj::notify_all(); thread_ctrl::wait_on(state, old, 16000); } else { on_semaphore_acquire_wait(); + lv2_obj::notify_all(); std::this_thread::yield(); } } diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 3cb3e39851..c207df4233 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -91,11 +91,7 @@ struct cfg_root : cfg::node cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true }; cfg::uint<0, 400> max_cpu_preempt_count_per_frame{ this, "Max CPU Preempt Count", 0, true }; cfg::_bool allow_rsx_cpu_preempt{ this, "Allow RSX CPU Preemptions", true, true }; -#if defined (__linux__) || defined (__APPLE__) - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true }; -#else - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true }; -#endif + cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy 2", sleep_timers_accuracy_level::_dynamic, true }; cfg::_int<-1000, 1500> usleep_addend{ this, "Usleep Time Addend", 0, true }; cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp index c01692b8a5..fc7fcfdd01 100644 --- a/rpcs3/Emu/system_config_types.cpp +++ b/rpcs3/Emu/system_config_types.cpp @@ -237,6 +237,7 @@ void fmt_class_string::format(std::string& out, u64 switch (value) { case sleep_timers_accuracy_level::_as_host: return "As Host"; + case sleep_timers_accuracy_level::_dynamic: return "Dynamic"; case sleep_timers_accuracy_level::_usleep: return "Usleep Only"; case sleep_timers_accuracy_level::_all_timers: return "All Timers"; } diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h index f3e3b31f42..788a86a799 100644 --- 
a/rpcs3/Emu/system_config_types.h +++ b/rpcs3/Emu/system_config_types.h @@ -24,6 +24,7 @@ enum class spu_block_size_type enum class sleep_timers_accuracy_level { _as_host, + _dynamic, _usleep, _all_timers, }; diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 2b4d4a0b87..09057de169 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ switch (static_cast(index)) { case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy"); + case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy"); case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy"); case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy"); } diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index a038fa8c84..a3c1a7e17a 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -233,7 +233,7 @@ inline static const std::map settings_location { emu_settings_type::SPUCache, { "Core", "SPU Cache"}}, { emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}}, { emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}}, - { emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy"}}, + { emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy 2"}}, { emu_settings_type::ClocksScale, { "Core", "Clocks scale"}}, { emu_settings_type::AccuratePPU128Loop, { "Core", "Accurate PPU 128-byte Reservation Op Max Length"}}, { emu_settings_type::PerformanceReport, { "Core", "Enable Performance Report"}},