From 64d6b88abd56530403b56c9757f01ac3230c8daa Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 30 Dec 2024 09:11:20 +0200 Subject: [PATCH 1/4] LV2: Introduce Dynamic Timer signals --- rpcs3/Emu/Cell/lv2/lv2.cpp | 137 ++++++++++++++++++++++++++---- rpcs3/Emu/Cell/lv2/sys_sync.h | 4 +- rpcs3/Emu/RSX/RSXThread.cpp | 2 + rpcs3/Emu/system_config.h | 6 +- rpcs3/Emu/system_config_types.cpp | 1 + rpcs3/Emu/system_config_types.h | 1 + rpcs3/rpcs3qt/emu_settings.cpp | 1 + rpcs3/rpcs3qt/emu_settings_type.h | 2 +- 8 files changed, 131 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 6349bd60c6..c6d69d8827 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1307,6 +1307,8 @@ static std::deque g_to_sleep; static atomic_t g_scheduler_ready = false; static atomic_t s_yield_frequency = 0; static atomic_t s_max_allowed_yield_tsc = 0; +static atomic_t s_lv2_timers_sum_of_ten_delay_in_us = 5000; +static atomic_t s_lv2_timers_min_timer_in_us = 0; static u64 s_last_yield_tsc = 0; atomic_t g_lv2_preempts_taken = 0; @@ -1432,7 +1434,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio) if (!g_postpone_notify_barrier) { - notify_all(); + notify_all(thread); } return result; @@ -1573,6 +1575,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time) { if (it == end || it->first > wait_until) { + if (it == g_waiting.cbegin()) + { + s_lv2_timers_min_timer_in_us.release(wait_until); + } + g_waiting.emplace(it, wait_until, &thread); break; } @@ -1835,6 +1842,8 @@ void lv2_obj::cleanup() g_waiting.clear(); g_pending = 0; s_yield_frequency = 0; + s_lv2_timers_sum_of_ten_delay_in_us = 5000; + s_lv2_timers_min_timer_in_us = 0; } void lv2_obj::schedule_all(u64 current_time) @@ -1876,7 +1885,7 @@ void lv2_obj::schedule_all(u64 current_time) } // Check registered timeouts - while (!g_waiting.empty()) + while (!g_waiting.empty() && it != std::end(g_to_notify)) { const auto pair = &g_waiting.front(); @@ -1896,15 +1905,7 @@ void lv2_obj::schedule_all(u64 current_time) ensure(!target->state.test_and_set(cpu_flag::notify)); // Otherwise notify it to wake itself - if (it == std::end(g_to_notify)) - { - // Out of notification slots, notify locally (resizable container is not worth it) - target->state.notify_one(); - } - else - { - *it++ = &target->state; - } + *it++ = &target->state; } } else @@ -2171,7 +2172,35 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep #endif // TODO: Tune for other non windows operating sytems - if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers)) + const sleep_timers_accuracy_level accuracy_type = g_cfg.core.sleep_timers_accuracy; + const u64 avg_delay = get_avg_timer_reponse_delay(); + + static atomic_t g_success = 0; + static atomic_t g_fails = 0; + + if (accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2) + { + wait_for(remaining); + + if (remaining < host_min_quantum) + { + g_success += remaining; + //g_success++; + } + + passed = get_system_time() - start_time; + continue; + } + else + { + if (remaining < host_min_quantum) + { + g_fails += remaining; + //g_fails++; + } + } + + if (accuracy_type < (is_usleep ? 
sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers)) { wait_for(remaining); } @@ -2222,7 +2251,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu) cpu_counter::remove(&cpu); } -void lv2_obj::notify_all() noexcept +void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept { for (auto cpu : g_to_notify) { @@ -2258,13 +2287,11 @@ void lv2_obj::notify_all() noexcept return; } - if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend)) + if (cpu->get_class() == thread_class::ppu && cpu->state.none_of(cpu_flag::suspend + cpu_flag::signal)) { return; } - std::optional lock; - constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value); u32 notifies[total_waiters]{}; @@ -2346,4 +2373,82 @@ void lv2_obj::notify_all() noexcept vm::reservation_notifier_notify(addr); } } + + if (woke_thread == cpu) + { + return; + } + + const u64 min_timer = s_lv2_timers_min_timer_in_us; + const u64 current_time = get_guest_system_time(); + + if (current_time < min_timer) + { + return; + } + + atomic_bs_t* notifies_cpus[16]; + usz count_notifies_cpus = 0; + + std::unique_lock lock(g_mutex, std::try_to_lock); + + if (!lock) + { + // Not only is that this method is an opportunistic optimization + // But if it's already locked than it is likely that soon another thread would do this check instead + return; + } + + // Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself + // This 'redundancy' is what allows proper measurements + if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2) + { + const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe(); + s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10); + } + + // Check registered timeouts + while (!g_waiting.empty() && count_notifies_cpus < std::size(notifies_cpus)) + { + const auto pair = &g_waiting.front(); + + if (pair->first <= current_time) + { + const auto target = pair->second; + g_waiting.pop_front(); + + if (target != cpu) + { + // Change cpu_thread::state for the lightweight notification to work + ensure(!target->state.test_and_set(cpu_flag::notify)); + notifies_cpus[count_notifies_cpus++] = &target->state; + } + } + else + { + // The list is sorted so assume no more timeouts + break; + } + } + + if (g_waiting.empty()) + { + s_lv2_timers_min_timer_in_us.release(u64{umax}); + } + else + { + s_lv2_timers_min_timer_in_us.release(g_waiting.front().first); + } + + lock.unlock(); + + for (usz i = count_notifies_cpus - 1; i != umax; i--) + { + atomic_wait_engine::notify_one(notifies_cpus[i]); + } +} + +u64 lv2_obj::get_avg_timer_reponse_delay() +{ + return s_lv2_timers_sum_of_ten_delay_in_us / 10; } diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index bd6004dfaa..244b029004 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -454,11 +454,13 @@ public: static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false); - static void notify_all() noexcept; + static void notify_all(cpu_thread* woke_thread = nullptr) noexcept; // Can be called before the actual sleep call in order to move it out of mutex scope static void prepare_for_sleep(cpu_thread& cpu); + static u64 get_avg_timer_reponse_delay(); + struct notify_all_t { notify_all_t() noexcept diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 29c2d8e865..1942f8cefb 100644 --- 
a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -870,11 +870,13 @@ namespace rsx { // Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays. do_local_task(rsx::FIFO::state::paused); + lv2_obj::notify_all(); thread_ctrl::wait_on(state, old, 16000); } else { on_semaphore_acquire_wait(); + lv2_obj::notify_all(); std::this_thread::yield(); } } diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 3cb3e39851..c207df4233 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -91,11 +91,7 @@ struct cfg_root : cfg::node cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true }; cfg::uint<0, 400> max_cpu_preempt_count_per_frame{ this, "Max CPU Preempt Count", 0, true }; cfg::_bool allow_rsx_cpu_preempt{ this, "Allow RSX CPU Preemptions", true, true }; -#if defined (__linux__) || defined (__APPLE__) - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true }; -#else - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true }; -#endif + cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy 2", sleep_timers_accuracy_level::_dynamic, true }; cfg::_int<-1000, 1500> usleep_addend{ this, "Usleep Time Addend", 0, true }; cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp index c01692b8a5..fc7fcfdd01 100644 --- a/rpcs3/Emu/system_config_types.cpp +++ b/rpcs3/Emu/system_config_types.cpp @@ -237,6 +237,7 @@ void fmt_class_string::format(std::string& out, u64 switch (value) { case sleep_timers_accuracy_level::_as_host: return "As Host"; + case sleep_timers_accuracy_level::_dynamic: return "Dynamic"; case sleep_timers_accuracy_level::_usleep: return "Usleep Only"; case sleep_timers_accuracy_level::_all_timers: return "All Timers"; } diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h index f3e3b31f42..788a86a799 100644 --- a/rpcs3/Emu/system_config_types.h +++ b/rpcs3/Emu/system_config_types.h @@ -24,6 +24,7 @@ enum class spu_block_size_type enum class sleep_timers_accuracy_level { _as_host, + _dynamic, _usleep, _all_timers, }; diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 2b4d4a0b87..09057de169 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ switch (static_cast(index)) { case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy"); + case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy"); case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy"); case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy"); } diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index a038fa8c84..a3c1a7e17a 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -233,7 +233,7 @@ inline static const std::map settings_location { emu_settings_type::SPUCache, { "Core", "SPU Cache"}}, { emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}}, { emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS 
Threads"}}, - { emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy"}}, + { emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy 2"}}, { emu_settings_type::ClocksScale, { "Core", "Clocks scale"}}, { emu_settings_type::AccuratePPU128Loop, { "Core", "Accurate PPU 128-byte Reservation Op Max Length"}}, { emu_settings_type::PerformanceReport, { "Core", "Enable Performance Report"}}, From 5fa2043e069638b0de908703475ad05840d489f8 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 30 Dec 2024 17:05:07 +0200 Subject: [PATCH 2/4] LV2: Usleep loop pattern recognition --- rpcs3/Emu/Cell/PPUThread.h | 5 +++++ rpcs3/Emu/Cell/lv2/lv2.cpp | 23 ++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 9f94ad50f6..df95875200 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -300,6 +300,11 @@ public: const char* last_function{}; // Sticky copy of current_function, is not cleared on function return const char* current_module{}; // Current module name, for savestates. + // Sycall pattern recognition variables + u64 last_lv2_deschedule_cia = umax; // Position of syscall that puts the PPU to sleep + u64 last_lv2_deschedule_r3 = umax; // R3 argument of syscall that puts the PPU to sleep + u64 last_lv2_deschedule_match_count = 0; // Arguments matching count when PPU puts to sleep + const bool is_interrupt_thread; // True for interrupts-handler threads // Thread name diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index c6d69d8827..73ea2adc45 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1339,11 +1339,13 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout) if (cpu.get_class() == thread_class::ppu) { - if (u32 addr = static_cast(cpu).res_notify) - { - static_cast(cpu).res_notify = 0; + ppu_thread& ppu = static_cast(cpu); - if (static_cast(cpu).res_notify_time != vm::reservation_notifier_count_index(addr).second) + if (u32 addr = ppu.res_notify) + { + ppu.res_notify = 0; + + if (ppu.res_notify_time != vm::reservation_notifier_count_index(addr).second) { // Ignore outdated notification request } @@ -1362,6 +1364,17 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout) vm::reservation_notifier_notify(addr); } } + + if (ppu.last_lv2_deschedule_cia == ppu.cia && ppu.last_lv2_deschedule_r3 == ppu.gpr[3]) + { + ppu.last_lv2_deschedule_match_count++; + } + else + { + ppu.last_lv2_deschedule_cia = ppu.cia; + ppu.last_lv2_deschedule_r3 = ppu.gpr[3]; + ppu.last_lv2_deschedule_match_count = 0; + } } bool result = false; @@ -2178,7 +2191,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep static atomic_t g_success = 0; static atomic_t g_fails = 0; - if (accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2) + if ((accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30) && ((avg_delay < (remaining + 15) / 2) || (cpu && cpu->last_lv2_deschedule_match_count > 3))) { wait_for(remaining); From d50bdd755446ab8062d51f7e0e97fbbd2483ed5d Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:38:43 +0200 Subject: [PATCH 3/4] s --- rpcs3/Emu/Cell/lv2/lv2.cpp | 57 ++++++++++++++++++++++++++------ rpcs3/Emu/Cell/lv2/sys_timer.cpp | 2 +- rpcs3/util/atomic.cpp | 30 ++++++++++++++--- 3 files changed, 73 insertions(+), 16 deletions(-) 
diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 73ea2adc45..dc6bbfc9a4 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1308,7 +1308,7 @@ static atomic_t g_scheduler_ready = false; static atomic_t s_yield_frequency = 0; static atomic_t s_max_allowed_yield_tsc = 0; static atomic_t s_lv2_timers_sum_of_ten_delay_in_us = 5000; -static atomic_t s_lv2_timers_min_timer_in_us = 0; +static atomic_t s_lv2_timers_min_timer_in_us = u64{umax}; static u64 s_last_yield_tsc = 0; atomic_t g_lv2_preempts_taken = 0; @@ -1583,16 +1583,16 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time) { const u64 wait_until = start_time + std::min(timeout, ~start_time); + if (wait_until < s_lv2_timers_min_timer_in_us) + { + s_lv2_timers_min_timer_in_us.release(wait_until); + } + // Register timeout if necessary for (auto it = g_waiting.cbegin(), end = g_waiting.cend();; it++) { if (it == end || it->first > wait_until) { - if (it == g_waiting.cbegin()) - { - s_lv2_timers_min_timer_in_us.release(wait_until); - } - g_waiting.emplace(it, wait_until, &thread); break; } @@ -1856,7 +1856,7 @@ void lv2_obj::cleanup() g_pending = 0; s_yield_frequency = 0; s_lv2_timers_sum_of_ten_delay_in_us = 5000; - s_lv2_timers_min_timer_in_us = 0; + s_lv2_timers_min_timer_in_us = u64{umax}; } void lv2_obj::schedule_all(u64 current_time) @@ -2148,7 +2148,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep auto wait_for = [&](u64 timeout) { - thread_ctrl::wait_on(state, old_state, timeout); + state.wait(old_state, atomic_wait_timeout{std::min(timeout, u64{umax} / 1000) * 1000}); }; for (;; old_state = state) @@ -2156,7 +2156,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep if (old_state & cpu_flag::notify) { // Timeout notification has been forced - break; + //break; } if (old_state & cpu_flag::signal) @@ -2202,6 +2202,27 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep } passed = get_system_time() - start_time; + + if (passed >= usec && cpu) + { + static atomic_t g_fail_time = 10000; + static atomic_t c_all = 0, c_sig = 0; + c_all++; + if (cpu->state & cpu_flag::notify) + { + c_sig++; + g_fail_time.atomic_op([miss = passed - usec](u64& x) + { + x = x - x / 100 + miss; + }); + volatile u64 tls_fail_time = g_fail_time / 100; + +tls_fail_time; + } + } + else if (passed < usec && cpu && cpu->state & cpu_flag::notify) + { + __debugbreak(); + } continue; } else @@ -2403,12 +2424,17 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept atomic_bs_t* notifies_cpus[16]; usz count_notifies_cpus = 0; + static atomic_t + g_ok = 0, + g_fail = 0; + std::unique_lock lock(g_mutex, std::try_to_lock); if (!lock) { // Not only is that this method is an opportunistic optimization // But if it's already locked than it is likely that soon another thread would do this check instead + g_fail++; return; } @@ -2417,7 +2443,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2) { const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe(); - s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10); + s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2)); } // Check registered timeouts @@ -2434,6 +2460,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept { // Change cpu_thread::state for the lightweight 
notification to work ensure(!target->state.test_and_set(cpu_flag::notify)); + //target->state.notify_one();target->state.notify_one(); notifies_cpus[count_notifies_cpus++] = &target->state; } } @@ -2444,6 +2471,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept } } + if (g_waiting.empty()) { s_lv2_timers_min_timer_in_us.release(u64{umax}); @@ -2454,11 +2482,18 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept } lock.unlock(); + g_ok++; + + if (!count_notifies_cpus) + { + return; + } for (usz i = count_notifies_cpus - 1; i != umax; i--) { - atomic_wait_engine::notify_one(notifies_cpus[i]); + notifies_cpus[i]->notify_one();; } + std::this_thread::yield(); } u64 lv2_obj::get_avg_timer_reponse_delay() diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp index b4b3b780f2..5d712de53d 100644 --- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp @@ -467,7 +467,7 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time) sleep_time = std::max(1, utils::sub_saturate(sleep_time, -add_time)); } - lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy < sleep_timers_accuracy_level::_usleep ? sleep_time : 0); + lv2_obj::sleep(ppu, sleep_time); if (!lv2_obj::wait_timeout(sleep_time, &ppu, true, true)) { diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp index 41b28d1d40..8a8788e481 100644 --- a/rpcs3/util/atomic.cpp +++ b/rpcs3/util/atomic.cpp @@ -6,6 +6,10 @@ #define USE_STD #endif +#ifdef _WIN32 +#include +#endif + #ifdef _MSC_VER #include "emmintrin.h" @@ -302,7 +306,7 @@ namespace return false; } - static LARGE_INTEGER instant{}; + LARGE_INTEGER instant{}; if (NtReleaseKeyedEvent(nullptr, &sync, 1, &instant) != NTSTATUS_SUCCESS) { @@ -859,6 +863,19 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa { uint ext_size = 0; +#ifdef _WIN32 + LARGE_INTEGER start_time{}; + //QueryPerformanceCounter(&start_time); // get time in 1/perf_freq units from RDTSC + + FILETIME ftime{}; + if (timeout != umax) + { + GetSystemTimeAsFileTime(&ftime); // get time in 100ns units since January 1, 1601 (UTC) + } + + +#endif + #ifdef __linux__ ::timespec ts{}; if (timeout + 1) @@ -1073,7 +1090,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa cond->cv->wait(lock); } #elif defined(_WIN32) - LARGE_INTEGER qw; + LARGE_INTEGER qw{}; qw.QuadPart = -static_cast(timeout / 100); if (timeout % 100) @@ -1082,6 +1099,11 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa qw.QuadPart -= 1; } + if (!s_tls_one_time_wait_cb) + { + qw.QuadPart = (u64{ftime.dwHighDateTime} << 32) + ftime.dwLowDateTime - qw.QuadPart; + } + if (fallback) [[unlikely]] { if (!cond->set_sleep()) @@ -1096,7 +1118,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa } else if (NtWaitForAlertByThreadId) { - switch (DWORD status = NtWaitForAlertByThreadId(cond, timeout + 1 ? &qw : nullptr)) + switch (DWORD status = NtWaitForAlertByThreadId(nullptr, timeout + 1 ? 
&qw : nullptr)) { case NTSTATUS_ALERTED: fallback = true; break; case NTSTATUS_TIMEOUT: break; @@ -1137,7 +1159,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa while (!fallback) { #if defined(_WIN32) - static LARGE_INTEGER instant{}; + LARGE_INTEGER instant{}; if (cond->wakeup(1)) { From d6e014b3a9addf7743c59a80b25ec252c1d617a8 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:41:41 +0200 Subject: [PATCH 4/4] PPU LLVM: Function table dependent resolver hashing --- Utilities/bin_patch.cpp | 5 +++++ rpcs3/Emu/Cell/PPUAnalyser.h | 2 +- rpcs3/Emu/Cell/PPUModule.cpp | 1 + rpcs3/Emu/Cell/PPUThread.cpp | 19 ++++++++++++++++--- rpcs3/Emu/Cell/lv2/sys_overlay.h | 1 - 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Utilities/bin_patch.cpp b/Utilities/bin_patch.cpp index 49b19f5bda..f106a8afb9 100644 --- a/Utilities/bin_patch.cpp +++ b/Utilities/bin_patch.cpp @@ -1449,6 +1449,8 @@ static usz apply_modification(std::vector& applied, patch_engine::patch_inf void patch_engine::apply(std::vector& applied_total, const std::string& name, std::function mem_translate, u32 filesz, u32 min_addr) { + applied_total.clear(); + if (!m_map.contains(name)) { return; @@ -1597,6 +1599,9 @@ void patch_engine::apply(std::vector& applied_total, const std::string& nam } } } + + // Ensure consistent order + std::stable_sort(applied_total.begin(), applied_total.end()); } void patch_engine::unload(const std::string& name) diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 9d6f4ef9ed..0b225bc821 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -96,6 +96,7 @@ struct ppu_module : public Type std::vector segs{}; std::vector secs{}; std::vector funcs{}; + std::vector applied_patches; std::deque> allocations; std::map addr_to_seg_index; @@ -185,7 +186,6 @@ struct main_ppu_module : public ppu_module { u32 elf_entry{}; u32 seg0_code_end{}; - std::vector applied_patches; // Disable inherited savestate ordering void save(utils::serial&) = delete; diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index d26f060b7d..48b9837ca7 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -1947,6 +1947,7 @@ shared_ptr ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c ppu_check_patch_spu_images(*prx, seg); } + prx->applied_patches = applied; prx->analyse(toc, 0, end, applied, exported_funcs); if (!ar && !virtual_load) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 8001b95ac4..8278954b75 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -4898,6 +4898,22 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s sha1_update(&ctx, ensure(info.get_ptr(func.addr)), func.size); } + if (fpos >= info.funcs.size()) + { + // Hash the entire function grouped addresses for the integrity of the symbol resolver function + // Potentially occuring during patches + + std::vector> addrs(info.funcs.size()); + usz addr_index = 0; + + for (const ppu_function& func : info.funcs) + { + addrs[addr_index] = func.addr; + } + + sha1_update(&ctx, addrs.data(), addrs.size() * sizeof(be_t)); + } + if (false) { const be_t forced_upd = 3; @@ -4920,7 +4936,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s accurate_fpcc, accurate_vnan, accurate_nj_mode, - contains_symbol_resolver, __bitset_enum_max }; @@ -4950,8 +4965,6 @@ bool ppu_initialize(const 
ppu_module& info, bool check_only, u64 file_s settings += ppu_settings::accurate_vnan, settings -= ppu_settings::fixup_vnan, fmt::throw_exception("VNAN Not implemented"); if (g_cfg.core.ppu_use_nj_bit) settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented"); - if (fpos >= info.funcs.size()) - settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose // Write version, hash, CPU, settings fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); diff --git a/rpcs3/Emu/Cell/lv2/sys_overlay.h b/rpcs3/Emu/Cell/lv2/sys_overlay.h index ef1c1ffbd7..1c950a4d0c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_overlay.h +++ b/rpcs3/Emu/Cell/lv2/sys_overlay.h @@ -11,7 +11,6 @@ struct lv2_overlay final : ppu_module u32 entry{}; u32 seg0_code_end{}; - std::vector applied_patches; lv2_overlay() = default; lv2_overlay(utils::serial&){}
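
The dynamic sleep-timer mode introduced above decides whether to trust the host timer by tracking the average wake-up delay in s_lv2_timers_sum_of_ten_delay_in_us. The following standalone C++ sketch (not the emulator's code; the names and the plain std::atomic wrapper are stand-ins) shows how that running average behaves with the update formula as of the third patch: the stored value is kept at roughly ten times the average delay, so reading the average back is a single division, as in get_avg_timer_reponse_delay().

#include <atomic>
#include <cstdint>

// Stand-in for s_lv2_timers_sum_of_ten_delay_in_us; seeded at 5000 so the
// reported average starts at 500 microseconds, as in lv2.cpp.
std::atomic<std::uint64_t> sum_of_ten_delay_us{5000};

// Called with the measured delay between a timer's deadline and the moment
// the expired waiter was actually noticed.
void record_timer_delay(std::uint64_t observed_delay_us)
{
    const std::uint64_t sum = sum_of_ten_delay_us.load();
    // Exponential moving average with a weight of 1/10 on the new sample,
    // stored scaled by 10: the steady state is ten times the typical delay.
    sum_of_ten_delay_us.store(sum - sum / 10 + observed_delay_us);
}

// Equivalent of get_avg_timer_reponse_delay(): the average is simply sum / 10.
std::uint64_t avg_timer_delay_us()
{
    return sum_of_ten_delay_us.load() / 10;
}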
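
Patch 2 treats a PPU thread as sitting in a timed polling loop when the same syscall site keeps descheduling it with the same r3 argument. A minimal sketch of that bookkeeping, assuming a free-standing struct instead of the last_lv2_deschedule_* fields added to ppu_thread:

#include <cstdint>

struct sleep_site_tracker
{
    std::uint64_t last_cia = UINT64_MAX;   // address (CIA) of the descheduling syscall
    std::uint64_t last_r3 = UINT64_MAX;    // first syscall argument (r3)
    std::uint64_t match_count = 0;         // consecutive identical sleep requests

    // Returns true once the same site/argument pair has repeated more than three
    // times, the same threshold wait_timeout() combines with the average-delay check.
    bool note_sleep(std::uint64_t cia, std::uint64_t r3)
    {
        if (cia == last_cia && r3 == last_r3)
        {
            match_count++;
        }
        else
        {
            last_cia = cia;
            last_r3 = r3;
            match_count = 0;
        }

        return match_count > 3;
    }
};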
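
The timer path added to notify_all() drains the sorted g_waiting queue under a try-lock, collects at most 16 expired waiters, and performs the actual notifications only after the lock is released. A simplified stand-alone sketch of that pattern (it omits the cpu_flag::notify handshake and the reverse notification order used in the patch; all types here are illustrative stand-ins):

#include <cstddef>
#include <cstdint>
#include <deque>
#include <iterator>
#include <mutex>
#include <utility>

struct waiter { void wake() {} };                      // stand-in for cpu_thread notification

std::mutex queue_mutex;                                // stand-in for lv2's g_mutex
std::deque<std::pair<std::uint64_t, waiter*>> waiting; // sorted by wake-up time, ascending

void wake_expired(std::uint64_t current_time)
{
    waiter* batch[16];
    std::size_t count = 0;

    {
        std::unique_lock lock(queue_mutex, std::try_to_lock);

        if (!lock)
        {
            // Opportunistic path: if another thread holds the lock, it will
            // perform (or has just performed) the same check.
            return;
        }

        // The queue is sorted, so stop at the first entry that has not expired.
        while (!waiting.empty() && count < std::size(batch) && waiting.front().first <= current_time)
        {
            batch[count++] = waiting.front().second;
            waiting.pop_front();
        }
    }

    // Notify outside the lock, as the patch does after lock.unlock().
    for (std::size_t i = 0; i < count; i++)
    {
        batch[i]->wake();
    }
}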