From 64d6b88abd56530403b56c9757f01ac3230c8daa Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 30 Dec 2024 09:11:20 +0200 Subject: [PATCH 1/4] LV2: Introduce Dynamic Timer signals --- rpcs3/Emu/Cell/lv2/lv2.cpp | 137 ++++++++++++++++++++++++++---- rpcs3/Emu/Cell/lv2/sys_sync.h | 4 +- rpcs3/Emu/RSX/RSXThread.cpp | 2 + rpcs3/Emu/system_config.h | 6 +- rpcs3/Emu/system_config_types.cpp | 1 + rpcs3/Emu/system_config_types.h | 1 + rpcs3/rpcs3qt/emu_settings.cpp | 1 + rpcs3/rpcs3qt/emu_settings_type.h | 2 +- 8 files changed, 131 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 6349bd60c6..c6d69d8827 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1307,6 +1307,8 @@ static std::deque g_to_sleep; static atomic_t g_scheduler_ready = false; static atomic_t s_yield_frequency = 0; static atomic_t s_max_allowed_yield_tsc = 0; +static atomic_t s_lv2_timers_sum_of_ten_delay_in_us = 5000; +static atomic_t s_lv2_timers_min_timer_in_us = 0; static u64 s_last_yield_tsc = 0; atomic_t g_lv2_preempts_taken = 0; @@ -1432,7 +1434,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio) if (!g_postpone_notify_barrier) { - notify_all(); + notify_all(thread); } return result; @@ -1573,6 +1575,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time) { if (it == end || it->first > wait_until) { + if (it == g_waiting.cbegin()) + { + s_lv2_timers_min_timer_in_us.release(wait_until); + } + g_waiting.emplace(it, wait_until, &thread); break; } @@ -1835,6 +1842,8 @@ void lv2_obj::cleanup() g_waiting.clear(); g_pending = 0; s_yield_frequency = 0; + s_lv2_timers_sum_of_ten_delay_in_us = 5000; + s_lv2_timers_min_timer_in_us = 0; } void lv2_obj::schedule_all(u64 current_time) @@ -1876,7 +1885,7 @@ void lv2_obj::schedule_all(u64 current_time) } // Check registered timeouts - while (!g_waiting.empty()) + while (!g_waiting.empty() && it != std::end(g_to_notify)) { const auto pair = &g_waiting.front(); @@ -1896,15 +1905,7 @@ void lv2_obj::schedule_all(u64 current_time) ensure(!target->state.test_and_set(cpu_flag::notify)); // Otherwise notify it to wake itself - if (it == std::end(g_to_notify)) - { - // Out of notification slots, notify locally (resizable container is not worth it) - target->state.notify_one(); - } - else - { - *it++ = &target->state; - } + *it++ = &target->state; } } else @@ -2171,7 +2172,35 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep #endif // TODO: Tune for other non windows operating sytems - if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers)) + const sleep_timers_accuracy_level accuracy_type = g_cfg.core.sleep_timers_accuracy; + const u64 avg_delay = get_avg_timer_reponse_delay(); + + static atomic_t g_success = 0; + static atomic_t g_fails = 0; + + if (accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2) + { + wait_for(remaining); + + if (remaining < host_min_quantum) + { + g_success += remaining; + //g_success++; + } + + passed = get_system_time() - start_time; + continue; + } + else + { + if (remaining < host_min_quantum) + { + g_fails += remaining; + //g_fails++; + } + } + + if (accuracy_type < (is_usleep ? 
sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers)) { wait_for(remaining); } @@ -2222,7 +2251,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu) cpu_counter::remove(&cpu); } -void lv2_obj::notify_all() noexcept +void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept { for (auto cpu : g_to_notify) { @@ -2258,13 +2287,11 @@ void lv2_obj::notify_all() noexcept return; } - if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend)) + if (cpu->get_class() == thread_class::ppu && cpu->state.none_of(cpu_flag::suspend + cpu_flag::signal)) { return; } - std::optional lock; - constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value); u32 notifies[total_waiters]{}; @@ -2346,4 +2373,82 @@ void lv2_obj::notify_all() noexcept vm::reservation_notifier_notify(addr); } } + + if (woke_thread == cpu) + { + return; + } + + const u64 min_timer = s_lv2_timers_min_timer_in_us; + const u64 current_time = get_guest_system_time(); + + if (current_time < min_timer) + { + return; + } + + atomic_bs_t* notifies_cpus[16]; + usz count_notifies_cpus = 0; + + std::unique_lock lock(g_mutex, std::try_to_lock); + + if (!lock) + { + // Not only is that this method is an opportunistic optimization + // But if it's already locked than it is likely that soon another thread would do this check instead + return; + } + + // Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself + // This 'redundancy' is what allows proper measurements + if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2) + { + const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe(); + s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10); + } + + // Check registered timeouts + while (!g_waiting.empty() && count_notifies_cpus < std::size(notifies_cpus)) + { + const auto pair = &g_waiting.front(); + + if (pair->first <= current_time) + { + const auto target = pair->second; + g_waiting.pop_front(); + + if (target != cpu) + { + // Change cpu_thread::state for the lightweight notification to work + ensure(!target->state.test_and_set(cpu_flag::notify)); + notifies_cpus[count_notifies_cpus++] = &target->state; + } + } + else + { + // The list is sorted so assume no more timeouts + break; + } + } + + if (g_waiting.empty()) + { + s_lv2_timers_min_timer_in_us.release(u64{umax}); + } + else + { + s_lv2_timers_min_timer_in_us.release(g_waiting.front().first); + } + + lock.unlock(); + + for (usz i = count_notifies_cpus - 1; i != umax; i--) + { + atomic_wait_engine::notify_one(notifies_cpus[i]); + } +} + +u64 lv2_obj::get_avg_timer_reponse_delay() +{ + return s_lv2_timers_sum_of_ten_delay_in_us / 10; } diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index bd6004dfaa..244b029004 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -454,11 +454,13 @@ public: static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false); - static void notify_all() noexcept; + static void notify_all(cpu_thread* woke_thread = nullptr) noexcept; // Can be called before the actual sleep call in order to move it out of mutex scope static void prepare_for_sleep(cpu_thread& cpu); + static u64 get_avg_timer_reponse_delay(); + struct notify_all_t { notify_all_t() noexcept diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 29c2d8e865..1942f8cefb 100644 --- 
a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -870,11 +870,13 @@ namespace rsx { // Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays. do_local_task(rsx::FIFO::state::paused); + lv2_obj::notify_all(); thread_ctrl::wait_on(state, old, 16000); } else { on_semaphore_acquire_wait(); + lv2_obj::notify_all(); std::this_thread::yield(); } } diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 3cb3e39851..c207df4233 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -91,11 +91,7 @@ struct cfg_root : cfg::node cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true }; cfg::uint<0, 400> max_cpu_preempt_count_per_frame{ this, "Max CPU Preempt Count", 0, true }; cfg::_bool allow_rsx_cpu_preempt{ this, "Allow RSX CPU Preemptions", true, true }; -#if defined (__linux__) || defined (__APPLE__) - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true }; -#else - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true }; -#endif + cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy 2", sleep_timers_accuracy_level::_dynamic, true }; cfg::_int<-1000, 1500> usleep_addend{ this, "Usleep Time Addend", 0, true }; cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp index c01692b8a5..fc7fcfdd01 100644 --- a/rpcs3/Emu/system_config_types.cpp +++ b/rpcs3/Emu/system_config_types.cpp @@ -237,6 +237,7 @@ void fmt_class_string::format(std::string& out, u64 switch (value) { case sleep_timers_accuracy_level::_as_host: return "As Host"; + case sleep_timers_accuracy_level::_dynamic: return "Dynamic"; case sleep_timers_accuracy_level::_usleep: return "Usleep Only"; case sleep_timers_accuracy_level::_all_timers: return "All Timers"; } diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h index f3e3b31f42..788a86a799 100644 --- a/rpcs3/Emu/system_config_types.h +++ b/rpcs3/Emu/system_config_types.h @@ -24,6 +24,7 @@ enum class spu_block_size_type enum class sleep_timers_accuracy_level { _as_host, + _dynamic, _usleep, _all_timers, }; diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 2b4d4a0b87..09057de169 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ switch (static_cast(index)) { case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy"); + case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy"); case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy"); case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy"); } diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index a038fa8c84..a3c1a7e17a 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -233,7 +233,7 @@ inline static const std::map settings_location { emu_settings_type::SPUCache, { "Core", "SPU Cache"}}, { emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}}, { emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS 
Threads"}}, - { emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy"}}, + { emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy 2"}}, { emu_settings_type::ClocksScale, { "Core", "Clocks scale"}}, { emu_settings_type::AccuratePPU128Loop, { "Core", "Accurate PPU 128-byte Reservation Op Max Length"}}, { emu_settings_type::PerformanceReport, { "Core", "Enable Performance Report"}}, From 5fa2043e069638b0de908703475ad05840d489f8 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 30 Dec 2024 17:05:07 +0200 Subject: [PATCH 2/4] LV2: Usleep loop pattern recognition --- rpcs3/Emu/Cell/PPUThread.h | 5 +++++ rpcs3/Emu/Cell/lv2/lv2.cpp | 23 ++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 9f94ad50f6..df95875200 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -300,6 +300,11 @@ public: const char* last_function{}; // Sticky copy of current_function, is not cleared on function return const char* current_module{}; // Current module name, for savestates. + // Sycall pattern recognition variables + u64 last_lv2_deschedule_cia = umax; // Position of syscall that puts the PPU to sleep + u64 last_lv2_deschedule_r3 = umax; // R3 argument of syscall that puts the PPU to sleep + u64 last_lv2_deschedule_match_count = 0; // Arguments matching count when PPU puts to sleep + const bool is_interrupt_thread; // True for interrupts-handler threads // Thread name diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index c6d69d8827..73ea2adc45 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1339,11 +1339,13 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout) if (cpu.get_class() == thread_class::ppu) { - if (u32 addr = static_cast(cpu).res_notify) - { - static_cast(cpu).res_notify = 0; + ppu_thread& ppu = static_cast(cpu); - if (static_cast(cpu).res_notify_time != vm::reservation_notifier_count_index(addr).second) + if (u32 addr = ppu.res_notify) + { + ppu.res_notify = 0; + + if (ppu.res_notify_time != vm::reservation_notifier_count_index(addr).second) { // Ignore outdated notification request } @@ -1362,6 +1364,17 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout) vm::reservation_notifier_notify(addr); } } + + if (ppu.last_lv2_deschedule_cia == ppu.cia && ppu.last_lv2_deschedule_r3 == ppu.gpr[3]) + { + ppu.last_lv2_deschedule_match_count++; + } + else + { + ppu.last_lv2_deschedule_cia = ppu.cia; + ppu.last_lv2_deschedule_r3 = ppu.gpr[3]; + ppu.last_lv2_deschedule_match_count = 0; + } } bool result = false; @@ -2178,7 +2191,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep static atomic_t g_success = 0; static atomic_t g_fails = 0; - if (accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30 && avg_delay < (remaining + 15) / 2) + if ((accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30) && ((avg_delay < (remaining + 15) / 2) || (cpu && cpu->last_lv2_deschedule_match_count > 3))) { wait_for(remaining); From d50bdd755446ab8062d51f7e0e97fbbd2483ed5d Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:38:43 +0200 Subject: [PATCH 3/4] s --- rpcs3/Emu/Cell/lv2/lv2.cpp | 57 ++++++++++++++++++++++++++------ rpcs3/Emu/Cell/lv2/sys_timer.cpp | 2 +- rpcs3/util/atomic.cpp | 30 ++++++++++++++--- 3 files changed, 73 insertions(+), 16 deletions(-) 
diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 73ea2adc45..dc6bbfc9a4 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1308,7 +1308,7 @@ static atomic_t g_scheduler_ready = false; static atomic_t s_yield_frequency = 0; static atomic_t s_max_allowed_yield_tsc = 0; static atomic_t s_lv2_timers_sum_of_ten_delay_in_us = 5000; -static atomic_t s_lv2_timers_min_timer_in_us = 0; +static atomic_t s_lv2_timers_min_timer_in_us = u64{umax}; static u64 s_last_yield_tsc = 0; atomic_t g_lv2_preempts_taken = 0; @@ -1583,16 +1583,16 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time) { const u64 wait_until = start_time + std::min(timeout, ~start_time); + if (wait_until < s_lv2_timers_min_timer_in_us) + { + s_lv2_timers_min_timer_in_us.release(wait_until); + } + // Register timeout if necessary for (auto it = g_waiting.cbegin(), end = g_waiting.cend();; it++) { if (it == end || it->first > wait_until) { - if (it == g_waiting.cbegin()) - { - s_lv2_timers_min_timer_in_us.release(wait_until); - } - g_waiting.emplace(it, wait_until, &thread); break; } @@ -1856,7 +1856,7 @@ void lv2_obj::cleanup() g_pending = 0; s_yield_frequency = 0; s_lv2_timers_sum_of_ten_delay_in_us = 5000; - s_lv2_timers_min_timer_in_us = 0; + s_lv2_timers_min_timer_in_us = u64{umax}; } void lv2_obj::schedule_all(u64 current_time) @@ -2148,7 +2148,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep auto wait_for = [&](u64 timeout) { - thread_ctrl::wait_on(state, old_state, timeout); + state.wait(old_state, atomic_wait_timeout{std::min(timeout, u64{umax} / 1000) * 1000}); }; for (;; old_state = state) @@ -2156,7 +2156,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep if (old_state & cpu_flag::notify) { // Timeout notification has been forced - break; + //break; } if (old_state & cpu_flag::signal) @@ -2202,6 +2202,27 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep } passed = get_system_time() - start_time; + + if (passed >= usec && cpu) + { + static atomic_t g_fail_time = 10000; + static atomic_t c_all = 0, c_sig = 0; + c_all++; + if (cpu->state & cpu_flag::notify) + { + c_sig++; + g_fail_time.atomic_op([miss = passed - usec](u64& x) + { + x = x - x / 100 + miss; + }); + volatile u64 tls_fail_time = g_fail_time / 100; + +tls_fail_time; + } + } + else if (passed < usec && cpu && cpu->state & cpu_flag::notify) + { + __debugbreak(); + } continue; } else @@ -2403,12 +2424,17 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept atomic_bs_t* notifies_cpus[16]; usz count_notifies_cpus = 0; + static atomic_t + g_ok = 0, + g_fail = 0; + std::unique_lock lock(g_mutex, std::try_to_lock); if (!lock) { // Not only is that this method is an opportunistic optimization // But if it's already locked than it is likely that soon another thread would do this check instead + g_fail++; return; } @@ -2417,7 +2443,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2) { const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe(); - s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10); + s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2)); } // Check registered timeouts @@ -2434,6 +2460,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept { // Change cpu_thread::state for the lightweight 
notification to work ensure(!target->state.test_and_set(cpu_flag::notify)); + //target->state.notify_one();target->state.notify_one(); notifies_cpus[count_notifies_cpus++] = &target->state; } } @@ -2444,6 +2471,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept } } + if (g_waiting.empty()) { s_lv2_timers_min_timer_in_us.release(u64{umax}); @@ -2454,11 +2482,18 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept } lock.unlock(); + g_ok++; + + if (!count_notifies_cpus) + { + return; + } for (usz i = count_notifies_cpus - 1; i != umax; i--) { - atomic_wait_engine::notify_one(notifies_cpus[i]); + notifies_cpus[i]->notify_one();; } + std::this_thread::yield(); } u64 lv2_obj::get_avg_timer_reponse_delay() diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp index b4b3b780f2..5d712de53d 100644 --- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp @@ -467,7 +467,7 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time) sleep_time = std::max(1, utils::sub_saturate(sleep_time, -add_time)); } - lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy < sleep_timers_accuracy_level::_usleep ? sleep_time : 0); + lv2_obj::sleep(ppu, sleep_time); if (!lv2_obj::wait_timeout(sleep_time, &ppu, true, true)) { diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp index 41b28d1d40..8a8788e481 100644 --- a/rpcs3/util/atomic.cpp +++ b/rpcs3/util/atomic.cpp @@ -6,6 +6,10 @@ #define USE_STD #endif +#ifdef _WIN32 +#include +#endif + #ifdef _MSC_VER #include "emmintrin.h" @@ -302,7 +306,7 @@ namespace return false; } - static LARGE_INTEGER instant{}; + LARGE_INTEGER instant{}; if (NtReleaseKeyedEvent(nullptr, &sync, 1, &instant) != NTSTATUS_SUCCESS) { @@ -859,6 +863,19 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa { uint ext_size = 0; +#ifdef _WIN32 + LARGE_INTEGER start_time{}; + //QueryPerformanceCounter(&start_time); // get time in 1/perf_freq units from RDTSC + + FILETIME ftime{}; + if (timeout != umax) + { + GetSystemTimeAsFileTime(&ftime); // get time in 100ns units since January 1, 1601 (UTC) + } + + +#endif + #ifdef __linux__ ::timespec ts{}; if (timeout + 1) @@ -1073,7 +1090,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa cond->cv->wait(lock); } #elif defined(_WIN32) - LARGE_INTEGER qw; + LARGE_INTEGER qw{}; qw.QuadPart = -static_cast(timeout / 100); if (timeout % 100) @@ -1082,6 +1099,11 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa qw.QuadPart -= 1; } + if (!s_tls_one_time_wait_cb) + { + qw.QuadPart = (u64{ftime.dwHighDateTime} << 32) + ftime.dwLowDateTime - qw.QuadPart; + } + if (fallback) [[unlikely]] { if (!cond->set_sleep()) @@ -1096,7 +1118,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa } else if (NtWaitForAlertByThreadId) { - switch (DWORD status = NtWaitForAlertByThreadId(cond, timeout + 1 ? &qw : nullptr)) + switch (DWORD status = NtWaitForAlertByThreadId(nullptr, timeout + 1 ? 
&qw : nullptr)) { case NTSTATUS_ALERTED: fallback = true; break; case NTSTATUS_TIMEOUT: break; @@ -1137,7 +1159,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa while (!fallback) { #if defined(_WIN32) - static LARGE_INTEGER instant{}; + LARGE_INTEGER instant{}; if (cond->wakeup(1)) { From d6e014b3a9addf7743c59a80b25ec252c1d617a8 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:41:41 +0200 Subject: [PATCH 4/4] PPU LLVM: Function table dependent resolver hashing --- Utilities/bin_patch.cpp | 5 +++++ rpcs3/Emu/Cell/PPUAnalyser.h | 2 +- rpcs3/Emu/Cell/PPUModule.cpp | 1 + rpcs3/Emu/Cell/PPUThread.cpp | 19 ++++++++++++++++--- rpcs3/Emu/Cell/lv2/sys_overlay.h | 1 - 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Utilities/bin_patch.cpp b/Utilities/bin_patch.cpp index 49b19f5bda..f106a8afb9 100644 --- a/Utilities/bin_patch.cpp +++ b/Utilities/bin_patch.cpp @@ -1449,6 +1449,8 @@ static usz apply_modification(std::vector& applied, patch_engine::patch_inf void patch_engine::apply(std::vector& applied_total, const std::string& name, std::function mem_translate, u32 filesz, u32 min_addr) { + applied_total.clear(); + if (!m_map.contains(name)) { return; @@ -1597,6 +1599,9 @@ void patch_engine::apply(std::vector& applied_total, const std::string& nam } } } + + // Ensure consistent order + std::stable_sort(applied_total.begin(), applied_total.end()); } void patch_engine::unload(const std::string& name) diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 9d6f4ef9ed..0b225bc821 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -96,6 +96,7 @@ struct ppu_module : public Type std::vector segs{}; std::vector secs{}; std::vector funcs{}; + std::vector applied_patches; std::deque> allocations; std::map addr_to_seg_index; @@ -185,7 +186,6 @@ struct main_ppu_module : public ppu_module { u32 elf_entry{}; u32 seg0_code_end{}; - std::vector applied_patches; // Disable inherited savestate ordering void save(utils::serial&) = delete; diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index d26f060b7d..48b9837ca7 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -1947,6 +1947,7 @@ shared_ptr ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c ppu_check_patch_spu_images(*prx, seg); } + prx->applied_patches = applied; prx->analyse(toc, 0, end, applied, exported_funcs); if (!ar && !virtual_load) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 8001b95ac4..8278954b75 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -4898,6 +4898,22 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s sha1_update(&ctx, ensure(info.get_ptr(func.addr)), func.size); } + if (fpos >= info.funcs.size()) + { + // Hash the entire function grouped addresses for the integrity of the symbol resolver function + // Potentially occuring during patches + + std::vector> addrs(info.funcs.size()); + usz addr_index = 0; + + for (const ppu_function& func : info.funcs) + { + addrs[addr_index] = func.addr; + } + + sha1_update(&ctx, addrs.data(), addrs.size() * sizeof(be_t)); + } + if (false) { const be_t forced_upd = 3; @@ -4920,7 +4936,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s accurate_fpcc, accurate_vnan, accurate_nj_mode, - contains_symbol_resolver, __bitset_enum_max }; @@ -4950,8 +4965,6 @@ bool ppu_initialize(const 
ppu_module& info, bool check_only, u64 file_s settings += ppu_settings::accurate_vnan, settings -= ppu_settings::fixup_vnan, fmt::throw_exception("VNAN Not implemented"); if (g_cfg.core.ppu_use_nj_bit) settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented"); - if (fpos >= info.funcs.size()) - settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose // Write version, hash, CPU, settings fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); diff --git a/rpcs3/Emu/Cell/lv2/sys_overlay.h b/rpcs3/Emu/Cell/lv2/sys_overlay.h index ef1c1ffbd7..1c950a4d0c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_overlay.h +++ b/rpcs3/Emu/Cell/lv2/sys_overlay.h @@ -11,7 +11,6 @@ struct lv2_overlay final : ppu_module u32 entry{}; u32 seg0_code_end{}; - std::vector applied_patches; lv2_overlay() = default; lv2_overlay(utils::serial&){}
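
The dynamic sleep-timer mode introduced above decides whether to trust the host timer by tracking the average wake-up delay in s_lv2_timers_sum_of_ten_delay_in_us. The following standalone C++ sketch (not the emulator's code; the names and the plain std::atomic wrapper are stand-ins) shows how that running average behaves with the update formula as of the third patch: the stored value is kept at roughly ten times the average delay, so reading the average back is a single division, as in get_avg_timer_reponse_delay().

#include <atomic>
#include <cstdint>

// Stand-in for s_lv2_timers_sum_of_ten_delay_in_us; seeded at 5000 so the
// reported average starts at 500 microseconds, as in lv2.cpp.
std::atomic<std::uint64_t> sum_of_ten_delay_us{5000};

// Called with the measured delay between a timer's deadline and the moment
// the expired waiter was actually noticed.
void record_timer_delay(std::uint64_t observed_delay_us)
{
    const std::uint64_t sum = sum_of_ten_delay_us.load();
    // Exponential moving average with a weight of 1/10 on the new sample,
    // stored scaled by 10: the steady state is ten times the typical delay.
    sum_of_ten_delay_us.store(sum - sum / 10 + observed_delay_us);
}

// Equivalent of get_avg_timer_reponse_delay(): the average is simply sum / 10.
std::uint64_t avg_timer_delay_us()
{
    return sum_of_ten_delay_us.load() / 10;
}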
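
Patch 2 treats a PPU thread as sitting in a timed polling loop when the same syscall site keeps descheduling it with the same r3 argument. A minimal sketch of that bookkeeping, assuming a free-standing struct instead of the last_lv2_deschedule_* fields added to ppu_thread:

#include <cstdint>

struct sleep_site_tracker
{
    std::uint64_t last_cia = UINT64_MAX;   // address (CIA) of the descheduling syscall
    std::uint64_t last_r3 = UINT64_MAX;    // first syscall argument (r3)
    std::uint64_t match_count = 0;         // consecutive identical sleep requests

    // Returns true once the same site/argument pair has repeated more than three
    // times, the same threshold wait_timeout() combines with the average-delay check.
    bool note_sleep(std::uint64_t cia, std::uint64_t r3)
    {
        if (cia == last_cia && r3 == last_r3)
        {
            match_count++;
        }
        else
        {
            last_cia = cia;
            last_r3 = r3;
            match_count = 0;
        }

        return match_count > 3;
    }
};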
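
The timer path added to notify_all() drains the sorted g_waiting queue under a try-lock, collects at most 16 expired waiters, and performs the actual notifications only after the lock is released. A simplified stand-alone sketch of that pattern (it omits the cpu_flag::notify handshake and the reverse notification order used in the patch; all types here are illustrative stand-ins):

#include <cstddef>
#include <cstdint>
#include <deque>
#include <iterator>
#include <mutex>
#include <utility>

struct waiter { void wake() {} };                      // stand-in for cpu_thread notification

std::mutex queue_mutex;                                // stand-in for lv2's g_mutex
std::deque<std::pair<std::uint64_t, waiter*>> waiting; // sorted by wake-up time, ascending

void wake_expired(std::uint64_t current_time)
{
    waiter* batch[16];
    std::size_t count = 0;

    {
        std::unique_lock lock(queue_mutex, std::try_to_lock);

        if (!lock)
        {
            // Opportunistic path: if another thread holds the lock, it will
            // perform (or has just performed) the same check.
            return;
        }

        // The queue is sorted, so stop at the first entry that has not expired.
        while (!waiting.empty() && count < std::size(batch) && waiting.front().first <= current_time)
        {
            batch[count++] = waiting.front().second;
            waiting.pop_front();
        }
    }

    // Notify outside the lock, as the patch does after lock.unlock().
    for (std::size_t i = 0; i < count; i++)
    {
        batch[i]->wake();
    }
}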