From d50bdd755446ab8062d51f7e0e97fbbd2483ed5d Mon Sep 17 00:00:00 2001
From: Elad <18193363+elad335@users.noreply.github.com>
Date: Mon, 6 Jan 2025 18:38:43 +0200
Subject: [PATCH] s

---
 rpcs3/Emu/Cell/lv2/lv2.cpp       | 57 ++++++++++++++++++++++++++------
 rpcs3/Emu/Cell/lv2/sys_timer.cpp |  2 +-
 rpcs3/util/atomic.cpp            | 30 ++++++++++++++---
 3 files changed, 73 insertions(+), 16 deletions(-)

diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp
index 73ea2adc45..dc6bbfc9a4 100644
--- a/rpcs3/Emu/Cell/lv2/lv2.cpp
+++ b/rpcs3/Emu/Cell/lv2/lv2.cpp
@@ -1308,7 +1308,7 @@ static atomic_t<bool> g_scheduler_ready = false;
 static atomic_t<u64> s_yield_frequency = 0;
 static atomic_t<u64> s_max_allowed_yield_tsc = 0;
 static atomic_t<u64> s_lv2_timers_sum_of_ten_delay_in_us = 5000;
-static atomic_t<u64> s_lv2_timers_min_timer_in_us = 0;
+static atomic_t<u64> s_lv2_timers_min_timer_in_us = u64{umax};
 static u64 s_last_yield_tsc = 0;
 atomic_t<u64> g_lv2_preempts_taken = 0;
 
@@ -1583,16 +1583,16 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
 	{
 		const u64 wait_until = start_time + std::min(timeout, ~start_time);
 
+		if (wait_until < s_lv2_timers_min_timer_in_us)
+		{
+			s_lv2_timers_min_timer_in_us.release(wait_until);
+		}
+
 		// Register timeout if necessary
 		for (auto it = g_waiting.cbegin(), end = g_waiting.cend();; it++)
 		{
 			if (it == end || it->first > wait_until)
 			{
-				if (it == g_waiting.cbegin())
-				{
-					s_lv2_timers_min_timer_in_us.release(wait_until);
-				}
-
 				g_waiting.emplace(it, wait_until, &thread);
 				break;
 			}
@@ -1856,7 +1856,7 @@ void lv2_obj::cleanup()
 	g_pending = 0;
 	s_yield_frequency = 0;
 	s_lv2_timers_sum_of_ten_delay_in_us = 5000;
-	s_lv2_timers_min_timer_in_us = 0;
+	s_lv2_timers_min_timer_in_us = u64{umax};
 }
 
 void lv2_obj::schedule_all(u64 current_time)
@@ -2148,7 +2148,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 
 	auto wait_for = [&](u64 timeout)
 	{
-		thread_ctrl::wait_on(state, old_state, timeout);
+		state.wait(old_state, atomic_wait_timeout{std::min(timeout, u64{umax} / 1000) * 1000});
 	};
 
 	for (;; old_state = state)
@@ -2156,7 +2156,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 		if (old_state & cpu_flag::notify)
 		{
 			// Timeout notification has been forced
-			break;
+			//break;
 		}
 
 		if (old_state & cpu_flag::signal)
@@ -2202,6 +2202,27 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 			}
 
 			passed = get_system_time() - start_time;
+
+			if (passed >= usec && cpu)
+			{
+				static atomic_t<u64> g_fail_time = 10000;
+				static atomic_t<u64> c_all = 0, c_sig = 0;
+				c_all++;
+				if (cpu->state & cpu_flag::notify)
+				{
+					c_sig++;
+					g_fail_time.atomic_op([miss = passed - usec](u64& x)
+					{
+						x = x - x / 100 + miss;
+					});
+					volatile u64 tls_fail_time = g_fail_time / 100;
+					+tls_fail_time;
+				}
+			}
+			else if (passed < usec && cpu && cpu->state & cpu_flag::notify)
+			{
+				__debugbreak();
+			}
 			continue;
 		}
 		else
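
The `atomic_op` in the hunk above maintains an exponentially decaying sum: each update drops 1/100th of the accumulator and adds the latest miss (how far past the requested `usec` the wakeup landed), so the sum converges towards 100x the average overshoot and `g_fail_time / 100` reads back the mean. A minimal standalone sketch of that accumulator; the /100 decay factor and the 10000 seed come from the hunk, while the constant 250us input is purely illustrative:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        std::uint64_t acc = 10000; // same seed as g_fail_time

        // Feed a constant 250us overshoot; acc converges towards 250 * 100,
        // so acc / 100 approximates the average miss.
        for (int i = 0; i < 2000; i++)
        {
            const std::uint64_t miss = 250;
            acc = acc - acc / 100 + miss;
        }

        std::printf("avg miss ~ %llu us\n", static_cast<unsigned long long>(acc / 100));
    }
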
@@ -2403,12 +2424,17 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
 	atomic_bs_t<cpu_flag>* notifies_cpus[16];
 	usz count_notifies_cpus = 0;
 
+	static atomic_t<u64>
+		g_ok = 0,
+		g_fail = 0;
+
 	std::unique_lock lock(g_mutex, std::try_to_lock);
 
 	if (!lock)
 	{
 		// Not only is that this method is an opportunistic optimization
 		// But if it's already locked than it is likely that soon another thread would do this check instead
+		g_fail++;
 		return;
 	}
 
@@ -2417,7 +2443,7 @@
 	if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2)
 	{
 		const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe();
-		s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10);
+		s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2));
 	}
 
 	// Check registered timeouts
@@ -2434,6 +2460,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
 			{
 				// Change cpu_thread::state for the lightweight notification to work
 				ensure(!target->state.test_and_set(cpu_flag::notify));
+				//target->state.notify_one();target->state.notify_one();
 				notifies_cpus[count_notifies_cpus++] = &target->state;
 			}
 		}
@@ -2444,6 +2471,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
 		}
 	}
 
+
 	if (g_waiting.empty())
 	{
 		s_lv2_timers_min_timer_in_us.release(u64{umax});
@@ -2454,11 +2482,18 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
 	}
 
 	lock.unlock();
+	g_ok++;
+
+	if (!count_notifies_cpus)
+	{
+		return;
+	}
 
 	for (usz i = count_notifies_cpus - 1; i != umax; i--)
 	{
-		atomic_wait_engine::notify_one(notifies_cpus[i]);
+		notifies_cpus[i]->notify_one();
 	}
+	std::this_thread::yield();
 }
 
 u64 lv2_obj::get_avg_timer_reponse_delay()
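
The delay-tracking hunk above drops the `/ 10` on the new sample: with the update `sum = sum - sum / 10 + delay`, the decaying sum settles at ten times the average delay, which matches the "sum of ten" name and lets `get_avg_timer_reponse_delay()` report the mean, presumably by dividing the sum by 10 (that function's body is not part of this patch). A minimal sketch comparing the two updates, assuming a constant 300us delay and the 5000 seed from the patch:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        std::uint64_t sum_new = 5000, sum_old = 5000; // seed from the patch
        const std::uint64_t delay = 300;              // hypothetical delay in us

        for (int i = 0; i < 200; i++)
        {
            sum_new = sum_new - sum_new / 10 + delay;      // patched update
            sum_old = sum_old - sum_old / 10 + delay / 10; // previous update
        }

        // The patched sum settles at 10 * delay, so sum / 10 equals the average;
        // the old one settled at delay itself, under-reporting it tenfold.
        std::printf("patched: %llu us, old: %llu us\n",
            static_cast<unsigned long long>(sum_new / 10),
            static_cast<unsigned long long>(sum_old / 10));
    }
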
diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp
index b4b3b780f2..5d712de53d 100644
--- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp
@@ -467,7 +467,7 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time)
 		sleep_time = std::max<u64>(1, utils::sub_saturate<u64>(sleep_time, -add_time));
 	}
 
-	lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy < sleep_timers_accuracy_level::_usleep ? sleep_time : 0);
+	lv2_obj::sleep(ppu, sleep_time);
 
 	if (!lv2_obj::wait_timeout(sleep_time, &ppu, true, true))
 	{
diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp
index 41b28d1d40..8a8788e481 100644
--- a/rpcs3/util/atomic.cpp
+++ b/rpcs3/util/atomic.cpp
@@ -6,6 +6,10 @@
 #define USE_STD
 #endif
 
+#ifdef _WIN32
+#include <Windows.h>
+#endif
+
 #ifdef _MSC_VER
 #include "emmintrin.h"
 
@@ -302,7 +306,7 @@ namespace
 			return false;
 		}
 
-		static LARGE_INTEGER instant{};
+		LARGE_INTEGER instant{};
 
 		if (NtReleaseKeyedEvent(nullptr, &sync, 1, &instant) != NTSTATUS_SUCCESS)
 		{
@@ -859,6 +863,19 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 {
 	uint ext_size = 0;
 
+#ifdef _WIN32
+	LARGE_INTEGER start_time{};
+	//QueryPerformanceCounter(&start_time); // get time in 1/perf_freq units from RDTSC
+
+	FILETIME ftime{};
+	if (timeout != umax)
+	{
+		GetSystemTimeAsFileTime(&ftime); // get time in 100ns units since January 1, 1601 (UTC)
+	}
+
+
+#endif
+
 #ifdef __linux__
 	::timespec ts{};
 	if (timeout + 1)
@@ -1073,7 +1090,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 			cond->cv->wait(lock);
 		}
 #elif defined(_WIN32)
-		LARGE_INTEGER qw;
+		LARGE_INTEGER qw{};
 		qw.QuadPart = -static_cast<s64>(timeout / 100);
 
 		if (timeout % 100)
@@ -1082,6 +1099,11 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 			qw.QuadPart -= 1;
 		}
 
+		if (!s_tls_one_time_wait_cb)
+		{
+			qw.QuadPart = (u64{ftime.dwHighDateTime} << 32) + ftime.dwLowDateTime - qw.QuadPart;
+		}
+
 		if (fallback) [[unlikely]]
 		{
 			if (!cond->set_sleep())
@@ -1096,7 +1118,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 		}
 		else if (NtWaitForAlertByThreadId)
 		{
-			switch (DWORD status = NtWaitForAlertByThreadId(cond, timeout + 1 ? &qw : nullptr))
+			switch (DWORD status = NtWaitForAlertByThreadId(nullptr, timeout + 1 ? &qw : nullptr))
 			{
 			case NTSTATUS_ALERTED: fallback = true; break;
 			case NTSTATUS_TIMEOUT: break;
@@ -1137,7 +1159,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 	while (!fallback)
 	{
 #if defined(_WIN32)
-		static LARGE_INTEGER instant{};
+		LARGE_INTEGER instant{};
 
 		if (cond->wakeup(1))
 		{
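
On Windows, the patched wait path (taken only when `s_tls_one_time_wait_cb` is unset, per the hunk above) encodes the timeout in the NT kernel convention: a negative `QuadPart` is a relative interval in 100ns units, while a positive one is an absolute time in 100ns units since January 1, 1601 (UTC). Subtracting the (negative) relative value from the current `GetSystemTimeAsFileTime()` reading therefore yields an absolute due time, which keeps the deadline fixed across retries of the wait loop instead of restarting the interval on every spurious wakeup. A minimal sketch of just that conversion; the 1.5ms duration is illustrative:

    #ifdef _WIN32
    #include <Windows.h>
    #include <cstdio>

    int main()
    {
        const unsigned long long timeout_ns = 1'500'000; // 1.5ms, illustrative

        LARGE_INTEGER qw{};
        qw.QuadPart = -static_cast<long long>(timeout_ns / 100); // relative form

        FILETIME ftime{};
        GetSystemTimeAsFileTime(&ftime); // 100ns units since January 1, 1601 (UTC)

        // now - (negative interval) == now + interval: an absolute due time
        const unsigned long long now =
            (static_cast<unsigned long long>(ftime.dwHighDateTime) << 32) + ftime.dwLowDateTime;
        qw.QuadPart = static_cast<long long>(now - qw.QuadPart);

        std::printf("absolute due time (100ns since 1601): %lld\n", qw.QuadPart);
    }
    #endif
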