This commit is contained in:
Elad 2025-01-06 18:38:43 +02:00
parent 5fa2043e06
commit d50bdd7554
3 changed files with 73 additions and 16 deletions

View file

@ -1308,7 +1308,7 @@ static atomic_t<bool> g_scheduler_ready = false;
static atomic_t<u64> s_yield_frequency = 0;
static atomic_t<u64> s_max_allowed_yield_tsc = 0;
static atomic_t<u64> s_lv2_timers_sum_of_ten_delay_in_us = 5000;
static atomic_t<u64> s_lv2_timers_min_timer_in_us = 0;
static atomic_t<u64> s_lv2_timers_min_timer_in_us = u64{umax};
static u64 s_last_yield_tsc = 0;
atomic_t<u32> g_lv2_preempts_taken = 0;
@ -1583,16 +1583,16 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
{
const u64 wait_until = start_time + std::min<u64>(timeout, ~start_time);
if (wait_until < s_lv2_timers_min_timer_in_us)
{
s_lv2_timers_min_timer_in_us.release(wait_until);
}
// Register timeout if necessary
for (auto it = g_waiting.cbegin(), end = g_waiting.cend();; it++)
{
if (it == end || it->first > wait_until)
{
if (it == g_waiting.cbegin())
{
s_lv2_timers_min_timer_in_us.release(wait_until);
}
g_waiting.emplace(it, wait_until, &thread);
break;
}
@ -1856,7 +1856,7 @@ void lv2_obj::cleanup()
g_pending = 0;
s_yield_frequency = 0;
s_lv2_timers_sum_of_ten_delay_in_us = 5000;
s_lv2_timers_min_timer_in_us = 0;
s_lv2_timers_min_timer_in_us = u64{umax};
}
void lv2_obj::schedule_all(u64 current_time)
@ -2148,7 +2148,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
auto wait_for = [&](u64 timeout)
{
thread_ctrl::wait_on(state, old_state, timeout);
state.wait(old_state, atomic_wait_timeout{std::min<u64>(timeout, u64{umax} / 1000) * 1000});
};
for (;; old_state = state)
@ -2156,7 +2156,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
if (old_state & cpu_flag::notify)
{
// Timeout notification has been forced
break;
//break;
}
if (old_state & cpu_flag::signal)
@ -2202,6 +2202,27 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
}
passed = get_system_time() - start_time;
if (passed >= usec && cpu)
{
static atomic_t<u64> g_fail_time = 10000;
static atomic_t<u64> c_all = 0, c_sig = 0;
c_all++;
if (cpu->state & cpu_flag::notify)
{
c_sig++;
g_fail_time.atomic_op([miss = passed - usec](u64& x)
{
x = x - x / 100 + miss;
});
volatile u64 tls_fail_time = g_fail_time / 100;
+tls_fail_time;
}
}
else if (passed < usec && cpu && cpu->state & cpu_flag::notify)
{
__debugbreak();
}
continue;
}
else
@ -2403,12 +2424,17 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
atomic_bs_t<cpu_flag>* notifies_cpus[16];
usz count_notifies_cpus = 0;
static atomic_t<u64>
g_ok = 0,
g_fail = 0;
std::unique_lock lock(g_mutex, std::try_to_lock);
if (!lock)
{
// Not only is this method an opportunistic optimization,
// but if the mutex is already locked then it is likely that another thread will soon perform this check instead
g_fail++;
return;
}
@ -2417,7 +2443,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2)
{
const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe();
s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2) / 10);
s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2));
}
// Check registered timeouts
@ -2434,6 +2460,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
{
// Change cpu_thread::state for the lightweight notification to work
ensure(!target->state.test_and_set(cpu_flag::notify));
//target->state.notify_one();target->state.notify_one();
notifies_cpus[count_notifies_cpus++] = &target->state;
}
}
@ -2444,6 +2471,7 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
}
}
if (g_waiting.empty())
{
s_lv2_timers_min_timer_in_us.release(u64{umax});
@ -2454,11 +2482,18 @@ void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
}
lock.unlock();
g_ok++;
if (!count_notifies_cpus)
{
return;
}
for (usz i = count_notifies_cpus - 1; i != umax; i--)
{
atomic_wait_engine::notify_one(notifies_cpus[i]);
notifies_cpus[i]->notify_one();;
}
std::this_thread::yield();
}
u64 lv2_obj::get_avg_timer_reponse_delay()

View file

@ -467,7 +467,7 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time)
sleep_time = std::max<u64>(1, utils::sub_saturate<u64>(sleep_time, -add_time));
}
lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy < sleep_timers_accuracy_level::_usleep ? sleep_time : 0);
lv2_obj::sleep(ppu, sleep_time);
if (!lv2_obj::wait_timeout(sleep_time, &ppu, true, true))
{

View file

@ -6,6 +6,10 @@
#define USE_STD
#endif
#ifdef _WIN32
#include <windows.h>
#endif
#ifdef _MSC_VER
#include "emmintrin.h"
@ -302,7 +306,7 @@ namespace
return false;
}
static LARGE_INTEGER instant{};
LARGE_INTEGER instant{};
if (NtReleaseKeyedEvent(nullptr, &sync, 1, &instant) != NTSTATUS_SUCCESS)
{
@ -859,6 +863,19 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
{
uint ext_size = 0;
#ifdef _WIN32
LARGE_INTEGER start_time{};
//QueryPerformanceCounter(&start_time); // get time in 1/perf_freq units from RDTSC
FILETIME ftime{};
if (timeout != umax)
{
GetSystemTimeAsFileTime(&ftime); // get time in 100ns units since January 1, 1601 (UTC)
}
#endif
#ifdef __linux__
::timespec ts{};
if (timeout + 1)
@ -1073,7 +1090,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
cond->cv->wait(lock);
}
#elif defined(_WIN32)
LARGE_INTEGER qw;
LARGE_INTEGER qw{};
qw.QuadPart = -static_cast<s64>(timeout / 100);
if (timeout % 100)
@ -1082,6 +1099,11 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
qw.QuadPart -= 1;
}
if (!s_tls_one_time_wait_cb)
{
qw.QuadPart = (u64{ftime.dwHighDateTime} << 32) + ftime.dwLowDateTime - qw.QuadPart;
}
if (fallback) [[unlikely]]
{
if (!cond->set_sleep())
@ -1096,7 +1118,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
}
else if (NtWaitForAlertByThreadId)
{
switch (DWORD status = NtWaitForAlertByThreadId(cond, timeout + 1 ? &qw : nullptr))
switch (DWORD status = NtWaitForAlertByThreadId(nullptr, timeout + 1 ? &qw : nullptr))
{
case NTSTATUS_ALERTED: fallback = true; break;
case NTSTATUS_TIMEOUT: break;
@ -1137,7 +1159,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
while (!fallback)
{
#if defined(_WIN32)
static LARGE_INTEGER instant{};
LARGE_INTEGER instant{};
if (cond->wakeup(1))
{