Mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-04-20 19:45:20 +00:00)

Commit 0b326fef32: Merge d6e014b3a9 into c443326fb1

16 changed files with 240 additions and 39 deletions
@@ -1449,6 +1449,8 @@ static usz apply_modification(std::vector<u32>& applied, patch_engine::patch_inf
 void patch_engine::apply(std::vector<u32>& applied_total, const std::string& name, std::function<u8*(u32, u32)> mem_translate, u32 filesz, u32 min_addr)
 {
+	applied_total.clear();
+
 	if (!m_map.contains(name))
 	{
 		return;
@@ -1597,6 +1599,9 @@ void patch_engine::apply(std::vector<u32>& applied_total, const std::string& nam
 			}
 		}
 	}
+
+	// Ensure consistent order
+	std::stable_sort(applied_total.begin(), applied_total.end());
 }
 
 void patch_engine::unload(const std::string& name)

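The stable_sort added above gives patch_engine::apply a deterministic result: applied_total always lists patched addresses in ascending order, no matter in which order the individual patch entries fired. A minimal standalone sketch of the effect, with a hypothetical collect_patches() standing in for the engine's bookkeeping:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in: addresses recorded in apply order.
std::vector<uint32_t> collect_patches()
{
	return {0x2000, 0x1000, 0x3000, 0x1000};
}

int main()
{
	std::vector<uint32_t> applied_total = collect_patches();

	// Deterministic order for consumers (serialization, logging, diffing);
	// stable_sort keeps equal addresses in their original apply order.
	std::stable_sort(applied_total.begin(), applied_total.end());
}
```
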
@@ -96,6 +96,7 @@ struct ppu_module : public Type
 	std::vector<ppu_segment> segs{};
 	std::vector<ppu_segment> secs{};
 	std::vector<ppu_function> funcs{};
+	std::vector<u32> applied_patches;
 	std::deque<std::shared_ptr<void>> allocations;
 	std::map<u32, u32> addr_to_seg_index;
 
@@ -185,7 +186,6 @@ struct main_ppu_module : public ppu_module<T>
 {
 	u32 elf_entry{};
 	u32 seg0_code_end{};
-	std::vector<u32> applied_patches;
 
 	// Disable inherited savestate ordering
 	void save(utils::serial&) = delete;

@@ -1947,6 +1947,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
 		ppu_check_patch_spu_images(*prx, seg);
 	}
 
+	prx->applied_patches = applied;
 	prx->analyse(toc, 0, end, applied, exported_funcs);
 
 	if (!ar && !virtual_load)

@@ -4898,6 +4898,22 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 			sha1_update(&ctx, ensure(info.get_ptr<const u8>(func.addr)), func.size);
 		}
 
+		if (fpos >= info.funcs.size())
+		{
+			// Hash the entire set of grouped function addresses for the integrity of the symbol resolver function
+			// Regrouping can potentially occur during patches
+
+			std::vector<be_t<u32>> addrs(info.funcs.size());
+			usz addr_index = 0;
+
+			for (const ppu_function& func : info.funcs)
+			{
+				addrs[addr_index++] = func.addr;
+			}
+
+			sha1_update(&ctx, addrs.data(), addrs.size() * sizeof(be_t<u32>));
+		}
+
 		if (false)
 		{
 			const be_t<u64> forced_upd = 3;

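When the symbol resolver is in play (fpos >= info.funcs.size()), the hunk above mixes the whole table of function addresses into the module's SHA-1, so a patch that regroups functions also changes the cached object's identity. The addresses are stored as be_t<u32>, which keeps the hashed bytes independent of host endianness. A rough standalone illustration of the idea, with FNV-1a standing in for the engine's sha1_update():

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// FNV-1a, used here only as a stand-in for sha1_update().
uint64_t fnv1a(const void* data, std::size_t size)
{
	uint64_t hash = 14695981039346656037ull;
	const auto* p = static_cast<const unsigned char*>(data);
	for (std::size_t i = 0; i < size; i++)
		hash = (hash ^ p[i]) * 1099511628211ull;
	return hash;
}

uint64_t hash_function_addresses(const std::vector<uint32_t>& addrs)
{
	// Flatten the address table into one contiguous buffer and hash it in
	// a single update, mirroring sha1_update(&ctx, addrs.data(), bytes).
	return fnv1a(addrs.data(), addrs.size() * sizeof(uint32_t));
}
```
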
@@ -4920,7 +4936,6 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 			accurate_fpcc,
 			accurate_vnan,
 			accurate_nj_mode,
-			contains_symbol_resolver,
 
 			__bitset_enum_max
 		};
@@ -4950,8 +4965,6 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 			settings += ppu_settings::accurate_vnan, settings -= ppu_settings::fixup_vnan, fmt::throw_exception("VNAN Not implemented");
 		if (g_cfg.core.ppu_use_nj_bit)
 			settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");
-		if (fpos >= info.funcs.size())
-			settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose
 
 		// Write version, hash, CPU, settings
 		fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));

@@ -300,6 +300,11 @@ public:
 	const char* last_function{}; // Sticky copy of current_function, is not cleared on function return
 	const char* current_module{}; // Current module name, for savestates.
 
+	// Syscall pattern recognition variables
+	u64 last_lv2_deschedule_cia = umax; // Position of the syscall that puts the PPU to sleep
+	u64 last_lv2_deschedule_r3 = umax; // R3 argument of the syscall that puts the PPU to sleep
+	u64 last_lv2_deschedule_match_count = 0; // Count of matching arguments when the PPU is put to sleep
+
 	const bool is_interrupt_thread; // True for interrupts-handler threads
 
 	// Thread name

@@ -1307,6 +1307,8 @@ static std::deque<class cpu_thread*> g_to_sleep;
 static atomic_t<bool> g_scheduler_ready = false;
 static atomic_t<u64> s_yield_frequency = 0;
 static atomic_t<u64> s_max_allowed_yield_tsc = 0;
+static atomic_t<u64> s_lv2_timers_sum_of_ten_delay_in_us = 5000;
+static atomic_t<u64> s_lv2_timers_min_timer_in_us = u64{umax};
 static u64 s_last_yield_tsc = 0;
 atomic_t<u32> g_lv2_preempts_taken = 0;
 
@@ -1337,11 +1339,13 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)
 
 	if (cpu.get_class() == thread_class::ppu)
 	{
-		if (u32 addr = static_cast<ppu_thread&>(cpu).res_notify)
+		ppu_thread& ppu = static_cast<ppu_thread&>(cpu);
+
+		if (u32 addr = ppu.res_notify)
 		{
-			static_cast<ppu_thread&>(cpu).res_notify = 0;
+			ppu.res_notify = 0;
 
-			if (static_cast<ppu_thread&>(cpu).res_notify_time != vm::reservation_notifier_count_index(addr).second)
+			if (ppu.res_notify_time != vm::reservation_notifier_count_index(addr).second)
 			{
 				// Ignore outdated notification request
 			}
@@ -1360,6 +1364,17 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)
 				vm::reservation_notifier_notify(addr);
 			}
 		}
+
+		if (ppu.last_lv2_deschedule_cia == ppu.cia && ppu.last_lv2_deschedule_r3 == ppu.gpr[3])
+		{
+			ppu.last_lv2_deschedule_match_count++;
+		}
+		else
+		{
+			ppu.last_lv2_deschedule_cia = ppu.cia;
+			ppu.last_lv2_deschedule_r3 = ppu.gpr[3];
+			ppu.last_lv2_deschedule_match_count = 0;
+		}
 	}
 
 	bool result = false;

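The three last_lv2_deschedule_* fields act as a simple repeat detector: going to sleep from the same instruction address (cia) with the same R3 argument bumps a counter, and wait_timeout later treats a high count as a hint that the wait is periodic and worth a precise blocking wait. The same pattern, detached from the emulator types as a sketch:

```cpp
#include <cstdint>

struct deschedule_pattern
{
	uint64_t last_cia = UINT64_MAX; // address of the descheduling syscall
	uint64_t last_r3 = UINT64_MAX;  // its first argument
	uint64_t match_count = 0;       // consecutive identical sleeps

	// Returns how many times in a row the same call site slept
	// with the same argument.
	uint64_t update(uint64_t cia, uint64_t r3)
	{
		if (cia == last_cia && r3 == last_r3)
		{
			return ++match_count;
		}

		last_cia = cia;
		last_r3 = r3;
		match_count = 0;
		return 0;
	}
};
```
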
@@ -1432,7 +1447,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio)
 
 	if (!g_postpone_notify_barrier)
 	{
-		notify_all();
+		notify_all(thread);
 	}
 
 	return result;

@@ -1568,6 +1583,11 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
 	{
 		const u64 wait_until = start_time + std::min<u64>(timeout, ~start_time);
 
+		if (wait_until < s_lv2_timers_min_timer_in_us)
+		{
+			s_lv2_timers_min_timer_in_us.release(wait_until);
+		}
+
 		// Register timeout if necessary
 		for (auto it = g_waiting.cbegin(), end = g_waiting.cend();; it++)
 		{
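s_lv2_timers_min_timer_in_us caches the earliest pending wake-up time so the notifier can skip scanning g_waiting whenever nothing can possibly be due yet: registering a timeout can only lower the cached value, and the notifier raises it again after draining expired entries. A reduced sketch of the invariant, using std::atomic in place of the project's atomic_t:

```cpp
#include <atomic>
#include <cstdint>

std::atomic<uint64_t> min_timer_us{UINT64_MAX};

// On registering a timed sleep (done under the scheduler lock in lv2.cpp,
// so the check-then-store race is benign there).
void register_timeout(uint64_t wait_until_us)
{
	if (wait_until_us < min_timer_us.load(std::memory_order_relaxed))
	{
		min_timer_us.store(wait_until_us, std::memory_order_release);
	}
}

// Fast-path check used by notifiers: if now < min, no timer has expired.
bool nothing_due(uint64_t now_us)
{
	return now_us < min_timer_us.load(std::memory_order_acquire);
}
```
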
@@ -1835,6 +1855,8 @@ void lv2_obj::cleanup()
 	g_waiting.clear();
 	g_pending = 0;
 	s_yield_frequency = 0;
+	s_lv2_timers_sum_of_ten_delay_in_us = 5000;
+	s_lv2_timers_min_timer_in_us = u64{umax};
 }
 
 void lv2_obj::schedule_all(u64 current_time)

@@ -1876,7 +1898,7 @@ void lv2_obj::schedule_all(u64 current_time)
 	}
 
 	// Check registered timeouts
-	while (!g_waiting.empty())
+	while (!g_waiting.empty() && it != std::end(g_to_notify))
 	{
 		const auto pair = &g_waiting.front();
 
@@ -1896,15 +1918,7 @@ void lv2_obj::schedule_all(u64 current_time)
 			ensure(!target->state.test_and_set(cpu_flag::notify));
 
 			// Otherwise notify it to wake itself
-			if (it == std::end(g_to_notify))
-			{
-				// Out of notification slots, notify locally (resizable container is not worth it)
-				target->state.notify_one();
-			}
-			else
-			{
-				*it++ = &target->state;
-			}
+			*it++ = &target->state;
 		}
 	}
 	else

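With the loop condition now bounded by it != std::end(g_to_notify), the in-loop "out of slots" fallback becomes dead code and is removed: expired waiters beyond the slot capacity simply stay queued for the next pass instead of being notified while the scheduler lock is held. The general collect-then-notify pattern, in isolation (names here are illustrative, not RPCS3's):

```cpp
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <iterator>
#include <utility>

using waiter = std::pair<uint64_t, std::atomic<uint32_t>*>; // wake time, state

std::deque<waiter> g_waiting;      // kept sorted by wake time
std::atomic<uint32_t>* slots[4]{}; // fixed-size notification slots

void drain_expired(uint64_t now)
{
	std::size_t n = 0;

	// Bounded per pass: when the slots run out, leave the rest queued.
	while (!g_waiting.empty() && n < std::size(slots))
	{
		if (g_waiting.front().first > now)
		{
			break; // sorted, nothing further is due
		}

		slots[n++] = g_waiting.front().second;
		g_waiting.pop_front();
	}

	// The real code wakes these after dropping the lock.
	for (std::size_t i = 0; i < n; i++)
	{
		slots[i]->fetch_add(1); // stand-in for state.notify_one()
	}
}
```
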
@@ -2134,7 +2148,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 
 	auto wait_for = [&](u64 timeout)
 	{
-		thread_ctrl::wait_on(state, old_state, timeout);
+		state.wait(old_state, atomic_wait_timeout{std::min<u64>(timeout, u64{umax} / 1000) * 1000});
 	};
 
 	for (;; old_state = state)
@@ -2142,7 +2156,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 		if (old_state & cpu_flag::notify)
 		{
 			// Timeout notification has been forced
-			break;
+			//break;
 		}
 
 		if (old_state & cpu_flag::signal)

@@ -2171,7 +2185,56 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 #endif
 		// TODO: Tune for other non-Windows operating systems
 
-		if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
+		const sleep_timers_accuracy_level accuracy_type = g_cfg.core.sleep_timers_accuracy;
+		const u64 avg_delay = get_avg_timer_reponse_delay();
+
+		static atomic_t<u64> g_success = 0;
+		static atomic_t<u64> g_fails = 0;
+
+		if ((accuracy_type == sleep_timers_accuracy_level::_dynamic && avg_delay < 30) && ((avg_delay < (remaining + 15) / 2) || (cpu && cpu->last_lv2_deschedule_match_count > 3)))
+		{
+			wait_for(remaining);
+
+			if (remaining < host_min_quantum)
+			{
+				g_success += remaining;
+				//g_success++;
+			}
+
+			passed = get_system_time() - start_time;
+
+			if (passed >= usec && cpu)
+			{
+				static atomic_t<u64> g_fail_time = 10000;
+				static atomic_t<u64> c_all = 0, c_sig = 0;
+				c_all++;
+				if (cpu->state & cpu_flag::notify)
+				{
+					c_sig++;
+					g_fail_time.atomic_op([miss = passed - usec](u64& x)
+					{
+						x = x - x / 100 + miss;
+					});
+					volatile u64 tls_fail_time = g_fail_time / 100;
+					+tls_fail_time;
+				}
+			}
+			else if (passed < usec && cpu && cpu->state & cpu_flag::notify)
+			{
+				__debugbreak();
+			}
+			continue;
+		}
+		else
+		{
+			if (remaining < host_min_quantum)
+			{
+				g_fails += remaining;
+				//g_fails++;
+			}
+		}
+
+		if (accuracy_type < (is_usleep ? sleep_timers_accuracy_level::_dynamic : sleep_timers_accuracy_level::_all_timers))
 		{
 			wait_for(remaining);
 		}

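Stripped of its debug counters, the new _dynamic branch blocks for the full remaining time only when the measured wake-up delay is small in absolute terms (under 30 µs) and either small relative to the remaining wait or the thread has shown a repetitive sleep pattern (more than 3 consecutive matching deschedules). A hypothetical condensed form of that decision, with names taken from the diff:

```cpp
#include <cstdint>

// avg_delay_us ~ get_avg_timer_reponse_delay(); match_count ~ the
// ppu_thread's last_lv2_deschedule_match_count.
bool should_block_full_wait(uint64_t avg_delay_us, uint64_t remaining_us, uint64_t match_count)
{
	const bool delay_is_small     = avg_delay_us < 30;
	const bool small_vs_remaining = avg_delay_us < (remaining_us + 15) / 2;
	const bool periodic_sleeper   = match_count > 3;

	return delay_is_small && (small_vs_remaining || periodic_sleeper);
}
```
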
@@ -2222,7 +2285,7 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu)
 		cpu_counter::remove(&cpu);
 	}
 
-void lv2_obj::notify_all() noexcept
+void lv2_obj::notify_all(cpu_thread* woke_thread) noexcept
 {
 	for (auto cpu : g_to_notify)
 	{
@@ -2258,13 +2321,11 @@ void lv2_obj::notify_all() noexcept
 		return;
 	}
 
-	if (cpu->get_class() != thread_class::spu && cpu->state.none_of(cpu_flag::suspend))
+	if (cpu->get_class() == thread_class::ppu && cpu->state.none_of(cpu_flag::suspend + cpu_flag::signal))
 	{
 		return;
 	}
 
-	std::optional<vm::writer_lock> lock;
-
 	constexpr usz total_waiters = std::size(spu_thread::g_spu_waiters_by_value);
 
 	u32 notifies[total_waiters]{};
@@ -2346,4 +2407,96 @@ void lv2_obj::notify_all() noexcept
 			vm::reservation_notifier_notify(addr);
 		}
 	}
+
+	if (woke_thread == cpu)
+	{
+		return;
+	}
+
+	const u64 min_timer = s_lv2_timers_min_timer_in_us;
+	const u64 current_time = get_guest_system_time();
+
+	if (current_time < min_timer)
+	{
+		return;
+	}
+
+	atomic_bs_t<cpu_flag>* notifies_cpus[16];
+	usz count_notifies_cpus = 0;
+
+	static atomic_t<u64>
+		g_ok = 0,
+		g_fail = 0;
+
+	std::unique_lock lock(g_mutex, std::try_to_lock);
+
+	if (!lock)
+	{
+		// Not only is this method an opportunistic optimization,
+		// but if the lock is already taken, it is likely that another thread will soon do this check instead
+		g_fail++;
+		return;
+	}
+
+	// Do it BEFORE clearing the queue in order to measure the delay properly even if the sleeping thread notified itself
+	// This 'redundancy' is what allows proper measurements
+	if (u64 min_time2 = s_lv2_timers_min_timer_in_us; current_time >= min_time2)
+	{
+		const u64 sum = s_lv2_timers_sum_of_ten_delay_in_us.observe();
+		s_lv2_timers_sum_of_ten_delay_in_us.release(sum - sum / 10 + (current_time - min_time2));
+	}
+
+	// Check registered timeouts
+	while (!g_waiting.empty() && count_notifies_cpus < std::size(notifies_cpus))
+	{
+		const auto pair = &g_waiting.front();
+
+		if (pair->first <= current_time)
+		{
+			const auto target = pair->second;
+			g_waiting.pop_front();
+
+			if (target != cpu)
+			{
+				// Change cpu_thread::state for the lightweight notification to work
+				ensure(!target->state.test_and_set(cpu_flag::notify));
+				notifies_cpus[count_notifies_cpus++] = &target->state;
+			}
+		}
+		else
+		{
+			// The list is sorted, so assume there are no more expired timeouts
+			break;
+		}
+	}
+
+	if (g_waiting.empty())
+	{
+		s_lv2_timers_min_timer_in_us.release(u64{umax});
+	}
+	else
+	{
+		s_lv2_timers_min_timer_in_us.release(g_waiting.front().first);
+	}
+
+	lock.unlock();
+	g_ok++;
+
+	if (!count_notifies_cpus)
+	{
+		return;
+	}
+
+	for (usz i = count_notifies_cpus - 1; i != umax; i--)
+	{
+		notifies_cpus[i]->notify_one();
+	}
+	std::this_thread::yield();
+}
+
+u64 lv2_obj::get_avg_timer_reponse_delay()
+{
+	return s_lv2_timers_sum_of_ten_delay_in_us / 10;
+}

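The delay statistic kept in s_lv2_timers_sum_of_ten_delay_in_us is a decaying sum: each update sheds a tenth of the old sum and adds the newest observed delay, so the sum settles near ten times the recent average (for a constant delay d it converges to 10 * d), and get_avg_timer_reponse_delay() just divides by ten. A sketch of the estimator on its own:

```cpp
#include <cstdint>

struct avg_of_ten
{
	uint64_t sum = 5000; // seeded like s_lv2_timers_sum_of_ten_delay_in_us

	// Mirrors: release(sum - sum / 10 + (current_time - min_time2))
	void add(uint64_t delay_us)
	{
		sum = sum - sum / 10 + delay_us;
	}

	// Mirrors: get_avg_timer_reponse_delay()
	uint64_t average() const
	{
		return sum / 10;
	}
};
```
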
@@ -11,7 +11,6 @@ struct lv2_overlay final : ppu_module<lv2_obj>
 
 	u32 entry{};
 	u32 seg0_code_end{};
-	std::vector<u32> applied_patches;
 
 	lv2_overlay() = default;
 	lv2_overlay(utils::serial&){}

@@ -454,11 +454,13 @@ public:
 
 	static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false);
 
-	static void notify_all() noexcept;
+	static void notify_all(cpu_thread* woke_thread = nullptr) noexcept;
 
 	// Can be called before the actual sleep call in order to move it out of mutex scope
 	static void prepare_for_sleep(cpu_thread& cpu);
 
+	static u64 get_avg_timer_reponse_delay();
+
 	struct notify_all_t
 	{
 		notify_all_t() noexcept

@@ -467,7 +467,7 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time)
 		sleep_time = std::max<u64>(1, utils::sub_saturate<u64>(sleep_time, -add_time));
 	}
 
-	lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy < sleep_timers_accuracy_level::_usleep ? sleep_time : 0);
+	lv2_obj::sleep(ppu, sleep_time);
 
 	if (!lv2_obj::wait_timeout(sleep_time, &ppu, true, true))
 	{

@@ -870,11 +870,13 @@ namespace rsx
 			{
 				// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.
 				do_local_task(rsx::FIFO::state::paused);
+				lv2_obj::notify_all();
 				thread_ctrl::wait_on(state, old, 16000);
 			}
 			else
 			{
 				on_semaphore_acquire_wait();
+				lv2_obj::notify_all();
 				std::this_thread::yield();
 			}
 		}

@@ -91,11 +91,7 @@ struct cfg_root : cfg::node
 	cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true };
 	cfg::uint<0, 400> max_cpu_preempt_count_per_frame{ this, "Max CPU Preempt Count", 0, true };
 	cfg::_bool allow_rsx_cpu_preempt{ this, "Allow RSX CPU Preemptions", true, true };
-#if defined (__linux__) || defined (__APPLE__)
-	cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
-#else
-	cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true };
-#endif
+	cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy 2", sleep_timers_accuracy_level::_dynamic, true };
 	cfg::_int<-1000, 1500> usleep_addend{ this, "Usleep Time Addend", 0, true };
 
 	cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything

@@ -237,6 +237,7 @@ void fmt_class_string<sleep_timers_accuracy_level>::format(std::string& out, u64
 	switch (value)
 	{
 	case sleep_timers_accuracy_level::_as_host: return "As Host";
+	case sleep_timers_accuracy_level::_dynamic: return "Dynamic";
 	case sleep_timers_accuracy_level::_usleep: return "Usleep Only";
 	case sleep_timers_accuracy_level::_all_timers: return "All Timers";
 	}

@@ -24,6 +24,7 @@ enum class spu_block_size_type
 enum class sleep_timers_accuracy_level
 {
 	_as_host,
+	_dynamic,
 	_usleep,
 	_all_timers,
 };

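The position of the new _dynamic enumerator is significant: accuracy levels are compared with relational operators (for example, accuracy_type < (is_usleep ? _dynamic : _all_timers) in wait_timeout), so _dynamic has to sit between _as_host and _usleep for the existing _usleep/_all_timers semantics to keep their meaning. A compile-time check of that ordering, restating the enum from the hunk above:

```cpp
enum class sleep_timers_accuracy_level { _as_host, _dynamic, _usleep, _all_timers };

static_assert(sleep_timers_accuracy_level::_as_host < sleep_timers_accuracy_level::_dynamic);
static_assert(sleep_timers_accuracy_level::_dynamic < sleep_timers_accuracy_level::_usleep);
static_assert(sleep_timers_accuracy_level::_usleep < sleep_timers_accuracy_level::_all_timers);
```
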
@@ -1202,6 +1202,7 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
 	switch (static_cast<sleep_timers_accuracy_level>(index))
 	{
 	case sleep_timers_accuracy_level::_as_host: return tr("As Host", "Sleep timers accuracy");
+	case sleep_timers_accuracy_level::_dynamic: return tr("Dynamic", "Sleep timers accuracy");
 	case sleep_timers_accuracy_level::_usleep: return tr("Usleep Only", "Sleep timers accuracy");
 	case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy");
 	}

@@ -233,7 +233,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
 	{ emu_settings_type::SPUCache, { "Core", "SPU Cache"}},
 	{ emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}},
 	{ emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}},
-	{ emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy"}},
+	{ emu_settings_type::SleepTimersAccuracy, { "Core", "Sleep Timers Accuracy 2"}},
	{ emu_settings_type::ClocksScale, { "Core", "Clocks scale"}},
 	{ emu_settings_type::AccuratePPU128Loop, { "Core", "Accurate PPU 128-byte Reservation Op Max Length"}},
 	{ emu_settings_type::PerformanceReport, { "Core", "Enable Performance Report"}},

@@ -6,6 +6,10 @@
 #define USE_STD
 #endif
 
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
 #ifdef _MSC_VER
 
 #include "emmintrin.h"

@@ -302,7 +306,7 @@ namespace
 		return false;
 	}
 
-	static LARGE_INTEGER instant{};
+	LARGE_INTEGER instant{};
 
 	if (NtReleaseKeyedEvent(nullptr, &sync, 1, &instant) != NTSTATUS_SUCCESS)
 	{

@@ -859,6 +863,19 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 {
 	uint ext_size = 0;
 
+#ifdef _WIN32
+	LARGE_INTEGER start_time{};
+	//QueryPerformanceCounter(&start_time); // get time in 1/perf_freq units from RDTSC
+
+	FILETIME ftime{};
+	if (timeout != umax)
+	{
+		GetSystemTimeAsFileTime(&ftime); // get time in 100ns units since January 1, 1601 (UTC)
+	}
+
+#endif
+
 #ifdef __linux__
 	::timespec ts{};
 	if (timeout + 1)
@@ -1073,7 +1090,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 		cond->cv->wait(lock);
 	}
 #elif defined(_WIN32)
-	LARGE_INTEGER qw;
+	LARGE_INTEGER qw{};
 	qw.QuadPart = -static_cast<s64>(timeout / 100);
 
 	if (timeout % 100)
@@ -1082,6 +1099,11 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 		qw.QuadPart -= 1;
 	}
 
+	if (!s_tls_one_time_wait_cb)
+	{
+		qw.QuadPart = (u64{ftime.dwHighDateTime} << 32) + ftime.dwLowDateTime - qw.QuadPart;
+	}
+
 	if (fallback) [[unlikely]]
 	{
 		if (!cond->set_sleep())
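NtWaitForAlertByThreadId takes its timeout as a LARGE_INTEGER where a negative value means a relative wait in 100 ns units and a positive value means an absolute time since 1601-01-01 UTC. The hunk above uses the FILETIME captured at the start of the wait to turn the negative relative value into an absolute deadline, so re-entering the wait after a spurious wake-up cannot stretch the total timeout. The conversion in isolation, as a sketch:

```cpp
#include <cstdint>
#ifdef _WIN32
#include <windows.h>

// Relative timeout (nanoseconds) -> absolute NT time deadline
// (100 ns units since 1601-01-01 UTC).
LARGE_INTEGER absolute_deadline(uint64_t timeout_ns)
{
	LARGE_INTEGER qw{};
	qw.QuadPart = -static_cast<int64_t>(timeout_ns / 100); // negative = relative

	FILETIME ft{};
	GetSystemTimeAsFileTime(&ft);

	// now - (negative relative) == now + timeout
	qw.QuadPart = (uint64_t{ft.dwHighDateTime} << 32) + ft.dwLowDateTime - qw.QuadPart;
	return qw;
}
#endif
```
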
@@ -1096,7 +1118,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 	}
 	else if (NtWaitForAlertByThreadId)
 	{
-		switch (DWORD status = NtWaitForAlertByThreadId(cond, timeout + 1 ? &qw : nullptr))
+		switch (DWORD status = NtWaitForAlertByThreadId(nullptr, timeout + 1 ? &qw : nullptr))
 		{
 		case NTSTATUS_ALERTED: fallback = true; break;
 		case NTSTATUS_TIMEOUT: break;

@@ -1137,7 +1159,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 	while (!fallback)
 	{
 #if defined(_WIN32)
-		static LARGE_INTEGER instant{};
+		LARGE_INTEGER instant{};
 
 		if (cond->wakeup(1))
 		{