SPU: Use usermode waiting for busy GETLLAR loop

This commit is contained in:
Eladash 2023-08-16 16:16:49 +03:00 committed by Elad Ashkenazi
parent dddd12f66b
commit 6adc7f9ee6
3 changed files with 78 additions and 1 deletions

View file

@ -37,6 +37,15 @@
#include "util/sysinfo.hpp"
#include "util/serialization.hpp"
#if defined(ARCH_X64)
#ifdef _MSC_VER
#include <intrin.h>
#include <immintrin.h>
#else
#include <x86intrin.h>
#endif
#endif
using spu_rdata_t = decltype(spu_thread::rdata);
template <>
@ -320,6 +329,40 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
#endif
}
#if defined(_MSC_VER)
#define mwaitx_func
#define waitpkg_func
#else
#define mwaitx_func __attribute__((__target__("mwaitx")))
#define waitpkg_func __attribute__((__target__("waitpkg")))
#endif
#if defined(ARCH_X64)
// Waits for a number of TSC clock cycles in power optimized state
// Cstate is represented in bits [7:4]+1 cstate. So C0 requires bits [7:4] to be set to 0xf, C1 requires bits [7:4] to be set to 0.
template <typename T, typename... Args>
mwaitx_func static void __mwaitx(u32 cycles, u32 cstate, const void* cline, const Args&... args)
{
constexpr u32 timer_enable = 0x2;
// monitorx will wake if the cache line is written to, use it for reservations which fits it almost perfectly
_mm_monitorx(const_cast<void*>(cline), 0, 0);
// Use static function to force inline
if (T::needs_wait(args...))
{
_mm_mwaitx(timer_enable, cstate, cycles);
}
}
// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
waitpkg_func static void __tpause(u32 cycles, u32 cstate)
{
const u64 tsc = utils::get_tsc() + cycles;
_tpause(cstate, tsc);
}
#endif
void do_cell_atomic_128_store(u32 addr, const void* to_write);
extern thread_local u64 g_tls_fault_spu;
@ -4113,7 +4156,32 @@ bool spu_thread::process_mfc_cmd()
if (getllar_busy_waiting_switch == 1)
{
busy_wait(300);
#if defined(ARCH_X64)
if (utils::has_um_wait())
{
if (utils::has_waitpkg())
{
__tpause(std::min<u32>(getllar_spin_count, 10) * 500, 0x1);
}
else
{
struct check_wait_t
{
static FORCE_INLINE bool needs_wait(u64 rtime, const atomic_t<u64>& mem_rtime) noexcept
{
return rtime == mem_rtime;
}
};
// Provide the first X64 cache line of the reservation to be tracked
__mwaitx<check_wait_t>(std::min<u32>(getllar_spin_count, 17) * 500, 0xf0, std::addressof(data), +rtime, vm::reservation_acquire(addr));
}
}
else
#endif
{
busy_wait(300);
}
}
return true;

View file

@ -358,6 +358,13 @@ bool utils::has_appropriate_um_wait()
#endif
}
// Similar to the above function but allow execution if alternatives such as yield are not wanted
bool utils::has_um_wait()
{
static const bool g_value = (has_waitx() || has_waitpkg()) && get_tsc_freq();
return g_value;
}
u32 utils::get_rep_movsb_threshold()
{
static const u32 g_value = []()

View file

@ -59,6 +59,8 @@ namespace utils
bool has_appropriate_um_wait();
bool has_um_wait();
std::string get_cpu_brand();
std::string get_system_info();