mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
SPU: Use usermode waiting for busy GETLLAR loop
This commit is contained in:
parent
dddd12f66b
commit
6adc7f9ee6
3 changed files with 78 additions and 1 deletion
|
@ -37,6 +37,15 @@
|
|||
#include "util/sysinfo.hpp"
|
||||
#include "util/serialization.hpp"
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using spu_rdata_t = decltype(spu_thread::rdata);
|
||||
|
||||
template <>
|
||||
|
@ -320,6 +329,40 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
|||
#endif
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define mwaitx_func
|
||||
#define waitpkg_func
|
||||
#else
|
||||
#define mwaitx_func __attribute__((__target__("mwaitx")))
|
||||
#define waitpkg_func __attribute__((__target__("waitpkg")))
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
// Waits for a number of TSC clock cycles in power optimized state
|
||||
// Cstate is represented in bits [7:4]+1 cstate. So C0 requires bits [7:4] to be set to 0xf, C1 requires bits [7:4] to be set to 0.
|
||||
template <typename T, typename... Args>
|
||||
mwaitx_func static void __mwaitx(u32 cycles, u32 cstate, const void* cline, const Args&... args)
|
||||
{
|
||||
constexpr u32 timer_enable = 0x2;
|
||||
|
||||
// monitorx will wake if the cache line is written to, use it for reservations which fits it almost perfectly
|
||||
_mm_monitorx(const_cast<void*>(cline), 0, 0);
|
||||
|
||||
// Use static function to force inline
|
||||
if (T::needs_wait(args...))
|
||||
{
|
||||
_mm_mwaitx(timer_enable, cstate, cycles);
|
||||
}
|
||||
}
|
||||
|
||||
// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
|
||||
waitpkg_func static void __tpause(u32 cycles, u32 cstate)
|
||||
{
|
||||
const u64 tsc = utils::get_tsc() + cycles;
|
||||
_tpause(cstate, tsc);
|
||||
}
|
||||
#endif
|
||||
|
||||
void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
||||
|
||||
extern thread_local u64 g_tls_fault_spu;
|
||||
|
@ -4113,7 +4156,32 @@ bool spu_thread::process_mfc_cmd()
|
|||
|
||||
if (getllar_busy_waiting_switch == 1)
|
||||
{
|
||||
busy_wait(300);
|
||||
#if defined(ARCH_X64)
|
||||
if (utils::has_um_wait())
|
||||
{
|
||||
if (utils::has_waitpkg())
|
||||
{
|
||||
__tpause(std::min<u32>(getllar_spin_count, 10) * 500, 0x1);
|
||||
}
|
||||
else
|
||||
{
|
||||
struct check_wait_t
|
||||
{
|
||||
static FORCE_INLINE bool needs_wait(u64 rtime, const atomic_t<u64>& mem_rtime) noexcept
|
||||
{
|
||||
return rtime == mem_rtime;
|
||||
}
|
||||
};
|
||||
|
||||
// Provide the first X64 cache line of the reservation to be tracked
|
||||
__mwaitx<check_wait_t>(std::min<u32>(getllar_spin_count, 17) * 500, 0xf0, std::addressof(data), +rtime, vm::reservation_acquire(addr));
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
busy_wait(300);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -358,6 +358,13 @@ bool utils::has_appropriate_um_wait()
|
|||
#endif
|
||||
}
|
||||
|
||||
// Similar to the above function but allow execution if alternatives such as yield are not wanted
|
||||
bool utils::has_um_wait()
|
||||
{
|
||||
static const bool g_value = (has_waitx() || has_waitpkg()) && get_tsc_freq();
|
||||
return g_value;
|
||||
}
|
||||
|
||||
u32 utils::get_rep_movsb_threshold()
|
||||
{
|
||||
static const u32 g_value = []()
|
||||
|
|
|
@ -59,6 +59,8 @@ namespace utils
|
|||
|
||||
bool has_appropriate_um_wait();
|
||||
|
||||
bool has_um_wait();
|
||||
|
||||
std::string get_cpu_brand();
|
||||
|
||||
std::string get_system_info();
|
||||
|
|
Loading…
Add table
Reference in a new issue