SPU: Evaluated Thread-blocked LR messaging
commit 49d567426e (parent ef328cfe57) in https://github.com/RPCS3/rpcs3.git

3 changed files with 233 additions and 51 deletions
@@ -4497,6 +4497,126 @@ u32 spu_thread::get_mfc_completed() const
 	return ch_tag_mask & ~mfc_fence;
 }
 
+u32 evaluate_spin_optimization(std::span<u8> stats, u64 evaluate_time, const cfg::uint<0, 100>& wait_percent, bool inclined_for_responsiveness = false)
+{
+	ensure(stats.size() >= 2 && stats.size() <= 16);
+
+	const u32 percent = wait_percent;
+
+	// Predict whether or not to use operating system sleep based on history
+	std::array<u8, 16> old_stats{};
+	std::copy_n(stats.data(), stats.size(), old_stats.data());
+
+	// Rotate history (prepare newest entry)
+	stats[0] = 0;
+	std::copy_n(old_stats.data(), stats.size() - 1, stats.data() + 1);
+
+	u32 total_wait = 0;
+	u32 zero_count = 0; // Try to ignore major inconsistencies
+	u32 consecutive_zero = 0;
+	u32 consecutive_zero_or_one = 0;
+	u32 consecutive_zero_or_one_tally = 0;
+
+	usz index = umax;
+
+	for (u8 val : old_stats)
+	{
+		index++;
+
+		if (index == stats.size())
+		{
+			break;
+		}
+
+		total_wait += val;
+
+		if (val == 0)
+		{
+			if (consecutive_zero == index)
+			{
+				consecutive_zero++;
+				consecutive_zero_or_one++;
+				//consecutive_zero_or_one_tally += 0;
+			}
+
+			++zero_count;
+		}
+
+		if (val == 1)
+		{
+			if (consecutive_zero_or_one == index)
+			{
+				consecutive_zero_or_one++;
+				consecutive_zero_or_one_tally++;
+			}
+		}
+	}
+
+	if (inclined_for_responsiveness)
+	{
+		total_wait /= 2;
+	}
+
+	// Add to chance if previous wait was long enough
+	u32 add_count = 0;
+
+	if (stats.size() == 4)
+	{
+		add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
+			: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
+			: zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40
+			: zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40
+			: 0;
+	}
+	else
+	{
+		add_count = zero_count >= 12 && total_wait >= 80 ? (total_wait - 80) * 30
+			: zero_count >= 7 && total_wait >= 30 ? (total_wait - 30) * 10
+			: zero_count >= 4 && total_wait >= 20 ? (total_wait - 20) * 10
+			: zero_count >= 0 && total_wait >= 10 ? (total_wait - 10) * 10
+			: 0;
+	}
+
+	if (stats.size() == 16 && (consecutive_zero >= 2 || (consecutive_zero_or_one >= 3 && consecutive_zero_or_one_tally < consecutive_zero_or_one * 2 / 3)))
+	{
+		// Thread is back to action after some sleep
+		add_count = 0;
+	}
+
+	if (inclined_for_responsiveness && std::count(old_stats.data(), old_stats.data() + 3, 0) >= 2)
+	{
+		add_count = 0;
+	}
+
+	// Evaluate its value (shift-right to ensure its randomness with different CPUs)
+	const u32 busy_waiting_switch = ((evaluate_time >> 8) % 100 + add_count < percent) ? 1 : 0;
+
+	thread_local usz g_system_wait = 0, g_busy_wait = 0;
+
+	if (busy_waiting_switch)
+	{
+		g_busy_wait++;
+	}
+	else
+	{
+		g_system_wait++;
+	}
+
+	if ((g_system_wait + g_busy_wait) && (g_system_wait + g_busy_wait) % 200 == 0)
+	{
+		spu_log.trace("SPU wait: count=%d. switch=%d, spin=%d, busy=%d, system=%d, {%d, %d, %d, %d}", total_wait, busy_waiting_switch, !"TODO: Spin", +g_busy_wait, +g_system_wait, old_stats[0], old_stats[1], old_stats[2], old_stats[3]);
+	}
+
+	if ((g_system_wait + g_busy_wait) % 5000 == 0)
+	{
+		g_system_wait = 0;
+		g_busy_wait = 0;
+	}
+
+	return busy_waiting_switch;
+}
+
 bool spu_thread::process_mfc_cmd()
 {
 	// Stall infinitely if MFC queue is full
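The new helper consolidates the spin-versus-sleep predictor that previously lived inline in process_mfc_cmd. Each call rotates a short per-address history of recent wait lengths, long recent waits raise add_count, and the low bits of a timestamp act as a cheap pseudo-random roll against the configured percentage, so no PRNG is needed on the hot path. Below is a minimal standalone sketch of the same decision rule, covering only the 4-entry branch, with the emulator-specific pieces (cfg::uint, ensure, usz, spu_log) replaced by plain C++; the names are illustrative, not RPCS3's API.

#include <array>
#include <cstdint>

// Simplified model of the predictor: rotate the history, weigh recent wait
// lengths, then roll timestamp bits against the configured percentage.
static bool choose_busy_wait(std::array<std::uint8_t, 4>& stats, std::uint64_t now, unsigned percent)
{
	// Rotate history (slot 0 becomes the newest, empty entry)
	const std::array<std::uint8_t, 4> old = stats;
	stats = {0, old[0], old[1], old[2]};

	unsigned total_wait = 0, zero_count = 0;
	for (std::uint8_t v : old)
	{
		total_wait += v;
		zero_count += (v == 0);
	}

	// Long recent waits push the roll toward OS sleep (returning false)
	const unsigned add_count =
		  zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
		: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
		: zero_count == 1 && total_wait >= 8  ? (total_wait - 7) * 40
		: zero_count == 0 && total_wait >= 6  ? (total_wait - 5) * 40
		: 0;

	// Shift right so timer granularity does not dominate the "random" bits
	return (now >> 8) % 100 + add_count < percent;
}

Using timestamp bits instead of a real random source keeps the hot path cheap and, as the comment in the patch notes, decorrelates the decision across CPUs with different timer behaviour.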
@@ -4611,61 +4731,16 @@ bool spu_thread::process_mfc_cmd()
 
 	getllar_spin_count = std::min<u32>(getllar_spin_count + 1, u16{umax});
 
-	static atomic_t<usz> g_ok = 0, g_fail = 0;
-
 	if (getllar_busy_waiting_switch == umax && getllar_spin_count == 4)
 	{
 		// Hidden value to force busy waiting (100 to 1 are dynamically adjusted, 0 is not)
 		if (!g_cfg.core.spu_getllar_spin_optimization_disabled)
 		{
-			const u32 percent = g_cfg.core.spu_getllar_busy_waiting_percentage;
-
-			// Predict whether or not to use operating system sleep based on history
-			auto& stats = getllar_wait_time[(addr % SPU_LS_SIZE) / 128];
-
-			const std::array<u8, 4> old_stats = stats;
-			std::array<u8, 4> new_stats{};
-
-			// Rotate history (prepare newest entry)
-			new_stats[0] = 0;
-			new_stats[1] = old_stats[0];
-			new_stats[2] = old_stats[1];
-			new_stats[3] = old_stats[2];
-
-			stats = new_stats;
-
-			u32 total_wait = 0;
-			u32 zero_count = 0; // Try to ignore major inconsistencies
-
-			for (u8 val : old_stats)
-			{
-				total_wait += val;
-				if (val == 0) ++zero_count;
-			}
-
-			// Add to chance if previous wait was long enough
-			const u32 add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
-				: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
-				: zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40
-				: zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40
-				: 0;
-
-			// Evalute its value (shift-right to ensure its randomness with different CPUs)
-			getllar_busy_waiting_switch = ((perf0.get() >> 8) % 100 + add_count < percent) ? 1 : 0;
+			getllar_evaluate_time = perf0.get();
+			auto& history = getllar_wait_time[(addr % SPU_LS_SIZE) / 128];
 
-			if (getllar_busy_waiting_switch)
-			{
-				g_fail++;
-			}
-			else
-			{
-				g_ok++;
-			}
-
-			if ((g_ok + g_fail) % 200 == 0 && !getllar_busy_waiting_switch)
-				spu_log.trace("SPU wait: count=%d. switch=%d, spin=%d, fail=%d, ok=%d, {%d, %d, %d, %d}", total_wait, getllar_busy_waiting_switch, getllar_spin_count, +g_fail, +g_ok, old_stats[0], old_stats[1], old_stats[2], old_stats[3]);
+			getllar_busy_waiting_switch =
+				evaluate_spin_optimization({ history.data(), history.size() }, getllar_evaluate_time, g_cfg.core.spu_getllar_busy_waiting_percentage);
 		}
 		else
 		{
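With the inline logic gone, the GETLLAR decision is delegated to the shared helper, keyed by the reservation address: each 128-byte reservation granule of SPU local storage owns its own 4-sample history, handed over as a std::span. A small illustration of that indexing, assuming the usual 256 KiB local store; the names here are placeholders, not RPCS3's:

#include <array>
#include <cstdint>

constexpr std::uint32_t ls_size = 256 * 1024; // SPU local storage (0x40000 bytes)

// One 4-sample wait history per 128-byte reservation granule: 2048 slots
std::array<std::array<std::uint8_t, 4>, ls_size / 128> wait_time{};

std::array<std::uint8_t, 4>& history_for(std::uint32_t addr)
{
	return wait_time[(addr % ls_size) / 128];
}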
@@ -5916,7 +5991,52 @@ s64 spu_thread::get_ch_value(u32 ch)
 			return true;
 		};
 
-		for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
+		const bool is_LR_wait = raddr && mask1 & SPU_EVENT_LR;
+
+		auto& history = eventstat_wait_time[(raddr % SPU_LS_SIZE) / 128];
+
+		if (is_LR_wait)
+		{
+			const u32 spu_group_restart = group ? +group->stop_count : 0;
+
+			// Check if waiting session changed
+			if (eventstat_raddr != raddr || eventstat_block_counter != block_counter || last_getllar != eventstat_getllar || eventstat_spu_group_restart != spu_group_restart)
+			{
+				eventstat_raddr = raddr;
+				eventstat_block_counter = block_counter;
+				eventstat_getllar = last_getllar;
+				eventstat_spu_group_restart = spu_group_restart;
+				eventstat_spin_count = 0;
+				eventstat_evaluate_time = get_system_time();
+				eventstat_busy_waiting_switch = umax;
+			}
+			else
+			{
+				u8& val = history.front();
+				val = static_cast<u8>(std::min<u32>(val + 1, u8{umax}));
+			}
+		}
+		else
+		{
+			eventstat_busy_waiting_switch = 0;
+			eventstat_raddr = 0;
+			eventstat_block_counter = 0;
+		}
+
+		if (eventstat_busy_waiting_switch == umax)
+		{
+			bool value = false;
+
+			if (is_LR_wait)
+			{
+				// Make single-threaded groups inclined for busy-waiting
+				value = evaluate_spin_optimization({ history.data(), history.size() }, eventstat_evaluate_time, g_cfg.core.spu_reservation_busy_waiting_percentage, group && group->max_num == 1) != 0;
+			}
+
+			eventstat_busy_waiting_switch = value ? 1 : 0;
+		}
+
+		for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false)
 		{
 			const auto old = +state;
 
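The RdEventStat path now tracks what amounts to a waiting session: the cached busy-wait decision stays valid only while the reservation address, the block counter, the last GETLLAR location, and the group's stop count are all unchanged; any difference resets the spin counter and marks the switch for re-evaluation (umax). One hypothetical way to picture the invalidation key, as a C++20 struct rather than the separate eventstat_* members the patch actually uses:

#include <cstdint>

// If any field differs from the stored copy, the wait history is stale and
// the busy-wait decision must be re-evaluated (requires C++20 for = default)
struct wait_session
{
	std::uint32_t raddr = 0;          // reservation address
	std::uint64_t block_counter = 0;  // execution progress marker
	std::uint32_t last_getllar = 0;   // location of the last GETLLAR
	std::uint64_t group_restarts = 0; // SPU group stop/restart count

	bool operator==(const wait_session&) const = default;
};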
@@ -5931,7 +6051,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 			}
 
 			// Optimized check
-			if (raddr && mask1 & SPU_EVENT_LR)
+			if (is_LR_wait)
 			{
 				if (cache_line_waiter_index == umax)
 				{
@@ -5962,6 +6082,59 @@ s64 spu_thread::get_ch_value(u32 ch)
 					set_events(SPU_EVENT_LR);
 					continue;
 				}
+
+				if (!is_first && eventstat_busy_waiting_switch != 1)
+				{
+					u8& val = history.front();
+					val = static_cast<u8>(std::min<u32>(val + 1, u8{umax}));
+				}
 			}
+
+			if (eventstat_busy_waiting_switch == 1)
+			{
+				// Don't be stubborn, force operating system sleep if too much time has passed
+				const u64 time_since = get_system_time() - eventstat_evaluate_time;
+
+				if (time_since >= (utils::get_thread_count() >= 12 ? 2500 : 200))
+				{
+					spu_log.trace("SPU RdEventStat wait for 0x%x failed", raddr);
+					history.front() = 2;
+					eventstat_busy_waiting_switch = 0;
+					continue;
+				}
+
+#if defined(ARCH_X64)
+				if (utils::has_um_wait())
+				{
+					if (utils::has_waitpkg())
+					{
+						__tpause(std::min<u32>(eventstat_spin_count, 10) * 500, 0x1);
+					}
+					else
+					{
+						struct check_wait_t
+						{
+							static FORCE_INLINE bool needs_wait(u64 rtime, const atomic_t<u64>& mem_rtime) noexcept
+							{
+								return rtime == mem_rtime;
+							}
+						};
+
+						// Provide the first X64 cache line of the reservation to be tracked
+						__mwaitx<check_wait_t>(std::min<u32>(eventstat_spin_count, 17) * 500, 0xf0, std::addressof(*resrv_mem), +rtime, vm::reservation_acquire(raddr));
+					}
+				}
+				else
+#endif
+				{
+					busy_wait(300);
+				}
+
+				// Check other reservations in other threads
+				lv2_obj::notify_all();
+
+				eventstat_spin_count++;
+				continue;
+			}
 
 			if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR)
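When the cached decision is to busy wait, the loop escalates through three x86 strategies: __tpause on WAITPKG-capable CPUs, __mwaitx on CPUs with MONITORX (armed on the first cache line of the reservation so a write can wake the thread early), and a plain busy_wait(300) otherwise, with the wait deadline growing alongside eventstat_spin_count. A hard cap (200 us, or 2500 us on machines with 12 or more hardware threads) eventually forces an OS sleep regardless. A rough sketch of the WAITPKG leg using the public intrinsic; it assumes CPUID has already confirmed support and a -mwaitpkg (or equivalent) build, and note that RPCS3's in-tree __tpause wrapper takes a relative cycle count instead of the absolute TSC deadline shown here (older GCC may also need x86intrin.h for __rdtsc):

#include <algorithm>
#include <cstdint>
#include <immintrin.h> // _tpause; requires WAITPKG support

// Pause for a bounded number of TSC cycles, scaled by how long we have been
// spinning already. Control 0x1 selects C0.1, the lighter wait state with
// the fastest wakeup; the CPU returns early if the deadline already passed.
inline void bounded_tpause(std::uint32_t spin_count)
{
	const std::uint64_t deadline = __rdtsc() + std::min<std::uint32_t>(spin_count, 10) * 500u;
	_tpause(0x1, deadline);
}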
@@ -805,6 +805,14 @@ public:
 	u32 getllar_busy_waiting_switch = umax; // umax means the test needs evaluation, otherwise it's a boolean
 	u64 getllar_evaluate_time = 0;
 
+	u32 eventstat_raddr = 0;
+	u32 eventstat_getllar = 0;
+	u64 eventstat_block_counter = 0;
+	u64 eventstat_spu_group_restart = 0;
+	u64 eventstat_spin_count = 0;
+	u64 eventstat_evaluate_time = 0;
+	u32 eventstat_busy_waiting_switch = 0;
+
 	std::vector<mfc_cmd_dump> mfc_history;
 	u64 mfc_dump_idx = 0;
 	static constexpr u32 max_mfc_dump_idx = 4096;

@@ -828,6 +836,7 @@ public:
 	bool stop_flag_removal_protection = false;
 
 	std::array<std::array<u8, 4>, SPU_LS_SIZE / 128> getllar_wait_time{};
+	std::array<std::array<u8, 16>, SPU_LS_SIZE / 128> eventstat_wait_time{};
 
 	void push_snr(u32 number, u32 value);
 	static void do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls);
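The new per-thread table is four times deeper than the GETLLAR one (16 samples per 128-byte line instead of 4), which is what lets evaluate_spin_optimization track consecutive-zero runs for event-stat waits. The footprint stays modest; assuming SPU_LS_SIZE is the usual 256 KiB local store:

#include <cstddef>

constexpr std::size_t ls_size       = 256 * 1024;    // SPU local storage
constexpr std::size_t lines         = ls_size / 128; // 2048 reservation granules
constexpr std::size_t getllar_bytes = lines * 4;     // 8 KiB per SPU thread
constexpr std::size_t evstat_bytes  = lines * 16;    // 32 KiB per SPU thread

static_assert(lines == 2048 && getllar_bytes == 8192 && evstat_bytes == 32768);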
@@ -31,7 +31,7 @@ struct cfg_root : cfg::node
 	cfg::_enum<thread_scheduler_mode> thread_scheduler{this, "Thread Scheduler Mode", thread_scheduler_mode::os};
 	cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false };
 	cfg::_enum<spu_decoder_type> spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm };
-	cfg::uint<0, 100> spu_reservation_busy_waiting_percentage{ this, "SPU Reservation Busy Waiting Percentage", 0, true };
+	cfg::uint<0, 100> spu_reservation_busy_waiting_percentage{ this, "SPU Reservation Busy Waiting Percentage 1", 100, true };
 	cfg::uint<0, 100> spu_getllar_busy_waiting_percentage{ this, "SPU GETLLAR Busy Waiting Percentage", 100, true };
 	cfg::_bool spu_getllar_spin_optimization_disabled{ this, "Disable SPU GETLLAR Spin Optimization", false, true };
 	cfg::_bool spu_debug{ this, "SPU Debug" };
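Note the key rename: "SPU Reservation Busy Waiting Percentage" becomes "SPU Reservation Busy Waiting Percentage 1" while the default flips from 0 to 100. Because cfg entries are matched by name, a value saved under the old key is ignored on load, so existing configurations fall back to the new default and pick up the dynamically adjusted behaviour instead of a stale never-busy-wait setting.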