This commit is contained in:
digant73 2024-10-14 15:01:58 +02:00
commit fc94b933ad
3 changed files with 99 additions and 115 deletions

View file

@ -1673,6 +1673,12 @@ void spu_thread::cpu_init()
status_npc.raw() = {get_type() == spu_type::isolated ? SPU_STATUS_IS_ISOLATED : 0, 0}; status_npc.raw() = {get_type() == spu_type::isolated ? SPU_STATUS_IS_ISOLATED : 0, 0};
run_ctrl.raw() = 0; run_ctrl.raw() = 0;
spurs_last_task_timestamp = 0;
spurs_wait_duration_last = 0;
spurs_average_task_duration = 0;
spurs_waited = false;
spurs_entered_wait = false;
int_ctrl[0].clear(); int_ctrl[0].clear();
int_ctrl[1].clear(); int_ctrl[1].clear();
int_ctrl[2].clear(); int_ctrl[2].clear();
@ -4890,53 +4896,71 @@ bool spu_thread::process_mfc_cmd()
// Avoid logging useless commands if there is no reservation // Avoid logging useless commands if there is no reservation
const bool dump = g_cfg.core.mfc_debug && raddr; const bool dump = g_cfg.core.mfc_debug && raddr;
const bool is_spurs_task_wait = pc == 0x11e4 && spurs_addr == raddr && g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_waited; const bool is_spurs_task_wait = pc == 0x11e4 && spurs_addr;
if (is_spurs_task_wait) if (!is_spurs_task_wait || spurs_addr != raddr || spurs_waited)
{
//
}
else if ((_ref<u8>(0x100 + 0x73) & (1u << index)) == 0 && (static_cast<u8>(rdata[0x73]) & (1u << index)) != 0)
{ {
// Wait for other threads to complete their tasks (temporarily) // Wait for other threads to complete their tasks (temporarily)
u32 max_run = group->max_run; u32 max_run = group->max_run;
u32 prev_running = group->spurs_running.fetch_op([max_run](u32& x) auto [prev_running, ok] = spurs_entered_wait ? std::make_pair(+group->spurs_running, false) :
group->spurs_running.fetch_op([max_run, num = group->max_num](u32& x)
{ {
if (x >= max_run) if (x >= max_run && max_run < num)
{ {
x--; x--;
return true; return true;
} }
return false; return false;
}).first; });
if (prev_running == max_run && prev_running != group->max_num) if (ok || spurs_entered_wait)
{ {
group->spurs_running.notify_one(); lv2_obj::prepare_for_sleep(*this);
if (group->spurs_running == max_run - 1) if (ok)
{ {
// Try to let another thread slip in and take over execution if (prev_running == max_run)
thread_ctrl::wait_for(300);
// Try to quit waiting
prev_running = group->spurs_running.fetch_op([max_run](u32& x)
{ {
if (x < max_run) group->spurs_running.notify_one();
if (group->spurs_running == max_run - 1)
{ {
x++; // Try to let another thread slip in and take over execution
return true; thread_ctrl::wait_for(300);
// Update value
prev_running = group->spurs_running + 1;
} }
}
return false; // Restore state
}).first; prev_running--;
} }
}
if (prev_running >= max_run)
{
const u64 before = get_system_time(); const u64 before = get_system_time();
u64 current = before; u64 current = before;
lv2_obj::prepare_for_sleep(*this); spurs_waited = true;
spurs_entered_wait = true;
// Wait the duration of 3 tasks
const u64 spurs_wait_time = std::clamp<u64>(spurs_average_task_duration / spurs_task_count_to_calculate * 3 + 2'000, 3'000, 100'000);
spurs_wait_duration_last = spurs_wait_time;
if (spurs_last_task_timestamp)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, before - spurs_last_task_timestamp);
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
spurs_last_task_timestamp = 0;
}
while (true) while (true)
{ {
@ -4945,7 +4969,10 @@ bool spu_thread::process_mfc_cmd()
break; break;
} }
thread_ctrl::wait_on(group->spurs_running, prev_running, 10000 - (current - before)); if (prev_running >= max_run)
{
thread_ctrl::wait_on(group->spurs_running, prev_running, spurs_wait_time - (current - before));
}
max_run = group->max_run; max_run = group->max_run;
@ -4967,9 +4994,10 @@ bool spu_thread::process_mfc_cmd()
current = get_system_time(); current = get_system_time();
if (current - before >= 10000u) if (current - before >= spurs_wait_time)
{ {
// Timed-out // Timed-out
group->spurs_running++;
break; break;
} }
} }
@ -4979,11 +5007,27 @@ bool spu_thread::process_mfc_cmd()
} }
} }
if (do_putllc(ch_mfc_cmd)) if (do_putllc(ch_mfc_cmd))
{ {
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS); ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
spurs_waited = false;
if (is_spurs_task_wait)
{
const u64 current = get_system_time();
if (spurs_last_task_timestamp)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= avg_entry;
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
spurs_average_task_duration -= avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
}
spurs_last_task_timestamp = current;
spurs_waited = false;
spurs_entered_wait = false;
}
} }
else else
{ {
@ -5588,101 +5632,37 @@ s64 spu_thread::get_ch_value(u32 ch)
return events.events & mask1; return events.events & mask1;
} }
const bool is_spurs_task_wait = pc == 0x11a8 && spurs_addr == raddr && g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_waited; const bool is_spurs_task_wait = pc == 0x11a8 && spurs_addr == raddr;
const auto wait_spurs_task = [&]
{
if (is_spurs_task_wait)
{
// Wait for other threads to complete their tasks (temporarily)
if (!is_stopped())
{
u32 max_run = group->max_run;
u32 prev_running = group->spurs_running.fetch_op([max_run](u32& x)
{
if (x < max_run)
{
x++;
return true;
}
return false;
}).first;
if (prev_running >= max_run)
{
const u64 before = get_system_time();
u64 current = before;
lv2_obj::prepare_for_sleep(*this);
spurs_waited = true;
while (true)
{
if (is_stopped())
{
break;
}
thread_ctrl::wait_on(group->spurs_running, prev_running, 10000u - (current - before));
max_run = group->max_run;
prev_running = group->spurs_running.fetch_op([max_run](u32& x)
{
if (x < max_run)
{
x++;
return true;
}
return false;
}).first;
if (prev_running < max_run)
{
break;
}
current = get_system_time();
if (current - before >= 10000u)
{
// Timed-out
group->spurs_running++;
break;
}
}
}
}
}
};
if (is_spurs_task_wait) if (is_spurs_task_wait)
{ {
const u32 prev_running = group->spurs_running.fetch_op([](u32& x) if (g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_entered_wait && (static_cast<u8>(rdata[0x73]) & (1u << index)))
{ {
if (x) const u32 prev_running = group->spurs_running.fetch_op([](u32& x)
{ {
x--; if (x)
return true; {
x--;
return true;
}
return false;
}).first;
if (prev_running)
{
spurs_entered_wait = true;
} }
return false; if (prev_running == group->max_run && prev_running < group->max_num)
}).first;
if (prev_running == group->max_run && prev_running < group->max_num)
{
group->spurs_running.notify_one();
spurs_waited = true;
if (group->spurs_running == prev_running - 1)
{ {
// Try to let another thread slip in and take over execution group->spurs_running.notify_one();
thread_ctrl::wait_for(300);
if (group->spurs_running == prev_running - 1)
{
// Try to let another thread slip in and take over execution
thread_ctrl::wait_for(300);
}
} }
} }
} }
@ -5918,7 +5898,6 @@ s64 spu_thread::get_ch_value(u32 ch)
thread_ctrl::wait_on(state, old, 100); thread_ctrl::wait_on(state, old, 100);
} }
wait_spurs_task();
wakeup_delay(); wakeup_delay();
if (is_paused(state - cpu_flag::suspend)) if (is_paused(state - cpu_flag::suspend))

View file

@ -768,6 +768,11 @@ public:
const u32 lv2_id; // The actual id that is used by syscalls const u32 lv2_id; // The actual id that is used by syscalls
u32 spurs_addr = 0; u32 spurs_addr = 0;
bool spurs_waited = false; bool spurs_waited = false;
bool spurs_entered_wait = false;
u64 spurs_wait_duration_last = 0;
u64 spurs_average_task_duration = 0;
u64 spurs_last_task_timestamp = 0;
static constexpr u64 spurs_task_count_to_calculate = 10;
spu_thread* next_cpu{}; // LV2 thread queues' node link spu_thread* next_cpu{}; // LV2 thread queues' node link

View file

@ -39,7 +39,7 @@ struct cfg_root : cfg::node
cfg::_int<0, 6> preferred_spu_threads{ this, "Preferred SPU Threads", 0, true }; // Number of hardware threads dedicated to heavy simultaneous spu tasks cfg::_int<0, 6> preferred_spu_threads{ this, "Preferred SPU Threads", 0, true }; // Number of hardware threads dedicated to heavy simultaneous spu tasks
cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free
cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield
cfg::_int<0, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less than 6, max number of running SPURS threads in each thread group. cfg::_int<1, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less than 6, max number of running SPURS threads in each thread group.
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe }; cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe };
cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false }; cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false };
cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true }; cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true };