digant73 2024-10-14 15:01:58 +02:00
parent 70124053c9
commit fc94b933ad
3 changed files with 99 additions and 115 deletions

View file

@ -1673,6 +1673,12 @@ void spu_thread::cpu_init()
status_npc.raw() = {get_type() == spu_type::isolated ? SPU_STATUS_IS_ISOLATED : 0, 0};
run_ctrl.raw() = 0;
spurs_last_task_timestamp = 0;
spurs_wait_duration_last = 0;
spurs_average_task_duration = 0;
spurs_waited = false;
spurs_entered_wait = false;
int_ctrl[0].clear();
int_ctrl[1].clear();
int_ctrl[2].clear();
@ -4890,53 +4896,71 @@ bool spu_thread::process_mfc_cmd()
// Avoid logging useless commands if there is no reservation
const bool dump = g_cfg.core.mfc_debug && raddr;
const bool is_spurs_task_wait = pc == 0x11e4 && spurs_addr == raddr && g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_waited;
const bool is_spurs_task_wait = pc == 0x11e4 && spurs_addr;
if (is_spurs_task_wait)
if (!is_spurs_task_wait || spurs_addr != raddr || spurs_waited)
{
//
}
else if ((_ref<u8>(0x100 + 0x73) & (1u << index)) == 0 && (static_cast<u8>(rdata[0x73]) & (1u << index)) != 0)
{
// Wait for other threads to complete their tasks (temporarily)
u32 max_run = group->max_run;
u32 prev_running = group->spurs_running.fetch_op([max_run](u32& x)
auto [prev_running, ok] = spurs_entered_wait ? std::make_pair(+group->spurs_running, false) :
group->spurs_running.fetch_op([max_run, num = group->max_num](u32& x)
{
if (x >= max_run)
if (x >= max_run && max_run < num)
{
x--;
return true;
}
return false;
}).first;
});
if (prev_running == max_run && prev_running != group->max_num)
if (ok || spurs_entered_wait)
{
group->spurs_running.notify_one();
lv2_obj::prepare_for_sleep(*this);
if (group->spurs_running == max_run - 1)
if (ok)
{
// Try to let another thread slip in and take over execution
thread_ctrl::wait_for(300);
// Try to quit waiting
prev_running = group->spurs_running.fetch_op([max_run](u32& x)
if (prev_running == max_run)
{
if (x < max_run)
group->spurs_running.notify_one();
if (group->spurs_running == max_run - 1)
{
x++;
return true;
// Try to let another thread slip in and take over execution
thread_ctrl::wait_for(300);
// Update value
prev_running = group->spurs_running + 1;
}
}
return false;
}).first;
// Restore state
prev_running--;
}
}
if (prev_running >= max_run)
{
const u64 before = get_system_time();
u64 current = before;
lv2_obj::prepare_for_sleep(*this);
spurs_waited = true;
spurs_entered_wait = true;
// Wait the duration of 3 tasks
const u64 spurs_wait_time = std::clamp<u64>(spurs_average_task_duration / spurs_task_count_to_calculate * 3 + 2'000, 3'000, 100'000);
spurs_wait_duration_last = spurs_wait_time;
if (spurs_last_task_timestamp)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, before - spurs_last_task_timestamp);
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
spurs_last_task_timestamp = 0;
}
while (true)
{
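
The adaptive timeout introduced in this hunk can be modelled in isolation. The sketch below is not the emulator code; it only restates the arithmetic with hypothetical names: a running average over spurs_task_count_to_calculate (10) samples, each sample capped at 45'000 us, and a wait budget of roughly three average task durations plus a small margin, clamped to [3'000, 100'000] us.

#include <algorithm>
#include <cstdint>

// Hypothetical standalone model of the bookkeeping in this hunk.
struct spurs_wait_model
{
    static constexpr std::uint64_t sample_count = 10;     // mirrors spurs_task_count_to_calculate
    static constexpr std::uint64_t sample_cap   = 45'000; // cap on a single task duration (us)

    std::uint64_t average_total = 0; // running sum, counterpart of spurs_average_task_duration

    // Fold one measured task duration into the running average:
    // drop one "average" entry, then add the capped new sample.
    void add_sample(std::uint64_t duration_us)
    {
        const std::uint64_t avg_entry = average_total / sample_count;
        average_total -= avg_entry;
        average_total += std::min<std::uint64_t>(sample_cap, duration_us);
    }

    // Wait budget: roughly three average task durations plus a small margin,
    // clamped to [3'000, 100'000] us, as computed for spurs_wait_time above.
    std::uint64_t wait_time_us() const
    {
        return std::clamp<std::uint64_t>(average_total / sample_count * 3 + 2'000, 3'000, 100'000);
    }
};
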
@ -4945,7 +4969,10 @@ bool spu_thread::process_mfc_cmd()
break;
}
thread_ctrl::wait_on(group->spurs_running, prev_running, 10000 - (current - before));
if (prev_running >= max_run)
{
thread_ctrl::wait_on(group->spurs_running, prev_running, spurs_wait_time - (current - before));
}
max_run = group->max_run;
@ -4967,9 +4994,10 @@ bool spu_thread::process_mfc_cmd()
current = get_system_time();
if (current - before >= 10000u)
if (current - before >= spurs_wait_time)
{
// Timed-out
group->spurs_running++;
break;
}
}
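
The wait-or-timeout loop above is built on RPCS3's thread_ctrl::wait_on / notify_one over an atomic counter. Purely as an illustration of the same control flow, the sketch below uses a std::condition_variable instead; slot_gate, acquire and release are hypothetical names, not emulator APIs.

#include <chrono>
#include <condition_variable>
#include <mutex>

// Hypothetical helper: wait until a running-thread slot frees up or the
// adaptive budget expires, then run anyway (the "Timed-out" branch above).
struct slot_gate
{
    std::mutex m;
    std::condition_variable cv;
    unsigned running = 0; // counterpart of group->spurs_running
    unsigned max_run = 6; // counterpart of group->max_run

    // Returns true if a slot was acquired before the budget ran out.
    bool acquire(std::chrono::microseconds wait_budget)
    {
        std::unique_lock lock(m);

        if (cv.wait_for(lock, wait_budget, [&] { return running < max_run; }))
        {
            running++;
            return true;
        }

        running++; // timed out: proceed anyway, mirroring group->spurs_running++
        return false;
    }

    void release()
    {
        { std::lock_guard lock(m); running--; }
        cv.notify_one();
    }
};
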
@ -4979,11 +5007,27 @@ bool spu_thread::process_mfc_cmd()
}
}
if (do_putllc(ch_mfc_cmd))
{
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
spurs_waited = false;
if (is_spurs_task_wait)
{
const u64 current = get_system_time();
if (spurs_last_task_timestamp)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
}
spurs_last_task_timestamp = current;
spurs_waited = false;
spurs_entered_wait = false;
}
}
else
{
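
Reusing the hypothetical spurs_wait_model sketched earlier, the success path in this hunk amounts to: measure the time since the previous successful task fetch, fold it into the average, then restart the timer.

#include <cstdint>

// Hypothetical usage only, mirroring the do_putllc success branch above.
void on_task_fetch_success(spurs_wait_model& model, std::uint64_t& last_timestamp_us, std::uint64_t now_us)
{
    if (last_timestamp_us)
    {
        model.add_sample(now_us - last_timestamp_us); // duration of the task that just ended
    }

    last_timestamp_us = now_us; // the next task is considered to start now
}
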
@ -5588,101 +5632,37 @@ s64 spu_thread::get_ch_value(u32 ch)
return events.events & mask1;
}
const bool is_spurs_task_wait = pc == 0x11a8 && spurs_addr == raddr && g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_waited;
const auto wait_spurs_task = [&]
{
if (is_spurs_task_wait)
{
// Wait for other threads to complete their tasks (temporarily)
if (!is_stopped())
{
u32 max_run = group->max_run;
u32 prev_running = group->spurs_running.fetch_op([max_run](u32& x)
{
if (x < max_run)
{
x++;
return true;
}
return false;
}).first;
if (prev_running >= max_run)
{
const u64 before = get_system_time();
u64 current = before;
lv2_obj::prepare_for_sleep(*this);
spurs_waited = true;
while (true)
{
if (is_stopped())
{
break;
}
thread_ctrl::wait_on(group->spurs_running, prev_running, 10000u - (current - before));
max_run = group->max_run;
prev_running = group->spurs_running.fetch_op([max_run](u32& x)
{
if (x < max_run)
{
x++;
return true;
}
return false;
}).first;
if (prev_running < max_run)
{
break;
}
current = get_system_time();
if (current - before >= 10000u)
{
// Timed-out
group->spurs_running++;
break;
}
}
}
}
}
};
const bool is_spurs_task_wait = pc == 0x11a8 && spurs_addr == raddr;
if (is_spurs_task_wait)
{
const u32 prev_running = group->spurs_running.fetch_op([](u32& x)
if (g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_entered_wait && (static_cast<u8>(rdata[0x73]) & (1u << index)))
{
if (x)
const u32 prev_running = group->spurs_running.fetch_op([](u32& x)
{
x--;
return true;
if (x)
{
x--;
return true;
}
return false;
}).first;
if (prev_running)
{
spurs_entered_wait = true;
}
return false;
}).first;
if (prev_running == group->max_run && prev_running < group->max_num)
{
group->spurs_running.notify_one();
spurs_waited = true;
if (group->spurs_running == prev_running - 1)
if (prev_running == group->max_run && prev_running < group->max_num)
{
// Try to let another thread slip in and take over execution
thread_ctrl::wait_for(300);
group->spurs_running.notify_one();
if (group->spurs_running == prev_running - 1)
{
// Try to let another thread slip in and take over execution
thread_ctrl::wait_for(300);
}
}
}
}
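
fetch_op above is RPCS3's helper for a conditional atomic read-modify-write, not a standard API. With plain std::atomic, the "decrement only if nonzero and report the old value" step in this hunk can be sketched as a compare-exchange loop; try_release_slot is a hypothetical name. The earlier hunk in process_mfc_cmd uses the same pattern with a different predicate (x >= max_run && max_run < num).

#include <atomic>
#include <cstdint>
#include <utility>

// Hypothetical sketch of the conditional decrement done with fetch_op above:
// give up a running slot only if the counter is nonzero, and report the value
// observed before the attempt so the caller can decide what to do next.
std::pair<std::uint32_t, bool> try_release_slot(std::atomic<std::uint32_t>& running)
{
    std::uint32_t old = running.load();

    while (old != 0)
    {
        if (running.compare_exchange_weak(old, old - 1))
        {
            return {old, true}; // decremented, like the lambda returning true
        }
        // compare_exchange_weak reloaded 'old'; re-test the predicate and retry.
    }

    return {old, false}; // counter was 0, nothing changed
}
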
@ -5918,7 +5898,6 @@ s64 spu_thread::get_ch_value(u32 ch)
thread_ctrl::wait_on(state, old, 100);
}
wait_spurs_task();
wakeup_delay();
if (is_paused(state - cpu_flag::suspend))

View file

@ -768,6 +768,11 @@ public:
const u32 lv2_id; // The actual id that is used by syscalls
u32 spurs_addr = 0;
bool spurs_waited = false;
bool spurs_entered_wait = false;
u64 spurs_wait_duration_last = 0;
u64 spurs_average_task_duration = 0;
u64 spurs_last_task_timestamp = 0;
static constexpr u64 spurs_task_count_to_calculate = 10;
spu_thread* next_cpu{}; // LV2 thread queues' node link
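
Read together, the per-thread SPURS bookkeeping in spu_thread carries the following roles; the comments are an interpretation of how the fields are used in the hunks above, not part of the patch.

bool spurs_waited = false;           // already yielded once at the SPURS task-fetch point; cleared on a successful PUTLLC
bool spurs_entered_wait = false;     // this thread gave up its running slot before blocking and has not reclaimed it yet
u64 spurs_wait_duration_last = 0;    // last adaptive wait budget that was computed, in microseconds
u64 spurs_average_task_duration = 0; // running sum of roughly the last 10 task durations, each capped at 45'000 us
u64 spurs_last_task_timestamp = 0;   // timestamp of the previous successful task fetch, used to measure the next duration
static constexpr u64 spurs_task_count_to_calculate = 10; // window size of the running average
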

View file

@ -39,7 +39,7 @@ struct cfg_root : cfg::node
cfg::_int<0, 6> preferred_spu_threads{ this, "Preferred SPU Threads", 0, true }; // Number of hardware threads dedicated to heavy simultaneous spu tasks
cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free
cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield
cfg::_int<0, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less than 6, max number of running SPURS threads in each thread group.
cfg::_int<1, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less than 6, max number of running SPURS threads in each thread group.
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe };
cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false };
cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true };
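
The only change in this file raises the option's lower bound from 0 to 1; presumably a value of 0 would leave no SPURS thread in a group allowed to run. As a generic illustration of a ranged integer option, and not RPCS3's actual cfg::_int implementation, the idea is:

#include <cstdio>

// Hypothetical ranged setting: out-of-range assignments are rejected, so with
// Min = 1 at least one SPURS thread per group can always run.
template <int Min, int Max>
struct ranged_int
{
    int value;

    explicit ranged_int(int def) : value(def) {}

    bool set(int v)
    {
        if (v < Min || v > Max)
        {
            return false; // keep the previous value
        }

        value = v;
        return true;
    }
};

int main()
{
    ranged_int<1, 6> max_spurs_threads(6); // mirrors cfg::_int<1, 6> above, default 6
    std::printf("set(0) accepted: %d, value: %d\n", max_spurs_threads.set(0), max_spurs_threads.value);
    // prints: set(0) accepted: 0, value: 6
}
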