cpu_thread::test_state added

lv2_obj::sleep adjustment
synchronization fixes
Nekotekina 2017-02-22 13:10:55 +03:00
parent 4b6f8d2f62
commit 9000407a77
36 changed files with 421 additions and 302 deletions
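In short: busy-wait loops across lv2 and HLE code now call the new cpu_thread::test_state(), which wraps check_state() and unwinds via an exception instead of making every call site handle a bool. A minimal sketch of the pattern (the test_state body is taken from the CPUThread.cpp hunk below; try_acquire() is a hypothetical stand-in for the various atomic operations used at the converted call sites):

void cpu_thread::test_state()
{
	if (UNLIKELY(test(state))) // any state flag set?
	{
		if (check_state()) // true: the thread must return
		{
			throw cpu_flag::ret; // caught by the thread's main loop
		}
	}
}

// Typical converted call site:
while (!try_acquire()) // hypothetical: some atomic op on guest memory
{
	ppu.test_state(); // aborts the spin safely if the thread is paused or stopped
}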

View file

@ -2,6 +2,8 @@
#include "Emu/System.h"
#include "CPUThread.h"
#include <thread>
DECLARE(cpu_thread::g_threads_created){0};
DECLARE(cpu_thread::g_threads_deleted){0};
@ -17,10 +19,11 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
case cpu_flag::suspend: return "s";
case cpu_flag::ret: return "ret";
case cpu_flag::signal: return "sig";
case cpu_flag::dbg_global_pause: return "G.PAUSE";
case cpu_flag::dbg_global_stop: return "G.EXIT";
case cpu_flag::dbg_global_pause: return "G-PAUSE";
case cpu_flag::dbg_global_stop: return "G-EXIT";
case cpu_flag::dbg_pause: return "PAUSE";
case cpu_flag::dbg_step: return "STEP";
case cpu_flag::is_waiting: return "w";
case cpu_flag::__bitset_enum_max: break;
}
@ -63,6 +66,7 @@ void cpu_thread::on_task()
}
state -= cpu_flag::ret;
state += cpu_flag::is_waiting;
continue;
}
@ -93,8 +97,9 @@ bool cpu_thread::check_state()
while (true)
{
if (test(state & cpu_flag::exit))
if (test(state, cpu_flag::exit + cpu_flag::dbg_global_stop))
{
state += cpu_flag::is_waiting;
return true;
}
@ -103,11 +108,21 @@ bool cpu_thread::check_state()
cpu_sleep_called = false;
}
if (!test(state & (cpu_state_pause + cpu_flag::dbg_global_stop)))
if (!test(state, cpu_state_pause))
{
if (test(state, cpu_flag::is_waiting))
{
state -= cpu_flag::is_waiting;
}
break;
}
if (!state.test_and_set(cpu_flag::is_waiting))
{
continue;
}
if (test(state & cpu_flag::suspend) && !cpu_sleep_called)
{
cpu_sleep();
@ -122,6 +137,7 @@ bool cpu_thread::check_state()
if (test(state_, cpu_flag::ret + cpu_flag::stop))
{
state += cpu_flag::is_waiting;
return true;
}
@ -131,9 +147,21 @@ bool cpu_thread::check_state()
state -= cpu_flag::dbg_step;
}
state -= cpu_flag::is_waiting;
return false;
}
void cpu_thread::test_state()
{
if (UNLIKELY(test(state)))
{
if (check_state())
{
throw cpu_flag::ret;
}
}
}
void cpu_thread::run()
{
state -= cpu_flag::stop;

View file

@ -17,6 +17,8 @@ enum class cpu_flag : u32
dbg_pause, // Thread paused
dbg_step, // Thread forced to pause after one step (one instruction, etc)
is_waiting, // Informational, self-maintained
__bitset_enum_max
};
@ -36,11 +38,14 @@ public:
cpu_thread(u32 id);
// Public thread state
atomic_t<bs_t<cpu_flag>> state{+cpu_flag::stop};
atomic_t<bs_t<cpu_flag>> state{cpu_flag::stop + cpu_flag::is_waiting};
// Process thread state, return true if the checker must return
bool check_state();
// Process thread state
void test_state();
// Run thread
void run();
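For reference, state is an atomic bitset of cpu_flag; the operations this commit relies on look like this (a sketch inferred from the call sites in the diff, not the full bs_t API):

atomic_t<bs_t<cpu_flag>> state{cpu_flag::stop + cpu_flag::is_waiting};

state += cpu_flag::suspend;    // set a flag atomically
state -= cpu_flag::is_waiting; // clear a flag atomically
test(state);                   // true if any flag is set
test(state, cpu_flag::exit + cpu_flag::dbg_global_stop); // true if any of these is set
state.test_and_set(cpu_flag::is_waiting); // set a flag, return its previous value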

View file

@ -200,7 +200,7 @@ public:
// TODO: finalize
cellAdec.warning("adecEndSeq:");
cbFunc(*this, id, CELL_ADEC_MSG_TYPE_SEQDONE, CELL_OK, cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
just_finished = true;
break;
@ -377,13 +377,13 @@ public:
{
frame.data = nullptr; // to prevent destruction
cbFunc(*this, id, CELL_ADEC_MSG_TYPE_PCMOUT, CELL_OK, cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
}
}
cbFunc(*this, id, CELL_ADEC_MSG_TYPE_AUDONE, task.au.auInfo_addr, cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
break;
}

View file

@ -243,7 +243,7 @@ public:
dmuxMsg->msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE;
dmuxMsg->supplementalInfo = stream.userdata;
cbFunc(*this, id, dmuxMsg, cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
is_working = false;
@ -397,7 +397,7 @@ public:
esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND;
esMsg->supplementalInfo = stream.userdata;
es.cbFunc(*this, id, es.id, esMsg, es.cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
}
else
@ -463,7 +463,7 @@ public:
esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND;
esMsg->supplementalInfo = stream.userdata;
es.cbFunc(*this, id, es.id, esMsg, es.cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
if (pes.has_ts)
@ -540,7 +540,7 @@ public:
dmuxMsg->msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE;
dmuxMsg->supplementalInfo = stream.userdata;
cbFunc(*this, id, dmuxMsg, cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
stream = {};
@ -629,7 +629,7 @@ public:
esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND;
esMsg->supplementalInfo = stream.userdata;
es.cbFunc(*this, id, es.id, esMsg, es.cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
if (es.raw_data.size())
@ -642,7 +642,7 @@ public:
esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_FLUSH_DONE;
esMsg->supplementalInfo = stream.userdata;
es.cbFunc(*this, id, es.id, esMsg, es.cbArg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
break;
}

View file

@ -743,7 +743,7 @@ struct fs_aio_thread : ppu_thread
}
func(*this, aio, error, xid, result);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
}
};

View file

@ -12,7 +12,7 @@
logs::channel cellGcmSys("cellGcmSys", logs::level::notice);
extern s32 cellGcmCallback(vm::ptr<CellGcmContextData> context, u32 count);
extern s32 cellGcmCallback(ppu_thread& ppu, vm::ptr<CellGcmContextData> context, u32 count);
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr);
const u32 tiled_pitches[] = {
@ -1298,12 +1298,12 @@ static bool isInCommandBufferExcept(u32 getPos, u32 bufferBegin, u32 bufferEnd)
return true;
}
s32 cellGcmCallback(vm::ptr<CellGcmContextData> context, u32 count)
s32 cellGcmCallback(ppu_thread& ppu, vm::ptr<CellGcmContextData> context, u32 count)
{
cellGcmSys.trace("cellGcmCallback(context=*0x%x, count=0x%x)", context, count);
auto& ctrl = vm::_ref<CellGcmControl>(gcm_info.control_addr);
const std::chrono::time_point<steady_clock> enterWait = steady_clock::now();
// Flush command buffer (ie allow RSX to read up to context->current)
ctrl.put.exchange(getOffsetFromAddress(context->current.addr()));
@ -1317,18 +1317,18 @@ s32 cellGcmCallback(vm::ptr<CellGcmContextData> context, u32 count)
context->end.set(newCommandBuffer.second);
// Wait for rsx to "release" the new command buffer
while (!Emu.IsStopped())
while (true)
{
u32 getPos = ctrl.get.load();
if (isInCommandBufferExcept(getPos, newCommandBuffer.first, newCommandBuffer.second))
break;
std::chrono::time_point<steady_clock> waitPoint = steady_clock::now();
long long elapsedTime = std::chrono::duration_cast<std::chrono::seconds>(waitPoint - enterWait).count();
if (elapsedTime > 0)
cellGcmSys.error("Has wait for more than a second for command buffer to be released by RSX");
std::this_thread::yield();
ppu.state += cpu_flag::is_waiting;
ppu.test_state();
busy_wait();
}
ppu.test_state();
return CELL_OK;
}

View file

@ -572,7 +572,7 @@ void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
_spurs::handler_wait_ready(ppu, spurs);
}
CHECK_SUCCESS(sys_spu_thread_group_start(spurs->spuTG));
CHECK_SUCCESS(sys_spu_thread_group_start(ppu, spurs->spuTG));
if (s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null))
{
@ -1118,9 +1118,10 @@ s32 _spurs::initialize(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 revision,
}
// entry point cannot be initialized immediately because SPU LS will be rewritten by sys_spu_thread_group_start()
idm::get<SPUThread>(spurs->spus[num])->custom_task = [entry = spurs->spuImg.entry_point](SPUThread& spu)
//idm::get<SPUThread>(spurs->spus[num])->custom_task = [entry = spurs->spuImg.entry_point](SPUThread& spu)
{
spu.RegisterHleFunction(entry, spursKernelEntry);
// Disabled
//spu.RegisterHleFunction(entry, spursKernelEntry);
};
}

View file

@ -266,10 +266,10 @@ bool spursKernel1SelectWorkload(SPUThread& spu)
u32 wklSelectedId;
u32 pollStatus;
vm::reservation_op(vm::cast(ctxt->spurs.addr(), HERE), 128, [&]()
//vm::reservation_op(vm::cast(ctxt->spurs.addr(), HERE), 128, [&]()
{
// lock the first 0x80 bytes of spurs
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
// Calculate the contention (number of SPUs used) for each workload
u8 contention[CELL_SPURS_MAX_WORKLOAD];
@ -429,7 +429,7 @@ bool spursKernel1SelectWorkload(SPUThread& spu)
}
std::memcpy(vm::base(spu.offset + 0x100), spurs, 128);
});
}//);
u64 result = (u64)wklSelectedId << 32;
result |= pollStatus;
@ -450,10 +450,10 @@ bool spursKernel2SelectWorkload(SPUThread& spu)
u32 wklSelectedId;
u32 pollStatus;
vm::reservation_op(vm::cast(ctxt->spurs.addr(), HERE), 128, [&]()
//vm::reservation_op(vm::cast(ctxt->spurs.addr(), HERE), 128, [&]()
{
// lock the first 0x80 bytes of spurs
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
// Calculate the contention (number of SPUs used) for each workload
u8 contention[CELL_SPURS_MAX_WORKLOAD2];
@ -602,7 +602,7 @@ bool spursKernel2SelectWorkload(SPUThread& spu)
}
std::memcpy(vm::base(spu.offset + 0x100), spurs, 128);
});
}//);
u64 result = (u64)wklSelectedId << 32;
result |= pollStatus;
@ -633,10 +633,10 @@ void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus)
switch (wklInfo->addr.addr())
{
case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD:
spu.RegisterHleFunction(0xA00, spursSysServiceEntry);
//spu.RegisterHleFunction(0xA00, spursSysServiceEntry);
break;
case SPURS_IMG_ADDR_TASKSET_PM:
spu.RegisterHleFunction(0xA00, spursTasksetEntry);
//spu.RegisterHleFunction(0xA00, spursTasksetEntry);
break;
default:
std::memcpy(vm::base(spu.offset + 0xA00), wklInfo->addr.get_ptr(), wklInfo->size);
@ -721,10 +721,10 @@ bool spursKernelEntry(SPUThread& spu)
}
// Register SPURS kernel HLE functions
spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/);
spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry);
spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit);
spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload);
//spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/);
//spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry);
//spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit);
//spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload);
// Start the system service
spursKernelDispatchWorkload(spu, ((u64)CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) << 32);
@ -771,7 +771,7 @@ void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt)
while (true)
{
vm::reservation_acquire(vm::base(spu.offset + 0x100), vm::cast(ctxt->spurs.addr(), HERE), 128);
//vm::reservation_acquire(vm::base(spu.offset + 0x100), vm::cast(ctxt->spurs.addr(), HERE), 128);
auto spurs = vm::_ptr<CellSpurs>(spu.offset + 0x100);
// Find the number of SPUs that are idling in this SPURS instance
@ -860,7 +860,7 @@ void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt)
continue;
}
if (vm::reservation_update(vm::cast(ctxt->spurs.addr(), HERE), vm::base(spu.offset + 0x100), 128) && (shouldExit || foundReadyWorkload))
//if (vm::reservation_update(vm::cast(ctxt->spurs.addr(), HERE), vm::base(spu.offset + 0x100), 128) && (shouldExit || foundReadyWorkload))
{
break;
}
@ -888,11 +888,11 @@ void spursSysServiceMain(SPUThread& spu, u32 pollStatus)
{
ctxt->sysSrvInitialised = 1;
vm::reservation_acquire(vm::base(spu.offset + 0x100), vm::cast(ctxt->spurs.addr(), HERE), 128);
//vm::reservation_acquire(vm::base(spu.offset + 0x100), vm::cast(ctxt->spurs.addr(), HERE), 128);
vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
//vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
// Halt if already initialised
if (spurs->sysSrvOnSpu & (1 << ctxt->spuNum))
@ -904,7 +904,7 @@ void spursSysServiceMain(SPUThread& spu, u32 pollStatus)
spurs->sysSrvOnSpu |= 1 << ctxt->spuNum;
std::memcpy(vm::base(spu.offset + 0x2D80), spurs->wklState1, 128);
});
}//);
ctxt->traceBuffer = 0;
ctxt->traceMsgCount = -1;
@ -984,9 +984,9 @@ void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt)
bool updateWorkload = false;
bool terminate = false;
vm::reservation_op(vm::cast(ctxt->spurs.addr() + OFFSET_32(CellSpurs, wklState1), HERE), 128, [&]()
//vm::reservation_op(vm::cast(ctxt->spurs.addr() + OFFSET_32(CellSpurs, wklState1), HERE), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
// Terminate request
if (spurs->sysSrvMsgTerminate & (1 << ctxt->spuNum))
@ -1009,7 +1009,7 @@ void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt)
}
std::memcpy(vm::base(spu.offset + 0x2D80), spurs->wklState1, 128);
});
}//);
// Process update workload message
if (updateWorkload)
@ -1063,9 +1063,9 @@ void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt)
}
}
vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
//vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++)
{
@ -1120,7 +1120,7 @@ void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt)
}
std::memcpy(vm::base(spu.offset + 0x2D80), spurs->wklState1, 128);
});
}//);
if (wklShutdownBitSet)
{
@ -1135,9 +1135,9 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelCo
// workloads that have a shutdown completion hook registered
u32 wklNotifyBitSet;
u8 spuPort;
vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
//vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
wklNotifyBitSet = 0;
spuPort = spurs->spuPort;
@ -1163,7 +1163,7 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelCo
}
std::memcpy(vm::base(spu.offset + 0x2D80), spurs->wklState1, 128);
});
}//);
if (wklNotifyBitSet)
{
@ -1187,9 +1187,9 @@ void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 ar
bool notify;
u8 sysSrvMsgUpdateTrace;
vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
//vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
auto& trace = spurs->sysSrvTrace.raw();
sysSrvMsgUpdateTrace = trace.sysSrvMsgUpdateTrace;
@ -1211,12 +1211,12 @@ void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 ar
}
std::memcpy(vm::base(spu.offset + 0x2D80), spurs->wklState1, 128);
});
}//);
// Get trace parameters from CellSpurs and store them in the LS
if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0))
{
vm::reservation_acquire(vm::base(spu.offset + 0x80), ctxt->spurs.ptr(&CellSpurs::traceBuffer).addr(), 128);
//vm::reservation_acquire(vm::base(spu.offset + 0x80), ctxt->spurs.ptr(&CellSpurs::traceBuffer).addr(), 128);
auto spurs = vm::_ptr<CellSpurs>(spu.offset + 0x80 - OFFSET_32(CellSpurs, traceBuffer));
if (ctxt->traceMsgCount != 0xFF || spurs->traceBuffer.addr() == 0)
@ -1252,9 +1252,9 @@ void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContex
bool do_return = false;
vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
//vm::reservation_op(ctxt->spurs.ptr(&CellSpurs::wklState1).addr(), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
if (spurs->sysSrvPreemptWklId[ctxt->spuNum] == 0xFF)
{
@ -1266,15 +1266,15 @@ void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContex
spurs->sysSrvPreemptWklId[ctxt->spuNum] = 0xFF;
std::memcpy(vm::base(spu.offset + 0x2D80), spurs->wklState1, 128);
});
}//);
if (do_return) return;
spursSysServiceActivateWorkload(spu, ctxt);
vm::reservation_op(vm::cast(ctxt->spurs.addr(), HERE), 128, [&]()
//vm::reservation_op(vm::cast(ctxt->spurs.addr(), HERE), 128, [&]()
{
auto spurs = ctxt->spurs.get_ptr_priv();
auto spurs = ctxt->spurs.get_ptr();
if (wklId >= CELL_SPURS_MAX_WORKLOAD)
{
@ -1288,7 +1288,7 @@ void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContex
}
std::memcpy(vm::base(spu.offset + 0x100), spurs, 128);
});
}//);
// Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace
// uses the current worload id to determine the workload to which the trace belongs
@ -1341,9 +1341,9 @@ bool spursTasksetEntry(SPUThread& spu)
ctxt->taskId = 0xFFFFFFFF;
// Register SPURS takset policy module HLE functions
spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
//spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
//spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
//spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
try
{
@ -1433,9 +1433,9 @@ s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* is
s32 rc = CELL_OK;
s32 numNewlyReadyTasks;
vm::reservation_op(vm::cast(ctxt->taskset.addr(), HERE), 128, [&]()
//vm::reservation_op(vm::cast(ctxt->taskset.addr(), HERE), 128, [&]()
{
auto taskset = ctxt->taskset.get_ptr_priv();
auto taskset = ctxt->taskset.get_ptr();
// Verify taskset state is valid
be_t<v128> _0(v128::from32(0));
@ -1592,12 +1592,12 @@ s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* is
taskset->ready = ready;
std::memcpy(vm::base(spu.offset + 0x2700), taskset, 128);
});
}//);
// Increment the ready count of the workload by the number of tasks that have become ready
vm::reservation_op(vm::cast(kernelCtxt->spurs.addr(), HERE), 128, [&]()
//vm::reservation_op(vm::cast(kernelCtxt->spurs.addr(), HERE), 128, [&]()
{
auto spurs = kernelCtxt->spurs.get_ptr_priv();
auto spurs = kernelCtxt->spurs.get_ptr();
s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[kernelCtxt->wklCurrentId].load() : spurs->wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].load();
readyCount += numNewlyReadyTasks;
@ -1613,7 +1613,7 @@ s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* is
}
std::memcpy(vm::base(spu.offset + 0x100), spurs, 128);
});
}//);
return rc;
}

View file

@ -6,10 +6,6 @@
#include "Emu/Cell/lv2/sys_process.h"
#include "cellSync.h"
#include "Emu/Memory/wait_engine.h"
#include <thread>
logs::channel cellSync("cellSync", logs::level::notice);
template<>
@ -63,7 +59,7 @@ error_code cellSyncMutexInitialize(vm::ptr<CellSyncMutex> mutex)
return CELL_OK;
}
error_code cellSyncMutexLock(vm::ptr<CellSyncMutex> mutex)
error_code cellSyncMutexLock(ppu_thread& ppu, vm::ptr<CellSyncMutex> mutex)
{
cellSync.trace("cellSyncMutexLock(mutex=*0x%x)", mutex);
@ -81,10 +77,10 @@ error_code cellSyncMutexLock(vm::ptr<CellSyncMutex> mutex)
const auto order = mutex->ctrl.atomic_op(&CellSyncMutex::lock_begin);
// Wait until rel value is equal to old acq value
vm::wait_op(mutex.addr(), 4, [&]
while (mutex->ctrl.load().rel != order)
{
return mutex->ctrl.load().rel == order;
});
ppu.test_state();
}
_mm_mfence();
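The loop above is a ticket lock: lock_begin atomically advances the acq counter and returns the caller's ticket, and unlock advances rel. A standalone sketch (std::atomic counters stand in for the big-endian pair in guest memory; the post-increment in lock_begin is assumed from the "old acq value" comment):

#include <atomic>

std::atomic<uint16_t> acq{0}, rel{0};

void ticket_lock(ppu_thread& ppu)
{
	const uint16_t order = acq.fetch_add(1); // lock_begin: take a ticket
	while (rel.load() != order)              // wait for our turn
	{
		ppu.test_state();                    // poll thread state while spinning
	}
	std::atomic_thread_fence(std::memory_order_seq_cst); // mirrors _mm_mfence()
}

void ticket_unlock()
{
	rel.fetch_add(1); // admit the next ticket holder
}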
@ -129,8 +125,6 @@ error_code cellSyncMutexUnlock(vm::ptr<CellSyncMutex> mutex)
mutex->ctrl.atomic_op(&CellSyncMutex::unlock);
vm::notify_at(mutex.addr(), 4);
return CELL_OK;
}
@ -159,7 +153,7 @@ error_code cellSyncBarrierInitialize(vm::ptr<CellSyncBarrier> barrier, u16 total
return CELL_OK;
}
error_code cellSyncBarrierNotify(vm::ptr<CellSyncBarrier> barrier)
error_code cellSyncBarrierNotify(ppu_thread& ppu, vm::ptr<CellSyncBarrier> barrier)
{
cellSync.trace("cellSyncBarrierNotify(barrier=*0x%x)", barrier);
@ -173,9 +167,10 @@ error_code cellSyncBarrierNotify(vm::ptr<CellSyncBarrier> barrier)
return CELL_SYNC_ERROR_ALIGN;
}
vm::wait_op(barrier.addr(), 4, [&] { return barrier->ctrl.atomic_op(&CellSyncBarrier::try_notify); });
vm::notify_at(barrier.addr(), 4);
while (!barrier->ctrl.atomic_op(&CellSyncBarrier::try_notify))
{
ppu.test_state();
}
return CELL_OK;
}
@ -201,12 +196,10 @@ error_code cellSyncBarrierTryNotify(vm::ptr<CellSyncBarrier> barrier)
return not_an_error(CELL_SYNC_ERROR_BUSY);
}
vm::notify_at(barrier.addr(), 4);
return CELL_OK;
}
error_code cellSyncBarrierWait(vm::ptr<CellSyncBarrier> barrier)
error_code cellSyncBarrierWait(ppu_thread& ppu, vm::ptr<CellSyncBarrier> barrier)
{
cellSync.trace("cellSyncBarrierWait(barrier=*0x%x)", barrier);
@ -222,9 +215,10 @@ error_code cellSyncBarrierWait(vm::ptr<CellSyncBarrier> barrier)
_mm_mfence();
vm::wait_op(barrier.addr(), 4, [&] { return barrier->ctrl.atomic_op(&CellSyncBarrier::try_wait); });
vm::notify_at(barrier.addr(), 4);
while (!barrier->ctrl.atomic_op(&CellSyncBarrier::try_wait))
{
ppu.test_state();
}
return CELL_OK;
}
@ -250,8 +244,6 @@ error_code cellSyncBarrierTryWait(vm::ptr<CellSyncBarrier> barrier)
return not_an_error(CELL_SYNC_ERROR_BUSY);
}
vm::notify_at(barrier.addr(), 4);
return CELL_OK;
}
@ -284,7 +276,7 @@ error_code cellSyncRwmInitialize(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer,
return CELL_OK;
}
error_code cellSyncRwmRead(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer)
error_code cellSyncRwmRead(ppu_thread& ppu, vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer)
{
cellSync.trace("cellSyncRwmRead(rwm=*0x%x, buffer=*0x%x)", rwm, buffer);
@ -299,7 +291,10 @@ error_code cellSyncRwmRead(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer)
}
// wait until `writers` is zero, increase `readers`
vm::wait_op(rwm.addr(), 8, [&] { return rwm->ctrl.atomic_op(&CellSyncRwm::try_read_begin); });
while (!rwm->ctrl.atomic_op(&CellSyncRwm::try_read_begin))
{
ppu.test_state();
}
// copy data to buffer
std::memcpy(buffer.get_ptr(), rwm->buffer.get_ptr(), rwm->size);
@ -310,8 +305,6 @@ error_code cellSyncRwmRead(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer)
return CELL_SYNC_ERROR_ABORT;
}
vm::notify_at(rwm.addr(), 8);
return CELL_OK;
}
@ -344,12 +337,10 @@ error_code cellSyncRwmTryRead(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer)
return CELL_SYNC_ERROR_ABORT;
}
vm::notify_at(rwm.addr(), 8);
return CELL_OK;
}
error_code cellSyncRwmWrite(vm::ptr<CellSyncRwm> rwm, vm::cptr<void> buffer)
error_code cellSyncRwmWrite(ppu_thread& ppu, vm::ptr<CellSyncRwm> rwm, vm::cptr<void> buffer)
{
cellSync.trace("cellSyncRwmWrite(rwm=*0x%x, buffer=*0x%x)", rwm, buffer);
@ -364,10 +355,16 @@ error_code cellSyncRwmWrite(vm::ptr<CellSyncRwm> rwm, vm::cptr<void> buffer)
}
// wait until `writers` is zero, set to 1
vm::wait_op(rwm.addr(), 8, [&] { return rwm->ctrl.atomic_op(&CellSyncRwm::try_write_begin); });
while (!rwm->ctrl.atomic_op(&CellSyncRwm::try_write_begin))
{
ppu.test_state();
}
// wait until `readers` is zero
vm::wait_op(rwm.addr(), 8, [&] { return rwm->ctrl.load().readers == 0; });
while (rwm->ctrl.load().readers != 0)
{
ppu.test_state();
}
// copy data from buffer
std::memcpy(rwm->buffer.get_ptr(), buffer.get_ptr(), rwm->size);
@ -375,8 +372,6 @@ error_code cellSyncRwmWrite(vm::ptr<CellSyncRwm> rwm, vm::cptr<void> buffer)
// sync and clear `readers` and `writers`
rwm->ctrl.exchange({ 0, 0 });
vm::notify_at(rwm.addr(), 8);
return CELL_OK;
}
@ -406,8 +401,6 @@ error_code cellSyncRwmTryWrite(vm::ptr<CellSyncRwm> rwm, vm::cptr<void> buffer)
// sync and clear `readers` and `writers`
rwm->ctrl.exchange({ 0, 0 });
vm::notify_at(rwm.addr(), 8);
return CELL_OK;
}
@ -446,7 +439,7 @@ error_code cellSyncQueueInitialize(vm::ptr<CellSyncQueue> queue, vm::ptr<u8> buf
return CELL_OK;
}
error_code cellSyncQueuePush(vm::ptr<CellSyncQueue> queue, vm::cptr<void> buffer)
error_code cellSyncQueuePush(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::cptr<void> buffer)
{
cellSync.trace("cellSyncQueuePush(queue=*0x%x, buffer=*0x%x)", queue, buffer);
@ -464,15 +457,16 @@ error_code cellSyncQueuePush(vm::ptr<CellSyncQueue> queue, vm::cptr<void> buffer
u32 position;
vm::wait_op(queue.addr(), 8, [&] { return queue->ctrl.atomic_op(&CellSyncQueue::try_push_begin, depth, &position); });
while (!queue->ctrl.atomic_op(&CellSyncQueue::try_push_begin, depth, &position))
{
ppu.test_state();
}
// copy data from the buffer at the position
std::memcpy(&queue->buffer[position * queue->size], buffer.get_ptr(), queue->size);
queue->ctrl.atomic_op(&CellSyncQueue::push_end);
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
@ -504,12 +498,10 @@ error_code cellSyncQueueTryPush(vm::ptr<CellSyncQueue> queue, vm::cptr<void> buf
queue->ctrl.atomic_op(&CellSyncQueue::push_end);
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
error_code cellSyncQueuePop(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffer)
error_code cellSyncQueuePop(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffer)
{
cellSync.trace("cellSyncQueuePop(queue=*0x%x, buffer=*0x%x)", queue, buffer);
@ -527,15 +519,16 @@ error_code cellSyncQueuePop(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffer)
u32 position;
vm::wait_op(queue.addr(), 8, [&] { return queue->ctrl.atomic_op(&CellSyncQueue::try_pop_begin, depth, &position); });
while (!queue->ctrl.atomic_op(&CellSyncQueue::try_pop_begin, depth, &position))
{
ppu.test_state();
}
// copy data at the position to the buffer
std::memcpy(buffer.get_ptr(), &queue->buffer[position % depth * queue->size], queue->size);
queue->ctrl.atomic_op(&CellSyncQueue::pop_end);
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
@ -567,12 +560,10 @@ error_code cellSyncQueueTryPop(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffe
queue->ctrl.atomic_op(&CellSyncQueue::pop_end);
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
error_code cellSyncQueuePeek(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffer)
error_code cellSyncQueuePeek(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffer)
{
cellSync.trace("cellSyncQueuePeek(queue=*0x%x, buffer=*0x%x)", queue, buffer);
@ -590,15 +581,16 @@ error_code cellSyncQueuePeek(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffer)
u32 position;
vm::wait_op(queue.addr(), 8, [&] { return queue->ctrl.atomic_op(&CellSyncQueue::try_peek_begin, depth, &position); });
while (!queue->ctrl.atomic_op(&CellSyncQueue::try_peek_begin, depth, &position))
{
ppu.test_state();
}
// copy data at the position to the buffer
std::memcpy(buffer.get_ptr(), &queue->buffer[position % depth * queue->size], queue->size);
queue->ctrl.atomic_op(&CellSyncQueue::pop_end);
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
@ -630,8 +622,6 @@ error_code cellSyncQueueTryPeek(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buff
queue->ctrl.atomic_op(&CellSyncQueue::pop_end);
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
@ -654,7 +644,7 @@ error_code cellSyncQueueSize(vm::ptr<CellSyncQueue> queue)
return not_an_error(queue->ctrl.load().count & 0xffffff);
}
error_code cellSyncQueueClear(vm::ptr<CellSyncQueue> queue)
error_code cellSyncQueueClear(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue)
{
cellSync.trace("cellSyncQueueClear(queue=*0x%x)", queue);
@ -670,13 +660,18 @@ error_code cellSyncQueueClear(vm::ptr<CellSyncQueue> queue)
const u32 depth = queue->check_depth();
vm::wait_op(queue.addr(), 8, [&] { return queue->ctrl.atomic_op(&CellSyncQueue::try_clear_begin_1); });
vm::wait_op(queue.addr(), 8, [&] { return queue->ctrl.atomic_op(&CellSyncQueue::try_clear_begin_2); });
while (!queue->ctrl.atomic_op(&CellSyncQueue::try_clear_begin_1))
{
ppu.test_state();
}
while (!queue->ctrl.atomic_op(&CellSyncQueue::try_clear_begin_2))
{
ppu.test_state();
}
queue->ctrl.exchange({ 0, 0 });
vm::notify_at(queue.addr(), 8);
return CELL_OK;
}
@ -1107,7 +1102,7 @@ error_code _cellSyncLFQueuePushBody(ppu_thread& ppu, vm::ptr<CellSyncLFQueue> qu
break;
}
thread_ctrl::wait_for(1000); // hack
ppu.test_state();
}
const s32 depth = queue->m_depth;
@ -1402,7 +1397,7 @@ error_code _cellSyncLFQueuePopBody(ppu_thread& ppu, vm::ptr<CellSyncLFQueue> que
break;
}
thread_ctrl::wait_for(1000); // hack
ppu.test_state();
}
const s32 depth = queue->m_depth;

View file

@ -77,6 +77,7 @@ struct vdec_thread : ppu_thread
std::mutex mutex;
std::queue<vdec_frame> out;
u32 max_frames = 20;
vdec_thread(s32 type, u32 profile, u32 addr, u32 size, vm::ptr<CellVdecCbMsg> func, u32 arg, u32 prio, u32 stack)
: ppu_thread("HLE Video Decoder", prio, stack)
@ -328,7 +329,7 @@ struct vdec_thread : ppu_thread
std::lock_guard<std::mutex>{mutex}, out.push(std::move(frame));
cb_func(*this, id, CELL_VDEC_MSG_TYPE_PICOUT, CELL_OK, cb_arg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
if (vcmd == vdec_cmd::decode)
@ -338,9 +339,9 @@ struct vdec_thread : ppu_thread
}
cb_func(*this, id, vcmd == vdec_cmd::decode ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
while (std::lock_guard<std::mutex>{mutex}, out.size() > 60)
while (std::lock_guard<std::mutex>{mutex}, out.size() > max_frames)
{
thread_ctrl::wait();
}
@ -444,8 +445,14 @@ s32 cellVdecClose(ppu_thread& ppu, u32 handle)
return CELL_VDEC_ERROR_ARG;
}
lv2_obj::sleep(ppu, -1);
vdec->cmd_push({vdec_cmd::close, 0});
lv2_obj::sleep(ppu);
{
std::lock_guard<std::mutex> lock(vdec->mutex);
vdec->cmd_push({vdec_cmd::close, 0});
vdec->out = decltype(vdec->out){};
}
vdec->notify();
vdec->join();
idm::remove<ppu_thread>(handle);
@ -534,7 +541,7 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr<CellVdecPicFormat> format, vm::ptr<u
vdec->out.pop();
if (vdec->out.size() <= 60)
if (vdec->out.size() <= vdec->max_frames)
{
vdec->notify();
}

View file

@ -348,7 +348,7 @@ struct surmixer_thread : ppu_thread
if (g_surmx.cb)
{
g_surmx.cb(*this, g_surmx.cb_arg, (u32)g_surmx.mixcount, 256);
lv2_obj::sleep(*this, -1);
lv2_obj::sleep(*this);
}
//u64 stamp1 = get_system_time();

View file

@ -120,8 +120,10 @@ s32 sys_lwmutex_lock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, u64 timeou
return CELL_EINVAL;
}
for (u32 i = 0; i < 300; i++)
for (u32 i = 0; i < 10; i++)
{
busy_wait();
if (lwmutex->vars.owner.load() == lwmutex_free)
{
if (lwmutex->vars.owner.compare_and_swap_test(lwmutex_free, tid))

View file

@ -45,7 +45,7 @@ s32 sys_ppu_thread_create(ppu_thread& ppu, vm::ptr<u64> thread_id, u32 entry, u6
// Dirty hack for sound: confirm the creation of _mxr000 event queue
if (threadname && std::memcmp(threadname.get_ptr(), "_cellsurMixerMain", 18) == 0)
{
lv2_obj::sleep(ppu, -1);
lv2_obj::sleep(ppu);
while (!idm::select<lv2_obj, lv2_event_queue>([](u32, lv2_event_queue& eq)
{
@ -54,6 +54,8 @@ s32 sys_ppu_thread_create(ppu_thread& ppu, vm::ptr<u64> thread_id, u32 entry, u6
{
thread_ctrl::wait_for(50000);
}
ppu.test_state();
}
return CELL_OK;

View file

@ -4,30 +4,34 @@
#include "sysPrxForUser.h"
#include "Emu/Memory/wait_engine.h"
extern logs::channel sysPrxForUser;
void sys_spinlock_initialize(vm::ptr<atomic_be_t<u32>> lock)
{
sysPrxForUser.trace("sys_spinlock_initialize(lock=*0x%x)", lock);
lock->exchange(0);
if (*lock)
{
*lock = 0;
}
}
void sys_spinlock_lock(vm::ptr<atomic_be_t<u32>> lock)
void sys_spinlock_lock(ppu_thread& ppu, vm::ptr<atomic_be_t<u32>> lock)
{
sysPrxForUser.trace("sys_spinlock_lock(lock=*0x%x)", lock);
// Try exchange with 0xabadcafe, repeat until exchanged with 0
vm::wait_op(lock.addr(), 4, [&] { return lock->exchange(0xabadcafe) == 0; });
// Try to exchange with 0xabadcafe, repeat until exchanged with 0
while (*lock || lock->exchange(0xabadcafe))
{
ppu.test_state();
}
}
s32 sys_spinlock_trylock(vm::ptr<atomic_be_t<u32>> lock)
{
sysPrxForUser.trace("sys_spinlock_trylock(lock=*0x%x)", lock);
if (lock->exchange(0xabadcafe))
if (*lock || lock->exchange(0xabadcafe))
{
return CELL_EBUSY;
}
@ -39,9 +43,7 @@ void sys_spinlock_unlock(vm::ptr<atomic_be_t<u32>> lock)
{
sysPrxForUser.trace("sys_spinlock_unlock(lock=*0x%x)", lock);
lock->exchange(0);
vm::notify_at(lock.addr(), 4);
*lock = 0;
}
void sysPrxForUser_sys_spinlock_init()
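The rewritten lock is a test-and-test-and-set spin: the plain read of *lock avoids hammering the cache line with exchanges while the lock is held, and 0xabadcafe is the "locked" marker. A self-contained sketch with std::atomic in place of the guest's big-endian atomic:

#include <atomic>

std::atomic<uint32_t> lock_word{0};

void spin_lock(ppu_thread& ppu)
{
	// Cheap read first; attempt the exchange only when the lock looks free
	while (lock_word.load(std::memory_order_relaxed) != 0 || lock_word.exchange(0xabadcafe) != 0)
	{
		ppu.test_state(); // keep the emulated thread responsive while spinning
	}
}

void spin_unlock()
{
	lock_word.store(0); // matches the plain `*lock = 0` above
}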

View file

@ -145,8 +145,8 @@ extern void ppu_execute_function(ppu_thread& ppu, u32 index)
{
if (index < g_ppu_function_cache.size())
{
// If autopause occures, check_status() will hold the thread until unpaused.
if (debug::autopause::pause_function(g_ppu_fnid_cache[index]) && ppu.check_state()) throw cpu_flag::ret;
// If autopause occurs, test_state() will hold the thread until unpaused
if (debug::autopause::pause_function(g_ppu_fnid_cache[index])) ppu.test_state();
if (const auto func = g_ppu_function_cache[index])
{

View file

@ -51,6 +51,31 @@ extern u64 get_system_time();
namespace vm { using namespace ps3; }
enum class join_status : u32
{
joinable = 0,
detached = 0u-1,
exited = 0u-2,
zombie = 0u-3,
};
template <>
void fmt_class_string<join_status>::format(std::string& out, u64 arg)
{
format_enum(out, arg, [](join_status js)
{
switch (js)
{
case join_status::joinable: return "";
case join_status::detached: return "detached";
case join_status::zombie: return "zombie";
case join_status::exited: return "exited";
}
return unknown;
});
}
enum class ppu_decoder_type
{
precise,
@ -197,7 +222,32 @@ std::string ppu_thread::dump() const
{
std::string ret = cpu_thread::dump();
ret += fmt::format("Priority: %d\n", +prio);
ret += fmt::format("Last function: %s\n", last_function ? last_function : "");
ret += fmt::format("Stack: 0x%x..0x%x\n", stack_addr, stack_addr + stack_size - 1);
ret += fmt::format("Joiner: %s\n", join_status(joiner.load()));
ret += fmt::format("Commands: %u\n", cmd_queue.size());
const auto _func = last_function;
if (_func)
{
ret += "Last function: ";
ret += _func;
ret += '\n';
}
if (const auto _time = start_time)
{
ret += fmt::format("Waiting: %fs\n", (get_system_time() - _time) / 1000000.);
}
else
{
ret += '\n';
}
if (!_func)
{
ret += '\n';
}
ret += "\nRegisters:\n=========\n";
for (uint i = 0; i < 32; ++i) ret += fmt::format("GPR[%d] = 0x%llx\n", i, gpr[i]);
@ -289,7 +339,7 @@ void ppu_thread::cpu_task()
}
case ppu_cmd::sleep:
{
cmd_pop(), lv2_obj::sleep(*this, -1);
cmd_pop(), lv2_obj::sleep(*this);
break;
}
default:
@ -400,6 +450,7 @@ ppu_thread::ppu_thread(const std::string& name, u32 prio, u32 stack)
, prio(prio)
, stack_size(std::max<u32>(stack, 0x4000))
, stack_addr(vm::alloc(stack_size, vm::stack))
, start_time(get_system_time())
, m_name(name)
{
if (!stack_addr)
@ -505,7 +556,14 @@ void ppu_thread::fast_call(u32 addr, u32 rtoc)
{
if (last_function)
{
LOG_WARNING(PPU, "'%s' aborted (%fs)", last_function, (get_system_time() - gpr[10]) / 1000000.);
if (start_time)
{
LOG_WARNING(PPU, "'%s' aborted (%fs)", last_function, (get_system_time() - start_time) / 1000000.);
}
else
{
LOG_WARNING(PPU, "'%s' aborted", last_function);
}
}
last_function = old_func;
@ -612,7 +670,7 @@ extern void sse_cellbe_stvrx(u64 addr, __m128i a);
static void ppu_check(ppu_thread& ppu, u64 addr)
{
ppu.cia = addr;
ppu.check_state();
ppu.test_state();
}
static void ppu_trace(u64 addr)

View file

@ -133,6 +133,7 @@ public:
cmd64 cmd_wait(); // Empty command means caller must return, like true from cpu_thread::check_state().
cmd64 cmd_get(u32 index) { return cmd_queue[cmd_queue.peek() + index].load(); }
u64 start_time{0}; // Sleep start timepoint
const char* last_function{}; // Last function name for diagnosis, optimized for speed.
const std::string m_name; // Thread name

View file

@ -50,7 +50,7 @@ void spu_interpreter::set_interrupt_status(SPUThread& spu, spu_opcode_t op)
spu.set_interrupt_status(false);
}
if ((spu.ch_event_stat & SPU_EVENT_INTR_TEST) > SPU_EVENT_INTR_ENABLED)
if ((spu.ch_event_stat & SPU_EVENT_INTR_TEST & spu.ch_event_mask) > SPU_EVENT_INTR_ENABLED)
{
spu.ch_event_stat &= ~SPU_EVENT_INTR_ENABLED;
spu.srr0 = std::exchange(spu.pc, -4) + 4;
@ -89,10 +89,16 @@ void spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op)
void spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op)
{
if (!spu.get_ch_value(op.ra, spu.gpr[op.rt]._u32[3]))
u32 result;
if (!spu.get_ch_value(op.ra, result))
{
spu.pc -= 4;
}
else
{
spu.gpr[op.rt] = v128::from32r(result);
}
}
void spu_interpreter::RCHCNT(SPUThread& spu, spu_opcode_t op)

View file

@ -26,7 +26,7 @@ void spu_recompiler_base::enter(SPUThread& spu)
const auto func = spu.spu_db->analyse(_ls, spu.pc);
// Reset callstack if necessary
if (func->does_reset_stack && spu.recursion_level)
if ((func->does_reset_stack && spu.recursion_level) || spu.recursion_level >= 128)
{
spu.state += cpu_flag::ret;
return;
@ -77,7 +77,7 @@ void spu_recompiler_base::enter(SPUThread& spu)
spu.pc = res & 0x3fffc;
if ((spu.ch_event_stat & SPU_EVENT_INTR_TEST) > SPU_EVENT_INTR_ENABLED)
if ((spu.ch_event_stat & SPU_EVENT_INTR_TEST & spu.ch_event_mask) > SPU_EVENT_INTR_ENABLED)
{
spu.ch_event_stat &= ~SPU_EVENT_INTR_ENABLED;
spu.srr0 = std::exchange(spu.pc, 0);

View file

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "Utilities/Config.h"
#include "Utilities/lockless.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
@ -1060,18 +1061,8 @@ bool SPUThread::stop_and_signal(u32 code)
{
case 0x000:
{
// Hack: wait for an instruction become available
while (vm::ps3::read32(offset + pc) == 0)
{
if (test(state) && check_state())
{
return false;
}
thread_ctrl::wait_for(1000);
}
return false;
// Hack: execute as NOP
return true;
}
case 0x001:
@ -1086,23 +1077,6 @@ bool SPUThread::stop_and_signal(u32 code)
return true;
}
case 0x003:
{
const auto found = m_addr_to_hle_function_map.find(pc);
if (found == m_addr_to_hle_function_map.end())
{
fmt::throw_exception("HLE function not registered (PC=0x%05x)" HERE, pc);
}
if (const auto return_to_caller = found->second(*this))
{
pc = (gpr[0]._u32[3] & 0x3fffc) - 4;
}
return true;
}
case 0x110:
{
/* ===== sys_spu_thread_receive_event ===== */

View file

@ -986,8 +986,11 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code)
{
if (code < g_ppu_syscall_table.size())
{
// If autopause occures, check_status() will hold the thread till unpaused.
if (debug::autopause::pause_syscall(code) && ppu.check_state()) throw cpu_flag::ret;
// If autopause occurs, test_state() will hold the thread until unpaused
if (debug::autopause::pause_syscall(code))
{
ppu.test_state();
}
if (auto func = g_ppu_syscall_table[code])
{
@ -1026,10 +1029,12 @@ DECLARE(lv2_obj::g_waiting);
// Amount of PPU threads running simultaneously (must be 2)
cfg::int_entry<1, 16> g_cfg_ppu_threads(cfg::root.core, "PPU Threads", 2);
void lv2_obj::sleep(named_thread& thread, u64 wait_until)
void lv2_obj::sleep_timeout(named_thread& thread, u64 timeout)
{
semaphore_lock lock(g_mutex);
const u64 start_time = get_system_time();
if (auto ppu = dynamic_cast<ppu_thread*>(&thread))
{
sys_ppu_thread.trace("sleep() - waiting (%zu)", g_pending.size());
@ -1039,21 +1044,27 @@ void lv2_obj::sleep(named_thread& thread, u64 wait_until)
if (!test(val, cpu_flag::signal))
{
val += cpu_flag::suspend;
val += cpu_flag::is_waiting;
}
});
if (test(state, cpu_flag::signal))
{
sys_ppu_thread.error("sleep() failed (signaled)");
sys_ppu_thread.trace("sleep() failed (signaled)");
return;
}
// Find and remove the thread
unqueue(g_ppu, ppu);
unqueue(g_pending, ppu);
ppu->start_time = start_time;
}
if (wait_until < -2)
if (timeout)
{
const u64 wait_until = start_time + timeout;
// Register timeout if necessary
for (auto it = g_waiting.begin(), end = g_waiting.end(); it != end; it++)
{
@ -1160,6 +1171,7 @@ void lv2_obj::schedule_all()
{
sys_ppu_thread.trace("schedule(): %s", target->id);
target->state ^= (cpu_flag::signal + cpu_flag::suspend);
target->start_time = 0;
if (target->get() != thread_ctrl::get_current())
{

View file

@ -99,8 +99,9 @@ error_code sys_cond_signal(ppu_thread& ppu, u32 cond_id)
if (cond.ret)
{
ppu.state += cpu_flag::is_waiting;
cond->awake(*cond.ret);
ppu.check_state();
ppu.test_state();
}
return CELL_OK;
@ -139,8 +140,9 @@ error_code sys_cond_signal_all(ppu_thread& ppu, u32 cond_id)
if (cond.ret)
{
ppu.state += cpu_flag::is_waiting;
cond->awake(*cond.ret);
ppu.check_state();
ppu.test_state();
}
return CELL_OK;
@ -182,8 +184,9 @@ error_code sys_cond_signal_to(ppu_thread& ppu, u32 cond_id, u32 thread_id)
if (cond.ret && cond.ret != (cpu_thread*)(1))
{
ppu.state += cpu_flag::is_waiting;
cond->awake(*cond.ret);
ppu.check_state();
ppu.test_state();
}
else if (!cond.ret)
{
@ -197,8 +200,6 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)
{
sys_cond.trace("sys_cond_wait(cond_id=0x%x, timeout=%lld)", cond_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto cond = idm::get<lv2_obj, lv2_cond>(cond_id, [&](lv2_cond& cond)
{
// Add a "promise" to add a waiter
@ -226,11 +227,15 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)
// Register waiter
cond->sq.emplace_back(&ppu);
cond->sleep(ppu, start_time, timeout);
cond->sleep(ppu, timeout);
// Unlock the mutex
cond->mutex->lock_count = 0;
cond->mutex->reown<ppu_thread>();
if (auto cpu = cond->mutex->reown<ppu_thread>())
{
cond->mutex->awake(*cpu);
}
// Further function result
ppu.gpr[3] = CELL_OK;
@ -240,7 +245,7 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -278,12 +283,6 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)
// Restore the recursive value
cond->mutex->lock_count = cond.ret;
ppu.check_state();
if (ppu.gpr[3] == CELL_ETIMEDOUT)
{
return not_an_error(CELL_ETIMEDOUT);
}
return CELL_OK;
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}

View file

@ -172,6 +172,7 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode)
if (queue->type == SYS_PPU_QUEUE)
{
static_cast<ppu_thread&>(*cpu).gpr[3] = CELL_ECANCELED;
ppu.state += cpu_flag::is_waiting;
queue->awake(*cpu);
}
else
@ -183,8 +184,7 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode)
}
}
ppu.check_state();
ppu.test_state();
return CELL_OK;
}
@ -226,8 +226,6 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptr<sys_e
{
sys_event.trace("sys_event_queue_receive(equeue_id=0x%x, *0x%x, timeout=0x%llx)", equeue_id, dummy_event, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto queue = idm::get<lv2_obj, lv2_event_queue>(equeue_id, [&](lv2_event_queue& queue) -> CellError
{
if (queue.type != SYS_PPU_QUEUE)
@ -240,7 +238,7 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptr<sys_e
if (queue.events.empty())
{
queue.sq.emplace_back(&ppu);
queue.sleep(ppu, start_time, timeout);
queue.sleep(ppu, timeout);
return CELL_EBUSY;
}
@ -273,7 +271,7 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptr<sys_e
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -297,7 +295,7 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptr<sys_e
}
}
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}
@ -423,6 +421,8 @@ error_code sys_event_port_send(ppu_thread& ppu, u32 eport_id, u64 data1, u64 dat
{
sys_event.trace("sys_event_port_send(eport_id=0x%x, data1=0x%llx, data2=0x%llx, data3=0x%llx)", eport_id, data1, data2, data3);
ppu.state += cpu_flag::is_waiting;
const auto port = idm::get<lv2_obj, lv2_event_port>(eport_id, [&](lv2_event_port& port) -> CellError
{
if (const auto queue = port.queue.lock())
@ -455,6 +455,6 @@ error_code sys_event_port_send(ppu_thread& ppu, u32 eport_id, u64 data1, u64 dat
return port.ret;
}
ppu.check_state();
ppu.test_state();
return CELL_OK;
}

View file

@ -91,8 +91,6 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm
{
sys_event_flag.trace("sys_event_flag_wait(id=0x%x, bitptn=0x%llx, mode=0x%x, result=*0x%x, timeout=0x%llx)", id, bitptn, mode, result, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
// Fix function arguments for external access
ppu.gpr[3] = -1;
ppu.gpr[4] = bitptn;
@ -130,7 +128,7 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm
flag.waiters++;
flag.sq.emplace_back(&ppu);
flag.sleep(ppu, start_time, timeout);
flag.sleep(ppu, timeout);
return CELL_EBUSY;
});
@ -156,7 +154,7 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -182,7 +180,7 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm
}
}
ppu.check_state();
ppu.test_state();
if (result) *result = ppu.gpr[6];
return not_an_error(ppu.gpr[3]);
}
@ -223,6 +221,12 @@ error_code sys_event_flag_trywait(u32 id, u64 bitptn, u32 mode, vm::ptr<u64> res
error_code sys_event_flag_set(u32 id, u64 bitptn)
{
// Warning: may be called from SPU thread.
auto cpu = get_current_cpu_thread();
if (cpu && cpu->id_type() != 1)
{
cpu = nullptr;
}
sys_event_flag.trace("sys_event_flag_set(id=0x%x, bitptn=0x%llx)", id, bitptn);
@ -252,9 +256,10 @@ error_code sys_event_flag_set(u32 id, u64 bitptn)
}
// Process all waiters in single atomic op
flag->pattern.atomic_op([&](u64& value)
const u32 count = flag->pattern.atomic_op([&](u64& value)
{
value |= bitptn;
u32 count = 0;
for (auto cpu : flag->sq)
{
@ -266,10 +271,22 @@ error_code sys_event_flag_set(u32 id, u64 bitptn)
if (lv2_event_flag::check_pattern(value, pattern, mode, &ppu.gpr[6]))
{
ppu.gpr[3] = CELL_OK;
count++;
}
}
return count;
});
if (!count)
{
return CELL_OK;
}
else if (cpu)
{
cpu->state += cpu_flag::is_waiting;
}
// Remove waiters
const auto tail = std::remove_if(flag->sq.begin(), flag->sq.end(), [&](cpu_thread* cpu)
{
@ -287,15 +304,8 @@ error_code sys_event_flag_set(u32 id, u64 bitptn)
flag->sq.erase(tail, flag->sq.end());
}
if (auto cpu = get_current_cpu_thread())
{
if (cpu->id_type() == 1)
{
static_cast<ppu_thread&>(*cpu).check_state();
}
}
if (cpu) cpu->test_state();
return CELL_OK;
}
@ -342,6 +352,8 @@ error_code sys_event_flag_cancel(ppu_thread& ppu, u32 id, vm::ptr<u32> num)
// Signal all threads to return CELL_ECANCELED
while (auto thread = flag->schedule<ppu_thread>(flag->sq, flag->protocol))
{
ppu.state += cpu_flag::is_waiting;
auto& ppu = static_cast<ppu_thread&>(*thread);
ppu.gpr[3] = CELL_ECANCELED;
@ -352,10 +364,8 @@ error_code sys_event_flag_cancel(ppu_thread& ppu, u32 id, vm::ptr<u32> num)
}
}
ppu.test_state();
if (num) *num = value;
ppu.check_state();
return CELL_OK;
}

View file

@ -132,8 +132,9 @@ error_code _sys_lwcond_signal(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id, u3
if (cond.ret)
{
ppu.state += cpu_flag::is_waiting;
cond->awake(*cond.ret);
ppu.check_state();
ppu.test_state();
}
else if (mode == 2)
{
@ -211,12 +212,13 @@ error_code _sys_lwcond_signal_all(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
for (auto cpu : threads)
{
ppu.state += cpu_flag::is_waiting;
cond->awake(*cpu);
}
if (threads.size())
{
ppu.check_state();
ppu.test_state();
}
if (mode == 1)
@ -232,8 +234,6 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
{
sys_lwcond.trace("_sys_lwcond_queue_wait(lwcond_id=0x%x, lwmutex_id=0x%x, timeout=0x%llx)", lwcond_id, lwmutex_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
std::shared_ptr<lv2_lwmutex> mutex;
const auto cond = idm::get<lv2_obj, lv2_lwcond>(lwcond_id, [&](lv2_lwcond& cond) -> cpu_thread*
@ -250,7 +250,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
// Add a waiter
cond.waiters++;
cond.sq.emplace_back(&ppu);
cond.sleep(ppu, start_time, timeout);
cond.sleep(ppu, timeout);
// Process lwmutex sleep queue
if (const auto cpu = mutex->schedule<ppu_thread>(mutex->sq, mutex->protocol))
@ -269,6 +269,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
if (cond.ret)
{
ppu.state += cpu_flag::is_waiting;
cond->awake(*cond.ret);
}
@ -278,7 +279,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -311,6 +312,6 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
}
// Return cause
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}

View file

@ -73,8 +73,6 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
{
sys_lwmutex.trace("_sys_lwmutex_lock(lwmutex_id=0x%x, timeout=0x%llx)", lwmutex_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto mutex = idm::get<lv2_obj, lv2_lwmutex>(lwmutex_id, [&](lv2_lwmutex& mutex)
{
if (u32 value = mutex.signaled)
@ -96,7 +94,7 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
}
mutex.sq.emplace_back(&ppu);
mutex.sleep(ppu, start_time, timeout);
mutex.sleep(ppu, timeout);
return false;
});
@ -116,7 +114,7 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -140,7 +138,7 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
}
}
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}
@ -198,8 +196,9 @@ error_code _sys_lwmutex_unlock(ppu_thread& ppu, u32 lwmutex_id)
if (mutex.ret)
{
ppu.state += cpu_flag::is_waiting;
mutex->awake(*mutex.ret);
ppu.check_state();
ppu.test_state();
}
return CELL_OK;

View file

@ -102,8 +102,6 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout)
{
sys_mutex.trace("sys_mutex_lock(mutex_id=0x%x, timeout=0x%llx)", mutex_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto mutex = idm::get<lv2_obj, lv2_mutex>(mutex_id, [&](lv2_mutex& mutex)
{
CellError result = mutex.try_lock(ppu.id);
@ -118,7 +116,7 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout)
}
else
{
mutex.sleep(ppu, start_time, timeout);
mutex.sleep(ppu, timeout);
}
}
@ -148,7 +146,7 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout)
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -172,7 +170,7 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout)
}
}
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}
@ -221,12 +219,17 @@ error_code sys_mutex_unlock(ppu_thread& ppu, u32 mutex_id)
{
semaphore_lock lock(mutex->mutex);
mutex->reown<ppu_thread>();
if (auto cpu = mutex->reown<ppu_thread>())
{
ppu.state += cpu_flag::is_waiting;
mutex->awake(*cpu);
}
}
else if (mutex.ret)
{
return mutex.ret;
}
ppu.test_state();
return CELL_OK;
}

View file

@ -128,17 +128,17 @@ struct lv2_mutex final : lv2_obj
}
template <typename T>
void reown()
T* reown()
{
if (auto cpu = schedule<T>(sq, protocol))
{
owner = cpu->id << 1 | !sq.empty();
awake(*cpu);
return static_cast<T*>(cpu);
}
else
{
owner = 0;
return nullptr;
}
}
};
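Callers now pair the returned thread with an explicit wake-up, flagging themselves as waiting first (the pattern used by sys_mutex_unlock and sys_cond_wait in this commit):

if (auto cpu = mutex->reown<ppu_thread>())
{
	ppu.state += cpu_flag::is_waiting; // mark the waker before rescheduling
	mutex->awake(*cpu);                // schedule the new owner
}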

View file

@ -16,6 +16,7 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode)
sys_ppu_thread.trace("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode);
ppu.state += cpu_flag::exit;
ppu.state += cpu_flag::is_waiting;
// Get joiner ID
const u32 jid = ppu.joiner.fetch_op([](u32& value)
@ -48,7 +49,7 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode)
}
// Unqueue
lv2_obj::sleep(ppu, -1);
lv2_obj::sleep(ppu);
// Remove suspend state (TODO)
ppu.state -= cpu_flag::suspend;
@ -64,8 +65,9 @@ void sys_ppu_thread_yield(ppu_thread& ppu)
{
sys_ppu_thread.trace("sys_ppu_thread_yield()");
ppu.state += cpu_flag::is_waiting;
lv2_obj::awake(ppu, -4);
ppu.check_state();
ppu.test_state();
}
error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr<u64> vptr)
@ -104,7 +106,7 @@ error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr<u64> vptr
if (!result)
{
lv2_obj::sleep(ppu, -1);
lv2_obj::sleep(ppu);
}
return result;
@ -126,12 +128,14 @@ error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr<u64> vptr
// Get the exit status from the register
if (vptr)
{
ppu.test_state();
*vptr = thread->gpr[3];
}
// Cleanup
idm::remove<ppu_thread>(thread->id);
ppu.test_state();
return CELL_OK;
}
@ -207,6 +211,7 @@ error_code sys_ppu_thread_set_priority(ppu_thread& ppu, u32 thread_id, s32 prio)
{
if (thread.prio != prio && thread.prio.exchange(prio) != prio)
{
ppu.state += cpu_flag::is_waiting;
lv2_obj::awake(thread, prio);
}
});
@ -216,11 +221,7 @@ error_code sys_ppu_thread_set_priority(ppu_thread& ppu, u32 thread_id, s32 prio)
return CELL_ESRCH;
}
if (&ppu == thread)
{
ppu.check_state();
}
ppu.test_state();
return CELL_OK;
}
@ -338,6 +339,7 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id)
const auto thread = idm::get<ppu_thread>(thread_id, [&](ppu_thread& thread)
{
ppu.state += cpu_flag::is_waiting;
lv2_obj::awake(thread, -2);
});
@ -356,7 +358,7 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id)
thread->notify();
}
ppu.check_state();
ppu.test_state();
return CELL_OK;
}

View file

@ -79,8 +79,6 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
{
sys_rwlock.trace("sys_rwlock_rlock(rw_lock_id=0x%x, timeout=0x%llx)", rw_lock_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto rwlock = idm::get<lv2_obj, lv2_rwlock>(rw_lock_id, [&](lv2_rwlock& rwlock)
{
const s64 val = rwlock.owner;
@ -110,7 +108,7 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
if (_old > 0 || _old & 1)
{
rwlock.rq.emplace_back(&ppu);
rwlock.sleep(ppu, start_time, timeout);
rwlock.sleep(ppu, timeout);
return false;
}
@ -133,7 +131,7 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -157,7 +155,7 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
}
}
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}
@ -243,6 +241,8 @@ error_code sys_rwlock_runlock(ppu_thread& ppu, u32 rw_lock_id)
{
if (const auto cpu = rwlock->schedule<ppu_thread>(rwlock->wq, rwlock->protocol))
{
ppu.state += cpu_flag::is_waiting;
rwlock->owner = cpu->id << 1 | !rwlock->wq.empty();
rwlock->awake(*cpu);
@ -256,7 +256,7 @@ error_code sys_rwlock_runlock(ppu_thread& ppu, u32 rw_lock_id)
}
}
ppu.check_state();
ppu.test_state();
return CELL_OK;
}
@ -264,8 +264,6 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
{
sys_rwlock.trace("sys_rwlock_wlock(rw_lock_id=0x%x, timeout=0x%llx)", rw_lock_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto rwlock = idm::get<lv2_obj, lv2_rwlock>(rw_lock_id, [&](lv2_rwlock& rwlock)
{
const s64 val = rwlock.owner;
@ -299,7 +297,7 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
if (_old != 0)
{
rwlock.wq.emplace_back(&ppu);
rwlock.sleep(ppu, start_time, timeout);
rwlock.sleep(ppu, timeout);
return false;
}
@ -327,7 +325,7 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -346,6 +344,8 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
while (auto cpu = rwlock->schedule<ppu_thread>(rwlock->rq, SYS_SYNC_PRIORITY))
{
ppu.state += cpu_flag::is_waiting;
rwlock->awake(*cpu);
}
@ -364,7 +364,7 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
}
}
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}
@ -426,6 +426,8 @@ error_code sys_rwlock_wunlock(ppu_thread& ppu, u32 rw_lock_id)
if (auto cpu = rwlock->schedule<ppu_thread>(rwlock->wq, rwlock->protocol))
{
ppu.state += cpu_flag::is_waiting;
rwlock->owner = cpu->id << 1 | !rwlock->wq.empty();
rwlock->awake(*cpu);
@ -436,6 +438,7 @@ error_code sys_rwlock_wunlock(ppu_thread& ppu, u32 rw_lock_id)
while (auto cpu = rwlock->schedule<ppu_thread>(rwlock->rq, SYS_SYNC_PRIORITY))
{
ppu.state += cpu_flag::is_waiting;
rwlock->awake(*cpu);
}
@ -449,7 +452,7 @@ error_code sys_rwlock_wunlock(ppu_thread& ppu, u32 rw_lock_id)
if (rwlock.ret & 1)
{
ppu.check_state();
ppu.test_state();
}
return CELL_OK;

View file

@ -85,8 +85,6 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout)
{
sys_semaphore.trace("sys_semaphore_wait(sem_id=0x%x, timeout=0x%llx)", sem_id, timeout);
const u64 start_time = ppu.gpr[10] = get_system_time();
const auto sem = idm::get<lv2_obj, lv2_sema>(sem_id, [&](lv2_sema& sema)
{
const s32 val = sema.val;
@ -104,7 +102,7 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout)
if (sema.val-- <= 0)
{
sema.sq.emplace_back(&ppu);
sema.sleep(ppu, start_time, timeout);
sema.sleep(ppu, timeout);
return false;
}
@ -127,7 +125,7 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout)
{
if (timeout)
{
const u64 passed = get_system_time() - start_time;
const u64 passed = get_system_time() - ppu.start_time;
if (passed >= timeout)
{
@ -160,7 +158,7 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout)
}
}
ppu.check_state();
ppu.test_state();
return not_an_error(ppu.gpr[3]);
}
@ -251,11 +249,12 @@ error_code sys_semaphore_post(ppu_thread& ppu, u32 sem_id, s32 count)
{
const auto cpu = verify(HERE, sem->schedule<ppu_thread>(sem->sq, sem->protocol));
ppu.state += cpu_flag::is_waiting;
sem->awake(*cpu);
}
}
ppu.check_state();
ppu.test_state();
return CELL_OK;
}

View file

@ -227,8 +227,10 @@ error_code sys_spu_thread_group_destroy(u32 id)
return CELL_OK;
}
error_code sys_spu_thread_group_start(u32 id)
error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id)
{
ppu.state += cpu_flag::is_waiting;
sys_spu.warning("sys_spu_thread_group_start(id=0x%x)", id);
const auto group = idm::get<lv2_spu_group>(id, [](lv2_spu_group& group)
@ -285,6 +287,7 @@ error_code sys_spu_thread_group_start(u32 id)
}
}
ppu.test_state();
return CELL_OK;
}
@ -495,7 +498,7 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr<u32> cause
return CELL_EBUSY;
}
lv2_obj::sleep(ppu, -1);
lv2_obj::sleep(ppu);
while ((group->join_state & ~SPU_TGJSF_IS_JOINING) == 0)
{

View file

@ -216,7 +216,7 @@ error_code sys_spu_thread_initialize(vm::ps3::ptr<u32> thread, u32 group, u32 sp
error_code sys_spu_thread_set_argument(u32 id, vm::ps3::ptr<sys_spu_thread_argument> arg);
error_code sys_spu_thread_group_create(vm::ps3::ptr<u32> id, u32 num, s32 prio, vm::ps3::ptr<sys_spu_thread_group_attribute> attr);
error_code sys_spu_thread_group_destroy(u32 id);
error_code sys_spu_thread_group_start(u32 id);
error_code sys_spu_thread_group_start(ppu_thread&, u32 id);
error_code sys_spu_thread_group_suspend(u32 id);
error_code sys_spu_thread_group_resume(u32 id);
error_code sys_spu_thread_group_yield(u32 id);

View file

@ -1,10 +1,10 @@
#pragma once
#include "Utilities/Thread.h"
#include "Utilities/mutex.h"
#include "Utilities/sema.h"
#include "Utilities/cond.h"
#include "Emu/CPU/CPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include <deque>
@ -107,23 +107,24 @@ struct lv2_obj
}
// Remove the current thread from the scheduling queue, register timeout
static void sleep(named_thread&, u64 wait_until);
static void sleep_timeout(named_thread&, u64 timeout);
template <typename T>
static void sleep(T& thread, u64 start_time, u64 timeout)
static void sleep(cpu_thread& thread, u64 timeout = 0)
{
sleep(thread, timeout ? start_time + timeout : -1);
thread.state += cpu_flag::is_waiting;
sleep_timeout(thread, timeout);
}
// Schedule the thread
static void awake(class cpu_thread&, u32 prio);
static void awake(cpu_thread&, u32 prio);
template <typename T>
static void awake(T& cpu)
static void awake(cpu_thread& thread)
{
awake(cpu, -1);
awake(thread, -1);
}
static void lock_all();
static void unlock_all();
static void cleanup();
private:
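Call sites elsewhere in the commit use the reworked API like this (timeout units are microseconds, judging by the get_system_time() arithmetic in the sys_* files):

lv2_obj::sleep(ppu);           // leave the scheduler, no timeout
lv2_obj::sleep(ppu, timeout);  // leave the scheduler, wake after `timeout` us
lv2_obj::awake(thread);        // reschedule with the default priority argument (-1)
lv2_obj::awake(thread, prio);  // reschedule at the given priority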

View file

@ -50,7 +50,7 @@ void lv2_timer::on_task()
}
// TODO: use single global dedicated thread for busy waiting, no timer threads
lv2_obj::sleep(*this, next);
lv2_obj::sleep_timeout(*this, next - _now);
thread_ctrl::wait_for(next - _now);
}
else if (_state == SYS_TIMER_STATE_STOP)
@ -290,15 +290,14 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time)
{
sys_timer.trace("sys_timer_usleep(sleep_time=0x%llx)", sleep_time);
u64 start = ppu.gpr[10] = get_system_time();
u64 passed = 0;
lv2_obj::sleep(ppu, start, std::max<u64>(1, sleep_time));
lv2_obj::sleep(ppu, std::max<u64>(1, sleep_time));
while (sleep_time >= passed)
{
thread_ctrl::wait_for(std::max<u64>(1, sleep_time - passed));
passed = get_system_time() - start;
passed = get_system_time() - ppu.start_time;
}
return CELL_OK;

View file

@ -311,6 +311,13 @@ void Emulator::Load()
g_system = system_type::psv;
m_status = Ready;
vm::psv::init();
if (m_elf_path.empty())
{
m_elf_path = "host_root:" + m_path;
LOG_NOTICE(LOADER, "Elf path: %s", m_elf_path);
}
arm_load_exec(arm_exec);
}
else