From a254507be1fe7fb3cf0f1274e35eeb545f65618f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 10 Dec 2014 10:12:13 +0530 Subject: [PATCH 01/29] SPURS: Implement taskset attribute functions --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 124 ++++++++++++++++++++--- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 23 +++++ 2 files changed, 134 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 4248c35d74..0aff338d50 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2296,15 +2296,49 @@ s64 cellSpursJobChainGetSpursAddress() #endif } -s64 cellSpursCreateTasksetWithAttribute() +s64 spursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, + u32 max_contention, u32 enable_clear_ls, u32 size, u8 unknown /*TODO: Figure this out*/) +{ + if (!spurs || !taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CELL_SPURS_ALIGN || taskset.addr() % CELL_SPURS_TASKSET_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + memset((void *)taskset.addr(), 0, size); + + // TODO: Implement the rest of this + return CELL_OK; +} + +s64 cellSpursCreateTasksetWithAttribute(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc); -#else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; #endif + + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % 8) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (attr->revision != 1) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + return spursCreateTaskset(spurs, taskset, attr->args, vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, priority)), + attr->max_contention, attr->enable_clear_ls, attr->taskset_size, 0 /*TODO: Figure this out*/); } s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, u32 maxContention) @@ -2314,12 +2348,16 @@ s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr t #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x14CB8, libsre_rtoc); -#else +#endif + +#if 0 SPURSManagerTasksetAttribute *tattr = new SPURSManagerTasksetAttribute(args, priority, maxContention); taskset->taskset = new SPURSManagerTaskset(taskset.addr(), tattr); return CELL_OK; #endif + + return spursCreateTaskset(spurs, taskset, args, priority, maxContention, 0, CELL_SPURS_TASKSET_SIZE, 0); } s64 cellSpursJoinTaskset(vm::ptr taskset) @@ -2390,35 +2428,70 @@ s64 cellSpursCreateTaskWithAttribute() #endif } -s64 cellSpursTasksetAttributeSetName() +s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm::ptr name) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14210, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr || !name) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->name = name.addr(); return CELL_OK; #endif }
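// A minimal sketch of how these attribute functions are meant to chain together,
// inferred from the checks implemented above (illustrative only; sdkVersion, args
// and priority stand for caller-supplied values and are not defined in this patch):
//
//   _cellSpursTasksetAttributeInitialize(attr, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION,
//                                        sdkVersion, args, priority, 8 /*max_contention*/);
//   cellSpursTasksetAttributeSetName(attr, name);                           // optional
//   cellSpursTasksetAttributeSetTasksetSize(attr, CELL_SPURS_TASKSET_SIZE); // optional
//   cellSpursCreateTasksetWithAttribute(spurs, taskset, attr);

-s64 cellSpursTasksetAttributeSetTasksetSize() +s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, size_t size) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14254, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % 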
CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (size != CELL_SPURS_TASKSET_SIZE && size != CELL_SPURS_TASKSET2_SIZE) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + attr->taskset_size = size; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeEnableClearLS() +s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr attr, s32 enable) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142AC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->enable_clear_ls = enable ? 1 : 0; return CELL_OK; #endif } @@ -2430,6 +2503,7 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1474C, libsre_rtoc); #else + memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE); attribute->revision = revision; attribute->name_addr = NULL; attribute->argTaskset = 0; @@ -2694,13 +2768,37 @@ s64 cellSpursGetTasksetInfo() #endif } -s64 _cellSpursTasksetAttributeInitialize() +s64 _cellSpursTasksetAttributeInitialize(vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142FC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attribute) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attribute.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + for (u32 i = 0; i < 8; i++) + { + if (priority[i] > 0xF) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + } + + memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE_SIZE); + attribute->revision = revision; + attribute->sdk_version = sdk_version; + attribute->args = args; + memcpy(attribute->priority, priority.get_ptr(), 8); + attribute->taskset_size = CELL_SPURS_TASKSET_SIZE; + attribute->max_contention = max_contention; return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4e72b50826..0248311f70 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -130,6 +130,17 @@ enum TaskConstants CELL_SPURS_TASK_TOP = 0x3000, CELL_SPURS_TASK_BOTTOM = 0x40000, CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, + CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_ATTRIBUTE_SIZE = 256, + CELL_SPURS_TASKSET_SIZE = 6400, + CELL_SPURS_TASKSET_ALIGN = 128, + CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN = 8, + CELL_SPURS_TASKSET_ATTRIBUTE_SIZE = 512, + CELL_SPURS_TASKSET2_SIZE = 10496, + CELL_SPURS_TASKSET2_ALIGN = 128, + CELL_SPURS_TASKSET_ATTRIBUTE2_ALIGN = 8, + CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE = 512, }; class SPURSManager; @@ -531,6 +542,18 @@ struct CellSpursTaskset2 be_t skip[10496]; }; +struct CellSpursTasksetAttribute +{ + be_t revision; + be_t sdk_version; + be_t args; + u8 priority[8]; + be_t max_contention; + be_t name; + be_t taskset_size; + be_t enable_clear_ls; +}; + struct CellSpursTasksetAttribute2 { be_t revision; From 5dd15b3c47bb9acc47e2e351fec0aaeeae78afb4 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 13 Dec 2014 01:42:09 +0530 Subject: [PATCH 02/29] SPURS: Implement create taskset --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 142 
++++++++++++++++------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 106 ++++++++++++----- 2 files changed, 178 insertions(+), 70 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 0aff338d50..aebf52dbf8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2296,22 +2296,50 @@ s64 cellSpursJobChainGetSpursAddress() #endif } -s64 spursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, - u32 max_contention, u32 enable_clear_ls, u32 size, u8 unknown /*TODO: Figure this out*/) +s64 spursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, + u32 max_contention, vm::ptr name, u32 size, s32 enable_clear_ls) { if (!spurs || !taskset) { return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (spurs.addr() % CELL_SPURS_ALIGN || taskset.addr() % CELL_SPURS_TASKSET_ALIGN) + if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } memset((void *)taskset.addr(), 0, size); - // TODO: Implement the rest of this + taskset->m.spurs = spurs; + taskset->m.args = args; + taskset->m.enable_clear_ls = enable_clear_ls > 0 ? 1 : 0; + taskset->m.size = size; + + auto wkl_attr = vm::ptr::make((u32)Memory.Alloc(CellSpursWorkloadAttribute::size, CellSpursWorkloadAttribute::align)); + _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(16) /*pm*/, 0x1E40 /*pm_size*/, + taskset.addr(), priority, 8 /*min_contention*/, max_contention); + // TODO: Check return code + + auto cls = vm::ptr::make((u32)Memory.Alloc(1, 1)); + *((char *)cls.get_ptr()) = 0; + cellSpursWorkloadAttributeSetName(wkl_attr, cls, name); + // TODO: Check return code + + // TODO: cellSpursWorkloadAttributeSetShutdownCompletionEventHook(wkl_attr, hook, taskset); + // TODO: Check return code + + auto wid = vm::ptr::make((u32)Memory.Alloc(4, 4)); + cellSpursAddWorkloadWithAttribute(spurs, wid, vm::ptr::make(wkl_attr.addr())); + // TODO: Check return code + + taskset->m.x72 = 0x80; + taskset->m.wid.FromBE(*wid); + // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset); + // TODO: Check return code + + Memory.Free(wkl_attr.addr()); + Memory.Free(wid.addr()); return CELL_OK; } @@ -2327,21 +2355,28 @@ s64 cellSpursCreateTasksetWithAttribute(vm::ptr spurs, vm::ptrrevision != 1) + if (attr->m.revision != CELL_SPURS_TASKSET_ATTRIBUTE_REVISION) { return CELL_SPURS_TASK_ERROR_INVAL; } - return spursCreateTaskset(spurs, taskset, attr->args, vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, priority)), - attr->max_contention, attr->enable_clear_ls, attr->taskset_size, 0 /*TODO: Figure this out*/); + auto rc = spursCreateTaskset(spurs, taskset, attr->m.args, vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)), + attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), attr->m.taskset_size, attr->m.enable_clear_ls); + + if (attr->m.taskset_size >= CellSpursTaskset2::size) + { + // TODO: Implement this + } + + return rc; } -s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, u32 maxContention) +s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, u32 maxContention) { cellSpurs->Warning("cellSpursCreateTaskset(spurs_addr=0x%x, taskset_addr=0x%x, args=0x%llx, priority_addr=0x%x, maxContention=%d)", spurs.addr(), taskset.addr(), args, priority.addr(), maxContention); @@ -2357,7 +2392,7 @@ s64 
cellSpursCreateTaskset(vm::ptr spurs, vm::ptr t return CELL_OK; #endif - return spursCreateTaskset(spurs, taskset, args, priority, maxContention, 0, CELL_SPURS_TASKSET_SIZE, 0); + return spursCreateTaskset(spurs, taskset, args, priority, maxContention, vm::ptr::make(0), CellSpursTaskset::size, 0); } s64 cellSpursJoinTaskset(vm::ptr taskset) @@ -2439,17 +2474,17 @@ s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attr.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } - attr->name = name.addr(); + attr->m.name = name; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, size_t size) +s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, u32 size) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2460,17 +2495,17 @@ s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr a return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attr.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } - if (size != CELL_SPURS_TASKSET_SIZE && size != CELL_SPURS_TASKSET2_SIZE) + if (size != CellSpursTaskset::size && size != CellSpursTaskset2::size) { return CELL_SPURS_TASK_ERROR_INVAL; } - attr->taskset_size = size; + attr->m.taskset_size = size; return CELL_OK; #endif } @@ -2486,12 +2521,12 @@ s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr at return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attr.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } - attr->enable_clear_ls = enable ? 1 : 0; + attr->m.enable_clear_ls = enable ? 
1 : 0; return CELL_OK; #endif } @@ -2503,20 +2538,19 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1474C, libsre_rtoc); #else - memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE); - attribute->revision = revision; - attribute->name_addr = NULL; - attribute->argTaskset = 0; + memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute2::size); + attribute->m.revision = revision; + attribute->m.name.set(0); + attribute->m.args = 0; for (s32 i = 0; i < 8; i++) { - attribute->priority[i] = 1; + attribute->m.priority[i] = 1; } - attribute->maxContention = 8; - attribute->enableClearLs = 0; - attribute->CellSpursTaskNameBuffer_addr = 0; - + attribute->m.max_contention = 8; + attribute->m.enable_clear_ls = 0; + attribute->m.task_name_buffer.set(0); return CELL_OK; #endif } @@ -2647,13 +2681,43 @@ s64 cellSpursTaskGetContextSaveAreaSize() #endif } -s64 cellSpursCreateTaskset2() +s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + auto free_attr = false; + if (!attr) + { + attr.set(Memory.Alloc(CellSpursTasksetAttribute2::size, CellSpursTasksetAttribute2::align)); + free_attr = true; + _cellSpursTasksetAttribute2Initialize(attr, 0); + } + + auto rc = spursCreateTaskset(spurs, vm::ptr::make(taskset.addr()), attr->m.args, + vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)), + attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), CellSpursTaskset2::size, (u8)attr->m.enable_clear_ls); + if (rc != CELL_OK) + { + if (free_attr) + { + Memory.Free(attr.addr()); + } + return rc; + } + + if (attr->m.task_name_buffer.addr() % CellSpursTaskNameBuffer::align) + { + if (free_attr) + { + Memory.Free(attr.addr()); + } + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + // TODO: Implement rest of the function + + if (free_attr) + { + Memory.Free(attr.addr()); + } return CELL_OK; #endif } @@ -2735,7 +2799,7 @@ s64 cellSpursTasksetUnsetExceptionEventHandler() #endif } -s64 cellSpursLookUpTasksetAddress() +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2746,7 +2810,7 @@ s64 cellSpursLookUpTasksetAddress() #endif } -s64 cellSpursTasksetGetSpursAddress() +s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2779,7 +2843,7 @@ s64 _cellSpursTasksetAttributeInitialize(vm::ptr attr return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attribute.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attribute.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } @@ -2792,13 +2856,13 @@ s64 _cellSpursTasksetAttributeInitialize(vm::ptr attr } } - memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE_SIZE); - attribute->revision = revision; - attribute->sdk_version = sdk_version; - attribute->args = args; - memcpy(attribute->priority, priority.get_ptr(), 8); - attribute->taskset_size = CELL_SPURS_TASKSET_SIZE; - attribute->max_contention = max_contention; + memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute::size); + attribute->m.revision = revision; + attribute->m.sdk_version = sdk_version; + attribute->m.args = args; + memcpy(attribute->m.priority, priority.get_ptr(), 8); + attribute->m.taskset_size = CellSpursTaskset::size; + attribute->m.max_contention = 
max_contention; return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 0248311f70..adb1a60799 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -131,16 +131,7 @@ enum TaskConstants CELL_SPURS_TASK_BOTTOM = 0x40000, CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASK_ATTRIBUTE_SIZE = 256, - CELL_SPURS_TASKSET_SIZE = 6400, - CELL_SPURS_TASKSET_ALIGN = 128, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN = 8, - CELL_SPURS_TASKSET_ATTRIBUTE_SIZE = 512, - CELL_SPURS_TASKSET2_SIZE = 10496, - CELL_SPURS_TASKSET2_ALIGN = 128, - CELL_SPURS_TASKSET_ATTRIBUTE2_ALIGN = 8, - CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE = 512, }; class SPURSManager; @@ -428,7 +419,31 @@ struct CellSpursEventFlag struct CellSpursTaskset { - SPURSManagerTaskset *taskset; + static const u32 align = 128; + static const u32 size = 6400; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + u8 unk1[0x64]; // 0x00 + vm::bptr spurs; // 0x64 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 x73; // 0x73 + be_t wid; // 0x74 + u8 unk3[0x1818]; // 0x78 + be_t size; // 0x1890 + } m; + + SPURSManagerTaskset *taskset; + }; }; struct CellSpursInfo @@ -524,6 +539,13 @@ struct CellSpursTracePacket //typedef void (*CellSpursTasksetExceptionEventHandler)(vm::ptr spurs, vm::ptr taskset, // u32 idTask, vm::ptr info, vm::ptr arg); +struct CellSpursTaskNameBuffer +{ + static const u32 align = 16; + + char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; +}; + struct CellSpursTasksetInfo { //CellSpursTaskInfo taskInfo[CELL_SPURS_MAX_TASK]; @@ -539,37 +561,59 @@ struct CellSpursTasksetInfo struct CellSpursTaskset2 { + static const u32 align = 128; + static const u32 size = 10496; + be_t skip[10496]; }; struct CellSpursTasksetAttribute { - be_t revision; - be_t sdk_version; - be_t args; - u8 priority[8]; - be_t max_contention; - be_t name; - be_t taskset_size; - be_t enable_clear_ls; + static const u32 align = 8; + static const u32 size = 512; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + be_t sdk_version; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + vm::bptr name; // 0x1C + be_t taskset_size; // 0x20 + be_t enable_clear_ls; // 0x24 + } m; + }; }; struct CellSpursTasksetAttribute2 { - be_t revision; - be_t name_addr; - be_t argTaskset; - u8 priority[8]; - be_t maxContention; - be_t enableClearLs; - be_t CellSpursTaskNameBuffer_addr; //??? *taskNameBuffer - //be_t __reserved__[]; -}; + static const u32 align = 8; + static const u32 size = 512; -// cellSpurs task structures. 
-struct CellSpursTaskNameBuffer -{ - char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + vm::bptr name; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + be_t enable_clear_ls; // 0x1C + vm::bptr task_name_buffer; // 0x20 + } m; + }; }; struct CellSpursTraceTaskData From 40f5f73658a2e4ad388884feacfdf9f963aed359 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 14 Dec 2014 00:22:31 +0530 Subject: [PATCH 03/29] SPURS: Implement some taskset functions --- rpcs3/Emu/Memory/vm_ptr.h | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 91 ++++++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 81 ++++++++++++++++++--- rpcs3/emucore.vcxproj | 5 ++ 4 files changed, 156 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index 838caf930c..6f0c65ed0d 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -253,7 +253,7 @@ namespace vm void* get_ptr() const { - return vm::get_ptr(m_addr); + return vm::get_ptr((u32)m_addr); } explicit operator void*() const @@ -313,7 +313,7 @@ namespace vm const void* get_ptr() const { - return vm::get_ptr(m_addr); + return vm::get_ptr((u32)m_addr); } explicit operator const void*() const diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index aebf52dbf8..831748949b 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2309,7 +2309,7 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks return CELL_SPURS_TASK_ERROR_ALIGN; } - memset((void *)taskset.addr(), 0, size); + memset(taskset.get_ptr(), 0, size); taskset->m.spurs = spurs; taskset->m.args = args; @@ -2406,13 +2406,28 @@ s64 cellSpursJoinTaskset(vm::ptr taskset) #endif } -s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr workloadId) +s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) { #ifdef PRX_DEBUG cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, workloadId_addr=0x%x)", taskset.addr(), workloadId.addr()); return GetCurrentPPUThread().FastCall2(libsre + 0x14EA0, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset || !wid) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + *wid = taskset->m.wid.ToBE(); return CELL_OK; #endif } @@ -2777,29 +2792,66 @@ s64 cellSpursCreateTask2WithBinInfo() #endif } -s64 cellSpursTasksetSetExceptionEventHandler() +s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, vm::ptr handler, vm::ptr arg) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13124, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset || !handler) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (taskset->m.exception_handler != 0) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + taskset->m.exception_handler = handler; + taskset->m.exception_handler_arg = arg; return CELL_OK; #endif } -s64 cellSpursTasksetUnsetExceptionEventHandler() +s64 
cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13194, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + taskset->m.exception_handler.set(0); + taskset->m.exception_handler_arg.set(0); return CELL_OK; #endif } -s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2810,13 +2862,28 @@ s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, vm::ptr spurs) +s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14408, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset || !spurs) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + *spurs = (u32)taskset->m.spurs.addr().ToBE(); return CELL_OK; #endif } @@ -3092,6 +3159,8 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursEnableExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursSetGlobalExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursUnsetGlobalExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); // Event flag REG_FUNC(cellSpurs, _cellSpursEventFlagInitialize); @@ -3137,8 +3206,6 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursCreateTask2WithBinInfo); REG_FUNC(cellSpurs, cellSpursLookUpTasksetAddress); REG_FUNC(cellSpurs, cellSpursTasksetGetSpursAddress); - REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); - REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursGetTasksetInfo); REG_FUNC(cellSpurs, cellSpursTasksetSetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursTasksetUnsetExceptionEventHandler); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index adb1a60799..fc8871ecd6 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -427,19 +427,38 @@ struct CellSpursTaskset // Raw data u8 _u8[size]; + struct TaskInfo + { + be_t args[4]; + vm::bptr elf; + vm::bptr context_save_storage; + be_t ls_pattern[4]; + }; + // Real data struct { - u8 unk1[0x64]; // 0x00 - vm::bptr spurs; // 0x64 - be_t args; // 0x68 - u8 enable_clear_ls; // 0x70 - u8 x71; // 0x71 - u8 x72; // 0x72 - u8 x73; // 0x73 - be_t wid; // 0x74 - u8 unk3[0x1818]; // 0x78 - be_t size; // 0x1890 + be_t running_set[4]; // 0x00 + be_t ready_set[4]; // 0x10 + be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t enabled_set[4]; // 0x30 + be_t signal_received_set[4]; // 0x40 + be_t waiting_set[4]; // 0x50 + vm::bptr spurs; // 0x60 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 last_scheduled_task; // 0x73 + be_t wid; // 0x74 + u8 unk1[8]; // 0x78 + TaskInfo task_info[128]; // 0x80 + vm::bptr 
exception_handler; // 0x1880 + vm::bptr exception_handler_arg; // 0x1888 + be_t size; // 0x1890 + u32 unk2; // 0x1894 + u32 event_flag_id1; // 0x1898 + u32 event_flag_id2; // 0x189C } m; SPURSManagerTaskset *taskset; @@ -564,7 +583,47 @@ struct CellSpursTaskset2 static const u32 align = 128; static const u32 size = 10496; - be_t skip[10496]; + union + { + // Raw data + u8 _u8[size]; + + struct TaskInfo + { + be_t args[4]; + vm::bptr elf_address; + vm::bptr context_save_storage; + be_t ls_pattern[4]; + }; + + // Real data + struct + { + be_t running_set[4]; // 0x00 + be_t ready_set[4]; // 0x10 + be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t enabled_set[4]; // 0x30 + be_t signal_received_set[4]; // 0x40 + be_t waiting_set[4]; // 0x50 + vm::bptr spurs; // 0x60 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 last_scheduled_task; // 0x73 + be_t wid; // 0x74 + u8 unk1[8]; // 0x78 + TaskInfo task_info[128]; // 0x80 + vm::bptr exception_handler; // 0x1880 + vm::bptr exception_handler_arg; // 0x1888 + be_t size; // 0x1890 + u32 unk2; // 0x1894 + u32 event_flag_id1; // 0x1898 + u32 event_flag_id2; // 0x189C + u8 unk3[0x88]; // 0x1900 + u128 task_exit_code[128]; // 0x1988 + } m; + }; }; struct CellSpursTasksetAttribute diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index b0990ba2f6..8e2629cea3 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -662,6 +662,7 @@ _UNICODE;UNICODE;%(PreprocessorDefinitions) stdafx.h Async + true true @@ -676,6 +677,7 @@ _UNICODE;UNICODE;LLVM_AVAILABLE;%(PreprocessorDefinitions) stdafx.h Async + true true @@ -694,6 +696,7 @@ _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions) stdafx.h Async + true true @@ -741,6 +744,7 @@ Use stdafx.h Async + true true @@ -759,6 +763,7 @@ stdafx.h Async LLVM_AVAILABLE;%(PreprocessorDefinitions) + true true From 53e0979f2910879090dbae21345625bafea08b97 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 14 Dec 2014 03:19:06 +0530 Subject: [PATCH 04/29] SPURS: Implement cellSpursCreateTask --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 122 ++++++++++++++++++++++- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 62 +++++++----- 2 files changed, 153 insertions(+), 31 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 831748949b..2810152043 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2427,7 +2427,7 @@ s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) return CELL_SPURS_TASK_ERROR_INVAL; } - *wid = taskset->m.wid.ToBE(); + *wid = taskset->m.wid; return CELL_OK; #endif } @@ -2443,6 +2443,121 @@ s64 cellSpursShutdownTaskset(vm::ptr taskset) #endif } +u32 _cellSpursGetSdkVersion() +{ + static u32 sdk_version = -2; + + if (sdk_version == -2) + { + auto version = vm::ptr::make((u32)Memory.Alloc(sizeof(u32), sizeof(u32))); + sys_process_get_sdk_version(sys_process_getpid(), version); + sdk_version = *version; + Memory.Free(version.addr()); + } + + return sdk_version; +} + +s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm::ptr elf_addr, vm::ptr context_addr, u32 context_size, vm::ptr ls_pattern, vm::ptr arg) +{ + if (!taskset || !elf_addr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (elf_addr.addr() % 16) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + auto sdk_version = _cellSpursGetSdkVersion(); + if (sdk_version < 0x27FFFF) + { + if (context_addr.addr() % 16) + { + return 
CELL_SPURS_TASK_ERROR_ALIGN; + } + } + else + { + if (context_addr.addr() % 128) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + } + + u32 alloc_ls_blocks = 0; + if (context_addr.addr() != 0) + { + if (context_size < CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + alloc_ls_blocks = context_size > 0x3D400 ? 0x7A : ((context_size - 0x400) >> 11); + if (ls_pattern.addr() != 0) + { + u32 ls_blocks = 0; + for (u32 i = 0; i < 2; i++) + { + for (u32 j = 0; j < 64; j++) + { + if (ls_pattern->u64[i] & ((u64)1 << j)) + { + ls_blocks++; + } + } + } + + if (ls_blocks > alloc_ls_blocks) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (ls_pattern->u32[0] & 0xFC000000) + { + // Prevent save/restore to SPURS management area + return CELL_SPURS_TASK_ERROR_INVAL; + } + } + } + else + { + alloc_ls_blocks = 0; + } + + // TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000 + + // TODO: Make the following block execute atomically + u32 tmp_task_id; + for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++) + { + u32 l = tmp_task_id >> 5; + u32 b = tmp_task_id & 0x1F; + if ((taskset->m.enabled_set[l] & (0x80000000 >> b)) == 0) + { + taskset->m.enabled_set[l] |= 0x80000000 >> b; + break; + } + } + + if (tmp_task_id >= CELL_SPURS_MAX_TASK) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + + taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr()); + taskset->m.task_info[tmp_task_id].context_save_storage.set((context_addr.addr() & 0xFFFFFFF8) | alloc_ls_blocks); + for (u32 i = 0; i < 2; i++) + { + taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; + taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern != 0 ? ls_pattern->u64[i] : 0; + } + + *task_id = tmp_task_id; + return CELL_OK; +} + s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, vm::ptr argument) { @@ -2451,8 +2566,7 @@ s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; + return spursCreateTask(taskset, taskID, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index fc8871ecd6..4a8ba993af 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -132,6 +132,7 @@ enum TaskConstants CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, }; class SPURSManager; @@ -417,6 +418,18 @@ struct CellSpursEventFlag SPURSManagerEventFlag *eventFlag; }; +union CellSpursTaskArgument +{ + be_t u32[4]; + be_t u64[2]; +}; + +union CellSpursTaskLsPattern +{ + be_t u32[4]; + be_t u64[2]; +}; + struct CellSpursTaskset { static const u32 align = 128; static const u32 size = 6400;
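// A sketch of the LS-pattern arithmetic that spursCreateTask above relies on
// (inferred from the checks in this patch, not from official documentation):
// SPU local storage is 256KB and the pattern is 128 bits wide, so each bit
// appears to stand for one 2KB block of LS to be saved/restored for the task:
//
//   alloc_ls_blocks = (context_size - 0x400) >> 11;  // capacity in 2KB blocks
//   ls_blocks       = popcount(u64[0]) + popcount(u64[1]);  // what the double loop computes
//   // requirement: ls_blocks <= alloc_ls_blocks
//
// The 0xFC000000 test masks the six most significant bits of u32[0], i.e. the
// first 6 * 2KB = 0x3000 bytes of LS, which is consistent with the TODO above
// wanting all ELF load segments at addresses >= 0x3000 (the SPURS kernel area).

@@ -429,14 +442,16 @@ struct CellSpursTaskset struct TaskInfo { - 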
CellSpursTaskArgument args; + vm::bptr elf_addr; + vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; }; + static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); + // Real data - struct + struct _CellSpursTaskset { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 @@ -459,8 +474,11 @@ struct CellSpursTaskset u32 unk2; // 0x1894 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C + u8 unk3[0x60]; // 0x18A0 } m; + static_assert(sizeof(_CellSpursTaskset) == size, "Wrong _CellSpursTaskset size"); + SPURSManagerTaskset *taskset; }; }; @@ -590,14 +608,16 @@ struct CellSpursTaskset2 struct TaskInfo { - be_t args[4]; - vm::bptr elf_address; - vm::bptr context_save_storage; - be_t ls_pattern[4]; + CellSpursTaskArgument args; + vm::bptr elf_addr; + vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; }; + static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); + // Real data - struct + struct _CellSpursTaskset2 { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 @@ -620,9 +640,12 @@ struct CellSpursTaskset2 u32 unk2; // 0x1894 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C - u8 unk3[0x88]; // 0x1900 - u128 task_exit_code[128]; // 0x1988 + u8 unk3[0xE8]; // 0x18A0 + u64 task_exit_code[256]; // 0x1988 + u8 unk4[0x778]; // 0x2188 } m; + + static_assert(sizeof(_CellSpursTaskset2) == size, "Wrong _CellSpursTaskset2 size"); }; }; @@ -681,21 +704,6 @@ struct CellSpursTraceTaskData be_t task; }; -typedef be_t be_u32; -typedef be_t be_u64; - -struct CellSpursTaskArgument -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - -struct CellSpursTaskLsPattern -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - struct CellSpursTaskAttribute2 { be_t revision; From 68cc9b205920c3579d5d89897caa022f506d44bc Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 14 Dec 2014 12:07:46 +0530 Subject: [PATCH 05/29] SPURS: Enable logging of taskset calls --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 91 ++++++++++++++++-------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 2810152043..8163443e42 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2345,8 +2345,9 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks s64 cellSpursCreateTasksetWithAttribute(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { + cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc); #endif @@ -2397,8 +2398,9 @@ s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr t s64 cellSpursJoinTaskset(vm::ptr taskset) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursJoinTaskset(taskset_addr=0x%x)", taskset.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x152F8, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2408,8 +2410,9 @@ s64 cellSpursJoinTaskset(vm::ptr taskset) s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) { + cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, wid=0x%x)", taskset.addr(), wid.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, workloadId_addr=0x%x)", taskset.addr(), 
workloadId.addr()); return GetCurrentPPUThread().FastCall2(libsre + 0x14EA0, libsre_rtoc); #else if (!taskset || !wid) @@ -2434,8 +2437,9 @@ s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) s64 cellSpursShutdownTaskset(vm::ptr taskset) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursShutdownTaskset(taskset_addr=0x%x)", taskset.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x14868, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2561,9 +2565,10 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, vm::ptr argument) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursCreateTask(taskset_addr=0x%x, taskID_addr=0x%x, elf_addr_addr=0x%x, context_addr_addr=0x%x, context_size=%d, lsPattern_addr=0x%x, argument_addr=0x%x)", taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc); #else return spursCreateTask(taskset, taskID, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); @@ -2583,8 +2588,9 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) s64 cellSpursCreateTaskWithAttribute() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12204, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2594,8 +2600,9 @@ s64 cellSpursCreateTaskWithAttribute() s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm::ptr name) { + cellSpurs->Warning("%s(attr=0x%x, name=0x%x)", __FUNCTION__, attr.addr(), name.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14210, libsre_rtoc); #else if (!attr || !name) @@ -2615,8 +2622,9 @@ s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, u32 size) { + cellSpurs->Warning("%s(attr=0x%x, size=0x%x)", __FUNCTION__, attr.addr(), size); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14254, libsre_rtoc); #else if (!attr) @@ -2641,8 +2649,9 @@ s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr a s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr attr, s32 enable) { + cellSpurs->Warning("%s(attr=0x%x, enable=%d)", __FUNCTION__, attr.addr(), enable); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142AC, libsre_rtoc); #else if (!attr) @@ -2686,8 +2695,9 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at s64 cellSpursTaskExitCodeGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1397C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2697,8 +2707,9 @@ s64 cellSpursTaskExitCodeGet() s64 cellSpursTaskExitCodeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1352C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2708,8 +2719,9 @@ s64 cellSpursTaskExitCodeInitialize() s64 cellSpursTaskExitCodeTryGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13974, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2719,8 
+2731,9 @@ s64 cellSpursTaskExitCodeTryGet() s64 cellSpursTaskGetLoadableSegmentPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13ED4, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2730,8 +2743,9 @@ s64 cellSpursTaskGetLoadableSegmentPattern() s64 cellSpursTaskGetReadOnlyAreaPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13CFC, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2741,8 +2755,9 @@ s64 cellSpursTaskGetReadOnlyAreaPattern() s64 cellSpursTaskGenerateLsPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13B78, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2752,8 +2767,9 @@ s64 cellSpursTaskGenerateLsPattern() s64 _cellSpursTaskAttributeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10C30, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2763,8 +2779,9 @@ s64 _cellSpursTaskAttributeInitialize() s64 cellSpursTaskAttributeSetExitCodeContainer() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10A98, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2801,8 +2818,9 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut s64 cellSpursTaskGetContextSaveAreaSize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1409C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2812,8 +2830,9 @@ s64 cellSpursTaskGetContextSaveAreaSize() s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { + cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc); #else auto free_attr = false; @@ -2853,8 +2872,9 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr s64 cellSpursCreateTask2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x11E54, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2864,8 +2884,9 @@ s64 cellSpursCreateTask2() s64 cellSpursJoinTask2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x11378, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2875,8 +2896,9 @@ s64 cellSpursJoinTask2() s64 cellSpursTryJoinTask2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x11748, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2886,8 +2908,9 @@ s64 cellSpursTryJoinTask2() s64 cellSpursDestroyTaskset2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x14EE8, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2897,8 +2920,9 @@ s64 cellSpursDestroyTaskset2() s64 cellSpursCreateTask2WithBinInfo() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x120E0, libsre_rtoc); #else 
UNIMPLEMENTED_FUNC(cellSpurs); @@ -2908,8 +2932,9 @@ s64 cellSpursCreateTask2WithBinInfo() s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, vm::ptr handler, vm::ptr arg) { + cellSpurs->Warning("%s(taskset=0x%x, handler=0x%x, arg=0x%x)", __FUNCTION__, taskset.addr(), handler.addr(), arg.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13124, libsre_rtoc); #else if (!taskset || !handler) @@ -2940,8 +2965,9 @@ s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset) { + cellSpurs->Warning("%s(taskset=0x%x)", __FUNCTION__, taskset.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13194, libsre_rtoc); #else if (!taskset) @@ -2967,8 +2993,9 @@ s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { + cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, id=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), id); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x133AC, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2978,8 +3005,9 @@ s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs) { + cellSpurs->Warning("%s(taskset=0x%x, spurs=0x%x)", __FUNCTION__, taskset.addr(), spurs.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14408, libsre_rtoc); #else if (!taskset || !spurs) @@ -3004,8 +3032,9 @@ s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm: s64 cellSpursGetTasksetInfo() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1445C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -3015,8 +3044,10 @@ s64 cellSpursGetTasksetInfo() s64 _cellSpursTasksetAttributeInitialize(vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention) { + cellSpurs->Warning("%s(attribute=0x%x, revision=%u, sdk_version=%u, args=0x%llx, priority=0x%x, max_contention=%u)", + __FUNCTION__, attribute.addr(), revision, sdk_version, args, priority.addr(), max_contention); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142FC, libsre_rtoc); #else if (!attribute) From 5a6016003542d7064ac716f7f7f7c71d0d1b59e7 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 16 Dec 2014 09:12:50 +0530 Subject: [PATCH 06/29] SPURS: Added comments on CellSpurs fields --- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 126 ++++++++++++------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4a8ba993af..849a597223 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -224,7 +224,7 @@ struct CellSpurs struct _sub_str1 { - u8 unk0[0x20]; + u8 unk0[0x20]; // 0x00 - SPU exception handler 0x08 - SPU exception handler args be_t sem; // 0x20 u8 unk1[0x8]; vm::bptr hook; // 0x30 @@ -234,14 +234,14 @@ struct CellSpurs static_assert(sizeof(_sub_str1) == 0x80, "Wrong _sub_str1 size"); - struct _sub_str2 + struct _sub_str2 // Event port multiplexer { - be_t unk0; - be_t unk1; - be_t unk2; - be_t unk3; 
+ be_t unk0; // 0x00 Outstanding requests + be_t unk1; // 0x04 + be_t unk2; // 0x08 + be_t unk3; // 0x0C be_t port; // 0x10 - u8 unk_[0x68]; + u8 unk_[0x68]; // 0x18 - The first u64 seems to be the start of a linked list. The linked list struct seems to be {u64 next; u64 data; u64 handler} }; static_assert(sizeof(_sub_str2) == 0x80, "Wrong _sub_str2 size"); @@ -275,43 +275,43 @@ struct CellSpurs struct { atomic_t wklReadyCount[0x20]; // 0x0 (index = wid) - u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) + u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Current contention + u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Pending + u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - Min contention + atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max contention
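// A sketch of how one of these packed 4-bit entries appears to be decoded,
// going by the "index = wid % 16, internal index = wid / 16" comments above
// (an illustrative helper, not part of the patch; it matches the nibble
// selection used by the rewritten kernel in patch 07):
//
//   u8 packed = arr[wid % 16];
//   u8 value  = (wid < 16) ? (packed & 0x0F) : (packed >> 4);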
CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - atomic_t x72; // 0x72 - u8 x73; // 0x73 - u8 flags1; // 0x74 - u8 x75; // 0x75 - u8 nSpus; // 0x76 + atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - Workload signal + atomic_t x72; // 0x72 - message + u8 x73; // 0x73 - idling + u8 flags1; // 0x74 - Bit0(MSB)=exit_if_no_work, Bit1=32_workloads + u8 x75; // 0x75 - trace control + u8 nSpus; // 0x76 - Number of SPUs atomic_t flagRecv; // 0x77 - atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) + atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) - Workload signal u8 x7A[6]; // 0x7A - atomic_t wklStat1[0x10]; // 0x80 - u8 wklD1[0x10]; // 0x90 - u8 wklE1[0x10]; // 0xA0 - atomic_t wklMskA; // 0xB0 - atomic_t wklMskB; // 0xB4 - u8 xB8[5]; // 0xB8 - atomic_t xBD; // 0xBD - u8 xBE[2]; // 0xBE - u8 xC0[8]; // 0xC0 - u8 xC8; // 0xC8 - u8 spuPort; // 0xC9 + atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_existent, preparing, runnable, shutting_down, removable, invalid} + u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) + u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) + atomic_t wklMskA; // 0xB0 - Available workloads (32*u1) + atomic_t wklMskB; // 0xB4 - Available module id + u8 xB8[5]; // 0xB8 - 0xBC - exit barrier + atomic_t xBD; // 0xBD - update workload + u8 xBE[2]; // 0xBE - 0xBF - message - terminate + u8 xC0[8]; // 0xC0 - System workload + u8 xC8; // 0xC8 - System service - on spu + u8 spuPort; // 0xC9 - SPU port for system service u8 xCA; // 0xCA u8 xCB; // 0xCB u8 xCC; // 0xCC u8 xCD; // 0xCD - u8 xCE; // 0xCE + u8 xCE; // 0xCE - message - update trace u8 xCF; // 0xCF - atomic_t wklStat2[0x10]; // 0xD0 - u8 wklD2[0x10]; // 0xE0 - u8 wklE2[0x10]; // 0xF0 + atomic_t wklStat2[0x10]; // 0xD0 - Workload state (16*u8) + u8 wklD2[0x10]; // 0xE0 - Workload status (16*u8) + u8 wklE2[0x10]; // 0xF0 - Workload event (16*u8) _sub_str1 wklF1[0x10]; // 0x100 - be_t unk22; // 0x900 - u8 unknown7[0x980 - 0x908]; + be_t unk22; // 0x900 - SPURS trace buffer + u8 unknown7[0x980 - 0x908]; // 0x908 - Per SPU trace info ??? (8*u32) 0x950 - SPURS trace mode (u32) be_t semPrv; // 0x980 be_t unk11; // 0x988 be_t unk12; // 0x98C be_t unk13; // 0x990 @@ -321,14 +321,14 @@ struct CellSpurs _sub_str3 wklSysG; // 0xD00 be_t ppu0; // 0xD20 be_t ppu1; // 0xD28 - be_t spuTG; // 0xD30 + be_t spuTG; // 0xD30 - SPU thread group be_t spus[8]; // 0xD34 u8 unknown3[0xD5C - 0xD54]; - be_t queue; // 0xD5C - be_t port; // 0xD60 - atomic_t xD64; // 0xD64 - atomic_t xD65; // 0xD65 - atomic_t xD66; // 0xD66 + be_t queue; // 0xD5C - Event queue + be_t port; // 0xD60 - Event port + atomic_t xD64; // 0xD64 - SPURS handler dirty + atomic_t xD65; // 0xD65 - SPURS handler waiting + atomic_t xD66; // 0xD66 - SPURS handler exiting atomic_t enableEH; // 0xD68 be_t exception; // 0xD6C sys_spu_image spuImg; // 0xD70 @@ -340,13 +340,13 @@ struct CellSpurs be_t unk5; // 0xD9C be_t revision; // 0xDA0 be_t sdkVersion; // 0xDA4 - atomic_t spups; // 0xDA8 + atomic_t spups; // 0xDA8 - SPU port bits sys_lwmutex_t mutex; // 0xDB0 sys_lwcond_t cond; // 0xDC8 u8 unknown9[0xE00 - 0xDD0]; _sub_str4 wklH1[0x10]; // 0xE00 _sub_str2 sub3; // 0xF00 - u8 unknown6[0x1000 - 0xF80]; + u8 unknown6[0x1000 - 0xF80]; // 0xF80 - Global SPU exception handler 0xF88 - Global SPU exception handler args _sub_str3 wklG2[0x10]; // 0x1000 _sub_str1 wklF2[0x10]; // 0x1200 _sub_str4 wklH2[0x10]; // 0x1A00 From 698f4fd4506dd0c8429d82026c82b0761c7b1706 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 21 Dec 2014 01:37:29 +0530 Subject: [PATCH 07/29] SPURS: Improve the readability of the SPURS2 kernel at the cost of some performance --- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/Memory/vm_ptr.h | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 332 ++++++++++++----------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 91 +++++-- 4 files changed, 242 insertions(+), 185 deletions(-)
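A note on the kernel rewrite below, condensed from the comments in the new code
(a summary sketch, not the literal algorithm; "selectWorkload" is a hypothetical
name for the scheduler callback installed by spursInit): the callback takes one
boolean argument, false when entered from the SPURS kernel and true when entered
from cellSpursModulePollStatus. Among the eligible workloads it picks the one
with the highest scheduling weight, with the system service workload taking
absolute priority:

    // selectWorkload(false) - kernel entry: pick a workload, commit the result
    //                         to the shared SPURS area
    // selectWorkload(true)  - poll: pending context switches are added to the
    //                         contention and nothing is written back

diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index aaffd7d00d..e7c28ed8ff 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -608,7 +608,7 @@ public: for (auto 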
&arg : values) { u32 arg_size = align(u32(arg.size() + 1), stack_align); - u32 arg_addr = Memory.MainMem.AllocAlign(arg_size, stack_align); + u32 arg_addr = (u32)Memory.MainMem.AllocAlign(arg_size, stack_align); std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index 6f0c65ed0d..0970ad7996 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -112,7 +112,7 @@ namespace vm __forceinline T* const operator -> () const { - return vm::get_ptr(m_addr); + return vm::get_ptr((u32)m_addr); } _ptr_base operator++ (int) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 8163443e42..483c3d56c6 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -37,7 +37,7 @@ s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptrm.wklSysG.pm.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4))); spurs->m.wklSysG.size = 0x2200; #else - spurs->m.wklSysG.pm.set(be_t::make(0x100)); // wrong 64-bit address + spurs->m.wklInfoSysSrv.pm.set(be_t::make(0x100)); // wrong 64-bit address #endif - spurs->m.wklSysG.data = 0; - spurs->m.wklSysG.copy.write_relaxed(0xff); + spurs->m.wklInfoSysSrv.data = 0; + spurs->m.wklInfoSysSrv.copy.write_relaxed(0xff); u32 sem; for (u32 i = 0; i < 0x10; i++) { @@ -146,7 +146,7 @@ s64 spursInit( assert(!"spu_image_import() failed"); } #else - spurs->m.spuImg.addr = Memory.Alloc(0x40000, 4096); + spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096); #endif s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL; @@ -208,12 +208,12 @@ s64 spursInit( u64 vRES = 0x20ull << 32; u128 vSET = {}; - if (spurs->m.x72.read_relaxed() & (1 << num)) + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << num)) { SPU.WriteLS8(0x1eb, 0); // var4 if (arg1 == 0 || var1 == 0x20) { - spurs->m.x72._and_not(1 << num); + spurs->m.sysSrvMessage._and_not(1 << num); } } else @@ -224,9 +224,9 @@ s64 spursInit( u128 savedD = SPU.ReadLS128(0x1B0); u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8))); u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); - u32 flagRecv = spurs->m.flagRecv.read_relaxed(); + u32 flagRecv = spurs->m.wklFlagReceiver.read_relaxed(); u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); - u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); + u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSignal1.read_relaxed()]); u128 vFMS1 = vFM | wklSet1; u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 0x8000 >> var1 : 0]); u32 var5 = SPU.ReadLS32(0x1ec); @@ -270,7 +270,7 @@ s64 spursInit( if (!arg1 || var1 == vNUM) { - spurs->m.wklSet1._and_not(be_t::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0))); + spurs->m.wklSignal1._and_not(be_t::make((u16)(vNUM < 16 ? 
0x8000 >> vNUM : 0))); if (vNUM == flagRecv && wklFlag == 0) { spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); @@ -305,126 +305,145 @@ s64 spursInit( { LV2_LOCK(0); // TODO: lock-free implementation if possible - const u32 arg1 = SPU.GPR[3]._u32[3]; - u32 var0 = SPU.ReadLS32(0x1d8); - u32 var1 = SPU.ReadLS32(0x1dc); - u128 wklA = vm::read128(spurs.addr() + 0x20); - u128 wklB = vm::read128(spurs.addr() + 0x30); - u128 savedA = SPU.ReadLS128(0x180); - u128 savedB = SPU.ReadLS128(0x190); - u128 vAA = u128::sub8(wklA, savedA); - u128 vBB = u128::sub8(wklB, savedB); - u128 vM1 = {}; if (var1 <= 31) vM1.u8r[var1 & 0xf] = (var1 <= 15) ? 0xf : 0xf0; - u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB)); + auto mgmt = vm::get_ptr(SPU.ls_offset); - u32 vNUM = 0x20; - u64 vRES = 0x20ull << 32; - u128 vSET = {}; + // The first and only argument to this function is a boolean that is set to false if the function + // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. + // If the first argument is true then the shared data is not updated with the result. + const auto isPoll = SPU.GPR[3]._u32[3]; - if (spurs->m.x72.read_relaxed() & (1 << num)) + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD2]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - SPU.WriteLS8(0x1eb, 0); // var4 - if (arg1 == 0 || var1 == 0x20) + contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; + + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { - spurs->m.x72._and_not(1 << num); + pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; + if (i != mgmt->wklCurrentId) + { + contention[i] += pendingContention[i]; + } + } + } + + u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + u32 pollStatus = 0; + + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. + if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) + { + // Not sure what this does. Possibly Mark the SPU as in use. 
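
The surrounding branch performs a test-and-clear of this SPU's bit in the 8-bit system-service message mask. As a standalone sketch (a plain u8 stands in for the shared atomic_t field, the poll-mode guard applied before the clear is omitted, and the helper name is hypothetical):

#include <cstdint>

// Probe and clear one SPU's bit in an 8-bit system-service message mask,
// mirroring the sysSrvMessage handling in the branch above.
bool takeSysSrvMessage(std::uint8_t& message, unsigned spuNum)
{
    const std::uint8_t bit = static_cast<std::uint8_t>(1u << spuNum);
    if ((message & bit) == 0)
    {
        return false; // no message pending for this SPU
    }
    message = static_cast<std::uint8_t>(message & ~bit); // acknowledge it
    return true;
}
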
+ mgmt->x1EB = 0; + if (!isPoll || mgmt->wklCurrentId == 0x20) + { + // Clear the message bit + mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); } } else { - u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0); - u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); - u128 savedC = SPU.ReadLS128(0x1A0); - u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); - u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); - u32 flagRecv = spurs->m.flagRecv.read_relaxed(); - u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); - u128 wklSet2 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet2.read_relaxed()]); - u128 vABL = vAABB & u128::from8p(0x0f); - u128 vABH = u128::fromV(_mm_srli_epi32((vAABB & u128::from8p(0xf0)).vi, 4)); - u32 var5 = SPU.ReadLS32(0x1ec); - u128 v5L = u128::fromV(g_imm_table.fsmb_table[var5 >> 16]); - u128 v5H = u128::fromV(g_imm_table.fsmb_table[(u16)var5]); - u128 vFML = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); - u128 vFMH = u128::fromV(g_imm_table.fsmb_table[(u16)((wklFlag == 0) && (flagRecv < 32) ? 0x80000000 >> flagRecv : 0)]); - u128 vCL = u128::fromV(_mm_slli_epi32((savedC & u128::from8p(0x0f)).vi, 4)); - u128 vCH = savedC & u128::from8p(0xf0); - u128 vABRL = u128::gtu8(wklReadyCount0, vABL); - u128 vABRH = u128::gtu8(wklReadyCount1, vABH); - u128 vCCL = v5L & u128::gtu8(vCL, {}) & u128::gtu8(wklMaxCnt & u128::from8p(0x0f), vABL) & (wklSet1 | vFML | vABRL); - u128 vCCH = v5H & u128::gtu8(vCH, {}) & u128::gtu8(u128::fromV(_mm_srli_epi32((wklMaxCnt & u128::from8p(0xf0)).vi, 4)), vABH) & (wklSet2 | vFMH | vABRH); - u128 v1H = {}; if (var1 <= 31 && var1 > 15) v1H.u8r[var1 & 0xf] = 4; - u128 v1L = {}; if (var1 <= 15) v1L.u8r[var1] = 4; - u128 vCH1 = (v1H | vCH & u128::from8p(0xFB)) & vCCH; - u128 vCL1 = (v1L | vCL & u128::from8p(0xFB)) & vCCL; - u128 vSTATL = vABRL & u128::from8p(1) | wklSet1 & u128::from8p(2) | vFML & u128::from8p(4); - u128 vSTATH = vABRH & u128::from8p(1) | wklSet2 & u128::from8p(2) | vFMH & u128::from8p(4); + // Caclulate the scheduling weight for each worjload + u8 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) + { + auto j = i & 0x0F; + u8 x1ECx1EE = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->x1EC & (0x8000 >> j) : mgmt->x1EE & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - s32 max = -1; - for (u32 i = 0; i < 0x10; i++) - { - const s32 value = vCL1.u8r[i]; - if (value > max && (vCCL.u8r[i] & 1)) + // For a worload to be considered for scheduling: + // 1. Its priority must be greater than 0 + // 2. The number of SPUs used by it must be less than the max contention for that workload + // 3. The bit in 0x1EC/0x1EE for the wokload must be set + // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. 
readyCount) + // Condition #4 may be overriden using a workload signal or using the workload flag + if (x1ECx1EE && priority > 0 && maxContention > contention[i]) { - vNUM = i; - max = value; - } - } - for (u32 i = 16; i < 0x20; i++) - { - const s32 value = vCH1.u8r[i]; - if (value > max && (vCCH.u8r[i] & 1)) - { - vNUM = i; - max = value; + if (wklFlag || wklSignal || mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i]) + { + // The scheduling weight of the workload is equal to the priority of the workload for the SPU. + // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. + u8 weight = priority << 4; + if (mgmt->wklCurrentId == i) + { + weight |= 0x04; + } + + // In case of a tie the lower numbered workload is chosen + if (weight > maxWeight) + { + wklSelectedId = i; + maxWeight = weight; + pollStatus = mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; + pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; + } + } } } - if (vNUM < 0x10) - { - vRES = ((u64)vNUM << 32) | vSTATL.u8r[vNUM]; - vSET.u8r[vNUM] = 0x01; - } - else if (vNUM < 0x20) - { - vRES = ((u64)vNUM << 32) | vSTATH.u8r[vNUM & 0xf]; - vSET.u8r[vNUM] = 0x10; - } + // Not sure what this does. Possibly mark the SPU as idle/in use. + mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; - SPU.WriteLS8(0x1eb, vNUM == 0x20); - - if (!arg1 || var1 == vNUM) + if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { - spurs->m.wklSet1._and_not(be_t::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0))); - spurs->m.wklSet2._and_not(be_t::make((u16)(0x80000000 >> vNUM))); - if (vNUM == flagRecv && wklFlag == 0) + // Clear workload signal for the selected workload + mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); + mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + + // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s + if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) { - spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); + mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); } } } - if (arg1 == 0) + if (!isPoll) { - vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA + // Called by kernel + // Increment the contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + contention[wklSelectedId]++; + } - SPU.WriteLS128(0x180, vSET); // update savedA - SPU.WriteLS32(0x1dc, vNUM); // update var1 + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) + { + mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); + mgmt->wklLocContention[i] = 0; + mgmt->wklLocPendingContention[i] = 0; + } + + mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 
0x10 : 0; + mgmt->wklCurrentId = wklSelectedId; + } + else if (wklSelectedId != mgmt->wklCurrentId) + { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) + { + mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); + mgmt->wklLocPendingContention[i] = 0; + } + + mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - if (arg1 == 1 && vNUM != var1) - { - vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB - - SPU.WriteLS128(0x190, vSET); // update savedB - } - else - { - vm::write128(spurs.addr() + 0x30, vBB); // update wklB - - SPU.WriteLS128(0x190, {}); // update savedB - } - - return vRES; + u64 result = (u64)wklSelectedId << 32; + result |= pollStatus; + return result; }; //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test //{ @@ -434,10 +453,13 @@ s64 spursInit( // return vRES; //}; - SPU.WriteLS128(0x1c0, u128::from32r(0, spurs.addr(), num, 0x1f)); + SpursKernelMgmtData * mgmt = vm::get_ptr(SPU.ls_offset); + mgmt->spurs = spurs; + mgmt->spuNum = num; + mgmt->dmaTagId = 0x1F; - u32 wid = 0x20; - u32 stat = 0; + u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + u32 pollStatus = 0; while (true) { if (Emu.IsStopped()) @@ -447,14 +469,14 @@ s64 spursInit( } // get current workload info: - auto& wkl = wid <= 15 ? spurs->m.wklG1[wid] : (wid <= 31 && isSecond ? spurs->m.wklG2[wid & 0xf] : spurs->m.wklSysG); + auto& wkl = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? spurs->m.wklInfo2[wid & 0xf] : spurs->m.wklInfoSysSrv); - if (SPU.ReadLS64(0x1d0) != wkl.pm.addr()) + if (mgmt->wklCurrentAddr != wkl.pm) { // load executable code: memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.pm.get_ptr(), wkl.size); - SPU.WriteLS64(0x1d0, wkl.pm.addr()); - SPU.WriteLS32(0x1d8, wkl.copy.read_relaxed()); + mgmt->wklCurrentAddr = wkl.pm; + mgmt->x1D8 = wkl.copy.read_relaxed(); } if (!isSecond) SPU.WriteLS16(0x1e8, 0); @@ -463,7 +485,7 @@ s64 spursInit( SPU.GPR[1]._u32[3] = 0x3FFB0; SPU.GPR[3]._u32[3] = 0x100; SPU.GPR[4]._u64[1] = wkl.data; - SPU.GPR[5]._u32[3] = stat; + SPU.GPR[5]._u32[3] = pollStatus; SPU.FastCall(0xa00); // check status: @@ -481,7 +503,7 @@ s64 spursInit( SPU.GPR[3].clear(); assert(SPU.m_code3_func); u64 res = SPU.m_code3_func(SPU); - stat = (u32)(res); + pollStatus = (u32)(res); wid = (u32)(res >> 32); } @@ -507,8 +529,8 @@ s64 spursInit( assert(!"lwcond_create() failed"); } - spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? SF1_IS_SECOND : 0); - spurs->m.flagRecv.write_relaxed(0xff); + spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? 
SF1_32_WORKLOADS : 0); + spurs->m.wklFlagReceiver.write_relaxed(0xff); spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); spurs->_u8[0xD64] = 0; spurs->_u8[0xD65] = 0; @@ -516,7 +538,7 @@ s64 spursInit( spurs->m.ppuPriority = ppuPriority; u32 queue; - if (s32 res = spursCreateLv2EventQueue(spurs, queue, vm::ptr::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv")) + if (s32 res = (s32)spursCreateLv2EventQueue(spurs, queue, vm::ptr::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv")) { assert(!"spursCreateLv2EventQueue() failed"); } @@ -575,14 +597,14 @@ s64 spursInit( for (u32 i = 0; i < 16; i++) { if (spurs->m.wklStat1[i].read_relaxed() == 2 && - spurs->m.wklG1[i].priority.ToBE() != 0 && - spurs->m.wklMaxCnt[i].read_relaxed() & 0xf + spurs->m.wklInfo1[i].priority.ToBE() != 0 && + spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) { if (spurs->m.wklReadyCount[i].read_relaxed() || - spurs->m.wklSet1.read_relaxed() & (0x8000u >> i) || + spurs->m.wklSignal1.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && - spurs->m.flagRecv.read_relaxed() == (u8)i + spurs->m.wklFlagReceiver.read_relaxed() == (u8)i )) { do_break = true; @@ -590,17 +612,17 @@ s64 spursInit( } } } - if (spurs->m.flags1 & SF1_IS_SECOND) for (u32 i = 0; i < 16; i++) + if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++) { if (spurs->m.wklStat2[i].read_relaxed() == 2 && - spurs->m.wklG2[i].priority.ToBE() != 0 && - spurs->m.wklMaxCnt[i].read_relaxed() & 0xf0 + spurs->m.wklInfo2[i].priority.ToBE() != 0 && + spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) { if (spurs->m.wklReadyCount[i + 0x10].read_relaxed() || - spurs->m.wklSet2.read_relaxed() & (0x8000u >> i) || + spurs->m.wklSignal2.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && - spurs->m.flagRecv.read_relaxed() == (u8)i + 0x10 + spurs->m.wklFlagReceiver.read_relaxed() == (u8)i + 0x10 )) { do_break = true; @@ -1339,7 +1361,7 @@ s32 spursAddWorkload( } u32 wnum; - const u32 wmax = spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u; // TODO: check if can be changed + const u32 wmax = spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed spurs->m.wklMskA.atomic_op([spurs, wmax, &wnum](be_t& value) { wnum = cntlz32(~(u32)value); // found empty position @@ -1358,15 +1380,15 @@ s32 spursAddWorkload( u32 index = wnum & 0xf; if (wnum <= 15) { - assert((spurs->m.wklA[wnum] & 0xf) == 0); - assert((spurs->m.wklB[wnum] & 0xf) == 0); + assert((spurs->m.wklCurrentContention[wnum] & 0xf) == 0); + assert((spurs->m.wklPendingContention[wnum] & 0xf) == 0); spurs->m.wklStat1[wnum].write_relaxed(1); spurs->m.wklD1[wnum] = 0; spurs->m.wklE1[wnum] = 0; - spurs->m.wklG1[wnum].pm = pm; - spurs->m.wklG1[wnum].data = data; - spurs->m.wklG1[wnum].size = size; - spurs->m.wklG1[wnum].priority = *(be_t*)priorityTable; + spurs->m.wklInfo1[wnum].pm = pm; + spurs->m.wklInfo1[wnum].data = data; + spurs->m.wklInfo1[wnum].size = size; + spurs->m.wklInfo1[wnum].priority = *(be_t*)priorityTable; spurs->m.wklH1[wnum].nameClass = nameClass; spurs->m.wklH1[wnum].nameInstance = nameInstance; memset(spurs->m.wklF1[wnum].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1377,23 +1399,23 @@ s32 spursAddWorkload( spurs->m.wklF1[wnum].hookArg = hookArg; spurs->m.wklE1[wnum] |= 2; } - if ((spurs->m.flags1 & SF1_IS_SECOND) == 0) + if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0) { spurs->m.wklReadyCount[wnum + 16].write_relaxed(0); - spurs->m.wklMinCnt[wnum] = minContention > 8 ? 
8 : minContention; + spurs->m.wklMinContention[wnum] = minContention > 8 ? 8 : minContention; } } else { - assert((spurs->m.wklA[index] & 0xf0) == 0); - assert((spurs->m.wklB[index] & 0xf0) == 0); + assert((spurs->m.wklCurrentContention[index] & 0xf0) == 0); + assert((spurs->m.wklPendingContention[index] & 0xf0) == 0); spurs->m.wklStat2[index].write_relaxed(1); spurs->m.wklD2[index] = 0; spurs->m.wklE2[index] = 0; - spurs->m.wklG2[index].pm = pm; - spurs->m.wklG2[index].data = data; - spurs->m.wklG2[index].size = size; - spurs->m.wklG2[index].priority = *(be_t*)priorityTable; + spurs->m.wklInfo2[index].pm = pm; + spurs->m.wklInfo2[index].data = data; + spurs->m.wklInfo2[index].size = size; + spurs->m.wklInfo2[index].priority = *(be_t*)priorityTable; spurs->m.wklH2[index].nameClass = nameClass; spurs->m.wklH2[index].nameInstance = nameInstance; memset(spurs->m.wklF2[index].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1409,27 +1431,27 @@ s32 spursAddWorkload( if (wnum <= 15) { - spurs->m.wklMaxCnt[wnum].atomic_op([maxContention](u8& v) + spurs->m.wklMaxContention[wnum].atomic_op([maxContention](u8& v) { v &= ~0xf; v |= (maxContention > 8 ? 8 : maxContention); }); - spurs->m.wklSet1._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag1 + spurs->m.wklSignal1._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag1 } else { - spurs->m.wklMaxCnt[index].atomic_op([maxContention](u8& v) + spurs->m.wklMaxContention[index].atomic_op([maxContention](u8& v) { v &= ~0xf0; v |= (maxContention > 8 ? 8 : maxContention) << 4; }); - spurs->m.wklSet2._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag2 + spurs->m.wklSignal2._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag2 } - spurs->m.flagRecv.compare_and_swap(wnum, 0xff); + spurs->m.wklFlagReceiver.compare_and_swap(wnum, 0xff); u32 res_wkl; - CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf]; + CellSpurs::WorkloadInfo& wkl = wnum <= 15 ? spurs->m.wklInfo1[wnum] : spurs->m.wklInfo2[wnum & 0xf]; spurs->m.wklMskB.atomic_op_sync([spurs, &wkl, wnum, &res_wkl](be_t& v) { const u32 mask = v.ToLE() & ~(0x80000000u >> wnum); @@ -1439,7 +1461,7 @@ s32 spursAddWorkload( { if (mask & m) { - CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf]; + CellSpurs::WorkloadInfo& current = i <= 15 ? spurs->m.wklInfo1[i] : spurs->m.wklInfo2[i & 0xf]; if (current.pm.addr() == wkl.pm.addr()) { // if a workload with identical policy module found @@ -1461,7 +1483,7 @@ s32 spursAddWorkload( spurs->wklStat(wnum).exchange(2); spurs->m.xBD.exchange(0xff); - spurs->m.x72.exchange(0xff); + spurs->m.sysSrvMessage.exchange(0xff); return CELL_OK; } @@ -1671,7 +1693,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set { return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; } - if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u)) + if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 
0x20u : 0x10u)) { return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } @@ -1687,14 +1709,14 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set { if (is_set) { - if (spurs->m.flagRecv.read_relaxed() != 0xff) + if (spurs->m.wklFlagReceiver.read_relaxed() != 0xff) { return CELL_SPURS_POLICY_MODULE_ERROR_BUSY; } } else { - if (spurs->m.flagRecv.read_relaxed() != wid) + if (spurs->m.wklFlagReceiver.read_relaxed() != wid) { return CELL_SPURS_POLICY_MODULE_ERROR_PERM; } @@ -1706,7 +1728,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set return res; } - spurs->m.flagRecv.atomic_op([wid, is_set](u8& FR) + spurs->m.wklFlagReceiver.atomic_op([wid, is_set](u8& FR) { if (is_set) { @@ -1783,7 +1805,7 @@ s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) { return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; } - if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u) || value > 0xff) + if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u) || value > 0xff) { return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 849a597223..61fcc42648 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -94,6 +94,7 @@ enum SPURSKernelInterfaces CELL_SPURS_MAX_SPU = 8, CELL_SPURS_MAX_WORKLOAD = 16, CELL_SPURS_MAX_WORKLOAD2 = 32, + CELL_SPURS_SYS_SERVICE_WORKLOAD_ID = 32, CELL_SPURS_MAX_PRIORITY = 16, CELL_SPURS_NAME_MAX_LENGTH = 15, CELL_SPURS_SIZE = 4096, @@ -162,8 +163,8 @@ enum SpursAttrFlags : u32 enum SpursFlags1 : u8 { SF1_NONE = 0x0, - - SF1_IS_SECOND = 0x40, + + SF1_32_WORKLOADS = 0x40, SF1_EXIT_IF_NO_WORK = 0x80, }; @@ -214,6 +215,12 @@ struct CellSpursWorkloadFlag typedef void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg); +enum CellSpursModulePollStatus { + CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, + CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, + CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 +}; + // Core CellSpurs structures struct CellSpurs { @@ -246,7 +253,7 @@ struct CellSpurs static_assert(sizeof(_sub_str2) == 0x80, "Wrong _sub_str2 size"); - struct _sub_str3 + struct WorkloadInfo { vm::bptr pm; // policy module be_t data; // spu argument @@ -255,7 +262,7 @@ struct CellSpurs be_t priority; }; - static_assert(sizeof(_sub_str3) == 0x20, "Wrong _sub_str3 size"); + static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); struct _sub_str4 { @@ -274,29 +281,33 @@ struct CellSpurs // real data struct { - atomic_t wklReadyCount[0x20]; // 0x0 (index = wid) - u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Current contention - u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Pending - u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - Min contention - atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max contention - CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - Workload signal - atomic_t x72; // 0x72 - message - u8 x73; // 0x73 - idling - u8 flags1; // 0x74 - Bit0(MSB)=exit_if_no_work, Bit1=32_workloads - u8 x75; // 0x75 - trace control - u8 nSpus; // 0x76 - Number of SPUs - atomic_t flagRecv; // 0x77 - atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) - Workload signal - u8 x7A[6]; // 0x7A + // The first 0x80 bytes of the CellSpurs structure is shared by all instances of the SPURS kernel in a 
SPURS instance and the PPU. + // The SPURS kernel copies this from main memory to the LS (address 0x100) then runs its scheduling algorithm using this as one + // of the inputs. After selecting a new workload, the SPURS kernel updates this and writes it back to main memory. + // The read-modify-write is performed atomically by the SPURS kernel. + atomic_t wklReadyCount[0x10]; // 0x00 (index = wid) - Number of SPUs requested by each workload + u8 wklCurrentContention[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs used by each workload + u8 wklPendingContention[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs that are pending to context switch to the workload + u8 wklMinContention[0x10]; // 0x40 (seems only for first 0..15 wids) - Min SPUs required for each workload + atomic_t wklMaxContention[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max SPUs that may be allocated to each workload + CellSpursWorkloadFlag wklFlag; // 0x60 + atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) + atomic_t sysSrvMessage; // 0x72 + u8 sysSrvIdling; // 0x73 + u8 flags1; // 0x74 Type is SpursFlags1 + u8 sysSrvTraceControl; // 0x75 + u8 nSpus; // 0x76 + atomic_t wklFlagReceiver; // 0x77 + atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) + u8 x7A[6]; // 0x7A atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_exitent, preparing, runnable, shutting_down, removable, invalid} u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) - atomic_t wklMskA; // 0xB0 - Available workloads (32*u1) - atomic_t wklMskB; // 0xB4 - Available module id - u8 xB8[5]; // 0xB8 - 0xBC - exit barrier - atomic_t xBD; // 0xBD - update workload - u8 xBE[2]; // 0xBE - 0xBF - message - terminate + atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) + atomic_t wklMskB; // 0xB4 - System service - Available module id + u8 xB8[5]; // 0xB8 - 0xBC - Syetem service exit barrier + atomic_t xBD; // 0xBD - System service message - update workload + u8 xBE[2]; // 0xBE - 0xBF - System service message - terminate u8 xC0[8]; // 0xC0 - System workload u8 xC8; // 0xC8 - System service - on spu u8 spuPort; // 0xC9 - SPU port for system service @@ -304,7 +315,7 @@ struct CellSpurs u8 xCB; // 0xCB u8 xCC; // 0xCC u8 xCD; // 0xCD - u8 xCE; // 0xCE - message - update trace + u8 xCE; // 0xCE - System service message - update trace u8 xCF; // 0xCF atomic_t wklStat2[0x10]; // 0xD0 - Workload state (16*u8) u8 wklD2[0x10]; // 0xE0 - Workload status (16*u8) @@ -317,8 +328,8 @@ struct CellSpurs be_t unk12; // 0x98C be_t unk13; // 0x990 u8 unknown4[0xB00 - 0x998]; - _sub_str3 wklG1[0x10]; // 0xB00 - _sub_str3 wklSysG; // 0xD00 + WorkloadInfo wklInfo1[0x10]; // 0xB00 + WorkloadInfo wklInfoSysSrv; // 0xD00 be_t ppu0; // 0xD20 be_t ppu1; // 0xD28 be_t spuTG; // 0xD30 - SPU thread group @@ -347,7 +358,7 @@ struct CellSpurs _sub_str4 wklH1[0x10]; // 0xE00 _sub_str2 sub3; // 0xF00 u8 unknown6[0x1000 - 0xF80]; // 0xF80 - Gloabl SPU exception handler 0xF88 - Gloabl SPU exception handlers args - _sub_str3 wklG2[0x10]; // 0x1000 + WorkloadInfo wklInfo2[0x10]; // 0x1000 _sub_str1 wklF2[0x10]; // 0x1200 _sub_str4 wklH2[0x10]; // 0x1A00 } m; @@ -741,5 +752,29 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; +// The SPURS kernel data store. This resides at 0x00 of the LS. 
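
The hex annotations on the members below double as a layout contract; a cheap way to keep such a layout honest is a set of offsetof checks against a plain-struct mock, along these lines (illustrative only - the real members use the emulator's be_t and vm::bptr wrappers, so this mock is not the commit's code):

#include <cstddef>

// Mock of the leading SpursKernelMgmtData members with plain types, sized to
// reproduce the documented local-storage offsets.
struct MgmtLayoutMock
{
    unsigned char unk0[0x100];                   // 0x00
    unsigned char tempArea[0x80];                // 0x100
    unsigned char wklLocContention[0x10];        // 0x180
    unsigned char wklLocPendingContention[0x10]; // 0x190
    unsigned char priority[0x10];                // 0x1A0
    unsigned char x1B0[0x10];                    // 0x1B0
    unsigned long long spurs;                    // 0x1C0
    unsigned int spuNum;                         // 0x1C8
    unsigned int dmaTagId;                       // 0x1CC
};

static_assert(offsetof(MgmtLayoutMock, spurs) == 0x1C0, "layout drift");
static_assert(offsetof(MgmtLayoutMock, spuNum) == 0x1C8, "layout drift");
static_assert(offsetof(MgmtLayoutMock, dmaTagId) == 0x1CC, "layout drift");
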
+struct SpursKernelMgmtData {
+	u8 unk0[0x100];                  // 0x00
+	u8 tempArea[0x80];               // 0x100
+	u8 wklLocContention[0x10];       // 0x180
+	u8 wklLocPendingContention[0x10]; // 0x190
+	u8 priority[0x10];               // 0x1A0
+	u8 x1B0[0x10];                   // 0x1B0
+	vm::bptr spurs;                  // 0x1C0
+	be_t spuNum;                     // 0x1C8
+	be_t dmaTagId;                   // 0x1CC
+	vm::bptr wklCurrentAddr;         // 0x1D0
+	be_t x1D8;                       // 0x1D8
+	u32 wklCurrentId;                // 0x1DC
+	be_t yieldToKernelAddr;          // 0x1E0
+	be_t selectWorkloadAddr;         // 0x1E4
+	u8 x1E8;                         // 0x1E8
+	u8 x1E9;                         // 0x1E9
+	u8 x1EA;                         // 0x1EA
+	u8 x1EB;                         // 0x1EB - This might be spuIdling
+	be_t x1EC;                       // 0x1EC - This might be wklEnable1
+	be_t x1EE;                       // 0x1EE - This might be wklEnable2
+};
+
 s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated);
 s64 spursWakeUp(vm::ptr spurs);

From 52b342464be7c067d62473730703345923f3148f Mon Sep 17 00:00:00 2001
From: S Gopal Rajagopal
Date: Mon, 22 Dec 2014 01:07:53 +0530
Subject: [PATCH 08/29] SPURS: Improve readability of SPURS1 kernel at the cost of some performance

---
 rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 311 ++++++++++++++---------
 rpcs3/Emu/SysCalls/Modules/cellSpurs.h   |  41 +--
 2 files changed, 210 insertions(+), 142 deletions(-)

diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index 483c3d56c6..557ff274ac 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -109,13 +109,13 @@ s64 spursInit(
 	// default or system workload:
 #ifdef PRX_DEBUG
-	spurs->m.wklSysG.pm.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4)));
-	spurs->m.wklSysG.size = 0x2200;
+	spurs->m.wklInfoSysSrv.addr.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4)));
+	spurs->m.wklInfoSysSrv.size = 0x2200;
 #else
-	spurs->m.wklInfoSysSrv.pm.set(be_t::make(0x100)); // wrong 64-bit address
+	spurs->m.wklInfoSysSrv.addr.set(be_t::make(0x100)); // wrong 64-bit address
 #endif
-	spurs->m.wklInfoSysSrv.data = 0;
-	spurs->m.wklInfoSysSrv.copy.write_relaxed(0xff);
+	spurs->m.wklInfoSysSrv.arg = 0;
+	spurs->m.wklInfoSysSrv.uniqueId.write_relaxed(0xff);
 	u32 sem;
 	for (u32 i = 0; i < 0x10; i++)
 	{
@@ -192,114 +192,164 @@ s64 spursInit(
 	{
 		LV2_LOCK(0); // TODO: lock-free implementation if possible

-		const u32 arg1 = SPU.GPR[3]._u32[3];
-		u32 var0 = SPU.ReadLS32(0x1d8);
-		u32 var1 = SPU.ReadLS32(0x1dc);
-		u128 wklA = vm::read128(spurs.addr() + 0x20);
-		u128 wklB = vm::read128(spurs.addr() + 0x30);
-		u128 savedA = SPU.ReadLS128(0x180);
-		u128 savedB = SPU.ReadLS128(0x190);
-		u128 vAA = u128::sub8(wklA, savedA);
-		u128 vBB = u128::sub8(wklB, savedB);
-		u128 vM1 = {}; if (var1 <= 15) vM1.u8r[var1] = 0xff;
-		u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB));
-
-		u32 vNUM = 0x20;
-		u64 vRES = 0x20ull << 32;
-		u128 vSET = {};
+		auto mgmt = vm::get_ptr(SPU.ls_offset);

-		if (spurs->m.sysSrvMessage.read_relaxed() & (1 << num))
+		// The first and only argument to this function is a boolean that is set to false if the function
+		// is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus.
+		// If the first argument is true then the shared data is not updated with the result.
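
cellSpursModulePollStatus is the poll-mode caller named in the comment above; both callers split the packed u64 this routine returns into a workload id (upper 32 bits) and a poll status (lower 32 bits), as the kernel loop does elsewhere in this patch with res >> 32 and (u32)res. A minimal sketch of that unpacking (struct and function names are hypothetical):

#include <cstdint>

// Split the packed select-workload result: id in the upper half, poll status
// (CELL_SPURS_MODULE_POLL_STATUS_* bits) in the lower half.
struct SelectResult
{
    std::uint32_t wklId;
    std::uint32_t pollStatus;
};

SelectResult unpackSelectResult(std::uint64_t res)
{
    return { static_cast<std::uint32_t>(res >> 32), static_cast<std::uint32_t>(res) };
}
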
+ const auto isPoll = SPU.GPR[3]._u32[3]; + + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - SPU.WriteLS8(0x1eb, 0); // var4 - if (arg1 == 0 || var1 == 0x20) - { - spurs->m.sysSrvMessage._and_not(1 << num); - } - } - else - { - u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0); - u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); - u128 savedC = SPU.ReadLS128(0x1A0); - u128 savedD = SPU.ReadLS128(0x1B0); - u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8))); - u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); - u32 flagRecv = spurs->m.wklFlagReceiver.read_relaxed(); - u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); - u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSignal1.read_relaxed()]); - u128 vFMS1 = vFM | wklSet1; - u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 0x8000 >> var1 : 0]); - u32 var5 = SPU.ReadLS32(0x1ec); - u128 wklMinCnt = vm::read128(spurs.addr() + 0x40); - u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); - u128 vCC = u128::andnot(vFMS1, u128::eq8(wklReadyCount0, {}) | u128::leu8(vRC, vAABB)) | - u128::leu8(wklMaxCnt, vAABB) | - u128::eq8(savedC, {}) | - u128::fromV(g_imm_table.fsmb_table[(~var5) >> 16]); - u128 vCCH1 = u128::andnot(vCC, - u128::from8p(0x80) & (vFMS1 | u128::gtu8(wklReadyCount0, vAABB)) | - u128::from8p(0x7f) & savedC); - u128 vCCL1 = u128::andnot(vCC, - u128::from8p(0x80) & vFMV1 | - u128::from8p(0x40) & u128::gtu8(vAABB, {}) & u128::gtu8(wklMinCnt, vAABB) | - u128::from8p(0x3c) & u128::fromV(_mm_slli_epi32(u128::sub8(u128::from8p(8), vAABB).vi, 2)) | - u128::from8p(0x02) & u128::eq8(savedD, u128::from8p((u8)var0)) | - u128::from8p(0x01)); - u128 vSTAT = - u128::from8p(0x01) & u128::gtu8(wklReadyCount0, vAABB) | - u128::from8p(0x02) & wklSet1 | - u128::from8p(0x04) & vFM; + contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; - for (s32 i = 0, max = -1; i < 0x10; i++) + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { - const s32 value = ((s32)vCCH1.u8r[i] << 8) | ((s32)vCCL1.u8r[i]); - if (value > max && (vCC.u8r[i] & 1) == 0) + pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + if (i != mgmt->wklCurrentId) { - vNUM = i; - max = value; - } - } - - if (vNUM < 0x10) - { - vRES = ((u64)vNUM << 32) | vSTAT.u8r[vNUM]; - vSET.u8r[vNUM] = 0x01; - } - - SPU.WriteLS8(0x1eb, vNUM == 0x20); - - if (!arg1 || var1 == vNUM) - { - spurs->m.wklSignal1._and_not(be_t::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0))); - if (vNUM == flagRecv && wklFlag == 0) - { - spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); + contention[i] += pendingContention[i]; } } } - if (arg1 == 0) + u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + u32 pollStatus = 0; + + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. 
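
In the else branch below (taken when no system-service message is pending for this SPU), each candidate workload's scheduling weight is packed into a u16. Factored out as a pure function, the packing looks like this (a sketch with invented parameter names; note that the patch shifts the priority left by 16, which would move it out of a 16-bit weight entirely, so the sketch assumes the intended shift of 8, placing priority just under the 0x8000 urgency bit as the numbered comment list implies):

#include <cstdint>

// Compose the SPURS1 scheduling weight from the criteria listed in the
// comments below, in decreasing order of significance.
std::uint16_t schedulingWeight(bool urgent,             // signal, flag, or readyCount > contention
                               std::uint8_t priority,   // per-SPU priority of the workload
                               bool isCurrent,          // workload already running on this SPU
                               bool belowMinContention, // 0 < contention < minContention
                               std::uint8_t contention, // SPUs currently used (0..8)
                               bool sameExecutable)     // uniqueId matches the loaded one
{
    std::uint16_t weight = urgent ? 0x8000 : 0;
    weight |= static_cast<std::uint16_t>((priority & 0x7F) << 8);
    weight |= isCurrent ? 0x80 : 0;
    weight |= belowMinContention ? 0x40 : 0;
    weight |= static_cast<std::uint16_t>(((8 - contention) & 0x0F) << 2);
    weight |= sameExecutable ? 0x02 : 0;
    weight |= 0x01;
    return weight;
}

Since the scan below replaces the best candidate only on a strictly greater weight, ties go to the lower-numbered workload, and the constant 0x01 bit simply guarantees any eligible candidate beats the initial weight of zero.
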
+ if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA - - SPU.WriteLS128(0x180, vSET); // update savedA - SPU.WriteLS32(0x1dc, vNUM); // update var1 - } - - if (arg1 == 1 && vNUM != var1) - { - vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB - - SPU.WriteLS128(0x190, vSET); // update savedB + // Not sure what this does. Possibly Mark the SPU as in use. + mgmt->x1EB = 0; + if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + // Clear the message bit + mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + } } else { - vm::write128(spurs.addr() + 0x30, vBB); // update wklB + // Caclulate the scheduling weight for each workload + u16 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) + { + u8 x1EC = mgmt->x1EC & (0x8000 >> i); + u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 requestCount = readyCount + idleSpuCount; - SPU.WriteLS128(0x190, {}); // update savedB + // For a workload to be considered for scheduling: + // 1. Its priority must not be 0 + // 2. The number of SPUs used by it must be less than the max contention for that workload + // 3. The bit in 0x1EC for the wokload must be set + // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) + // OR the workload must be signalled + // OR the workload flag is 0 and the workload is configured as the wokload flag receiver + if (x1EC && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) + { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) + { + // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: + // 1. Wokload signal set or workload flag or ready count > contention + // 2. Priority of the workload on the SPU + // 3. Is the workload the last selected workload + // 4. Minimum contention of the workload + // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) + // 6. Is the workload executable same as the currently loaded executable + // 7. The workload id (lesser the number, more the weight) + u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; + weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; + weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; + weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; + weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; + weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; + weight |= 0x01; + + // In case of a tie the lower numbered workload is chosen + if (weight > maxWeight) + { + wklSelectedId = i; + maxWeight = weight; + pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus |= wklSignal ? 
CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; + pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; + } + } + } + } + + // Not sure what this does. Possibly mark the SPU as idle/in use. + mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + + if (!isPoll || wklSelectedId == mgmt->wklCurrentId) + { + // Clear workload signal for the selected workload + mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); + mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + + // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s + if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) + { + mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); + } + } } - return vRES; + if (!isPoll) + { + // Called by kernel + // Increment the contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + contention[wklSelectedId]++; + } + + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) + { + mgmt->spurs->m.wklCurrentContention[i] = contention[i]; + mgmt->wklLocContention[i] = 0; + mgmt->wklLocPendingContention[i] = 0; + } + + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + mgmt->wklLocContention[wklSelectedId] = 1; + } + + mgmt->wklCurrentId = wklSelectedId; + } + else if (wklSelectedId != mgmt->wklCurrentId) + { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + pendingContention[wklSelectedId]++; + } + + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) + { + mgmt->spurs->m.wklPendingContention[i] = pendingContention[i]; + mgmt->wklLocPendingContention[i] = 0; + } + + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + mgmt->wklLocPendingContention[wklSelectedId] = 1; + } + } + + u64 result = (u64)wklSelectedId << 32; + result |= pollStatus; + return result; }; else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel { @@ -324,7 +374,7 @@ s64 spursInit( // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; if (i != mgmt->wklCurrentId) { @@ -342,7 +392,7 @@ s64 spursInit( { // Not sure what this does. Possibly Mark the SPU as in use. mgmt->x1EB = 0; - if (!isPoll || mgmt->wklCurrentId == 0x20) + if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); @@ -350,7 +400,7 @@ s64 spursInit( } else { - // Caclulate the scheduling weight for each worjload + // Caclulate the scheduling weight for each workload u8 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { @@ -360,31 +410,33 @@ s64 spursInit( u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? 
mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); - // For a worload to be considered for scheduling: + // For a workload to be considered for scheduling: // 1. Its priority must be greater than 0 // 2. The number of SPUs used by it must be less than the max contention for that workload // 3. The bit in 0x1EC/0x1EE for the wokload must be set // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // Condition #4 may be overriden using a workload signal or using the workload flag + // OR the workload must be signalled + // OR the workload flag is 0 and the workload is configured as the wokload receiver if (x1ECx1EE && priority > 0 && maxContention > contention[i]) { - if (wklFlag || wklSignal || mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i]) + if (wklFlag || wklSignal || readyCount > contention[i]) { // The scheduling weight of the workload is equal to the priority of the workload for the SPU. // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. + // In case of a tie the lower numbered workload is chosen. u8 weight = priority << 4; if (mgmt->wklCurrentId == i) { weight |= 0x04; } - // In case of a tie the lower numbered workload is chosen if (weight > maxWeight) { wklSelectedId = i; maxWeight = weight; - pollStatus = mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; } @@ -432,6 +484,11 @@ s64 spursInit( { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + pendingContention[wklSelectedId]++; + } + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); @@ -471,12 +528,12 @@ s64 spursInit( // get current workload info: auto& wkl = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? 
spurs->m.wklInfo2[wid & 0xf] : spurs->m.wklInfoSysSrv); - if (mgmt->wklCurrentAddr != wkl.pm) + if (mgmt->wklCurrentAddr != wkl.addr) { // load executable code: - memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.pm.get_ptr(), wkl.size); - mgmt->wklCurrentAddr = wkl.pm; - mgmt->x1D8 = wkl.copy.read_relaxed(); + memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.addr.get_ptr(), wkl.size); + mgmt->wklCurrentAddr = wkl.addr; + mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); } if (!isSecond) SPU.WriteLS16(0x1e8, 0); @@ -484,7 +541,7 @@ s64 spursInit( // run workload: SPU.GPR[1]._u32[3] = 0x3FFB0; SPU.GPR[3]._u32[3] = 0x100; - SPU.GPR[4]._u64[1] = wkl.data; + SPU.GPR[4]._u64[1] = wkl.arg; SPU.GPR[5]._u32[3] = pollStatus; SPU.FastCall(0xa00); @@ -601,7 +658,7 @@ s64 spursInit( spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) { - if (spurs->m.wklReadyCount[i].read_relaxed() || + if (spurs->m.wklReadyCount1[i].read_relaxed() || spurs->m.wklSignal1.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && spurs->m.wklFlagReceiver.read_relaxed() == (u8)i @@ -619,7 +676,7 @@ s64 spursInit( spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) { - if (spurs->m.wklReadyCount[i + 0x10].read_relaxed() || + if (spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() || spurs->m.wklSignal2.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && spurs->m.wklFlagReceiver.read_relaxed() == (u8)i + 0x10 @@ -1385,8 +1442,8 @@ s32 spursAddWorkload( spurs->m.wklStat1[wnum].write_relaxed(1); spurs->m.wklD1[wnum] = 0; spurs->m.wklE1[wnum] = 0; - spurs->m.wklInfo1[wnum].pm = pm; - spurs->m.wklInfo1[wnum].data = data; + spurs->m.wklInfo1[wnum].addr = pm; + spurs->m.wklInfo1[wnum].arg = data; spurs->m.wklInfo1[wnum].size = size; spurs->m.wklInfo1[wnum].priority = *(be_t*)priorityTable; spurs->m.wklH1[wnum].nameClass = nameClass; @@ -1401,9 +1458,10 @@ s32 spursAddWorkload( } if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0) { - spurs->m.wklReadyCount[wnum + 16].write_relaxed(0); + spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); spurs->m.wklMinContention[wnum] = minContention > 8 ? 8 : minContention; } + spurs->m.wklReadyCount1[wnum].write_relaxed(0); } else { @@ -1412,8 +1470,8 @@ s32 spursAddWorkload( spurs->m.wklStat2[index].write_relaxed(1); spurs->m.wklD2[index] = 0; spurs->m.wklE2[index] = 0; - spurs->m.wklInfo2[index].pm = pm; - spurs->m.wklInfo2[index].data = data; + spurs->m.wklInfo2[index].addr = pm; + spurs->m.wklInfo2[index].arg = data; spurs->m.wklInfo2[index].size = size; spurs->m.wklInfo2[index].priority = *(be_t*)priorityTable; spurs->m.wklH2[index].nameClass = nameClass; @@ -1426,8 +1484,8 @@ s32 spursAddWorkload( spurs->m.wklF2[index].hookArg = hookArg; spurs->m.wklE2[index] |= 2; } + spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); } - spurs->m.wklReadyCount[wnum].write_relaxed(0); if (wnum <= 15) { @@ -1462,21 +1520,21 @@ s32 spursAddWorkload( if (mask & m) { CellSpurs::WorkloadInfo& current = i <= 15 ? 
spurs->m.wklInfo1[i] : spurs->m.wklInfo2[i & 0xf]; - if (current.pm.addr() == wkl.pm.addr()) + if (current.addr.addr() == wkl.addr.addr()) { // if a workload with identical policy module found - res_wkl = current.copy.read_relaxed(); + res_wkl = current.uniqueId.read_relaxed(); break; } else { - k |= 0x80000000 >> current.copy.read_relaxed(); + k |= 0x80000000 >> current.uniqueId.read_relaxed(); res_wkl = cntlz32(~k); } } } - wkl.copy.exchange((u8)res_wkl); + wkl.uniqueId.exchange((u8)res_wkl); v = mask | (0x80000000u >> wnum); }); assert(res_wkl <= 31); @@ -1818,7 +1876,14 @@ s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } - spurs->m.wklReadyCount[wid].exchange((u8)value); + if (wid < CELL_SPURS_MAX_WORKLOAD) + { + spurs->m.wklReadyCount1[wid].exchange((u8)value); + } + else + { + spurs->m.wklIdleSpuCountOrReadyCount2[wid].exchange((u8)value); + } return CELL_OK; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 61fcc42648..379679207f 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -255,10 +255,10 @@ struct CellSpurs struct WorkloadInfo { - vm::bptr pm; // policy module - be_t data; // spu argument + vm::bptr addr; // Address of the executable + be_t arg; // spu argument be_t size; - atomic_t copy; + atomic_t uniqueId; // The unique id is the same for all workloads with the same addr be_t priority; }; @@ -285,21 +285,22 @@ struct CellSpurs // The SPURS kernel copies this from main memory to the LS (address 0x100) then runs its scheduling algorithm using this as one // of the inputs. After selecting a new workload, the SPURS kernel updates this and writes it back to main memory. // The read-modify-write is performed atomically by the SPURS kernel. - atomic_t wklReadyCount[0x10]; // 0x00 (index = wid) - Number of SPUs requested by each workload - u8 wklCurrentContention[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs used by each workload - u8 wklPendingContention[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs that are pending to context switch to the workload - u8 wklMinContention[0x10]; // 0x40 (seems only for first 0..15 wids) - Min SPUs required for each workload - atomic_t wklMaxContention[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max SPUs that may be allocated to each workload - CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) - atomic_t sysSrvMessage; // 0x72 - u8 sysSrvIdling; // 0x73 - u8 flags1; // 0x74 Type is SpursFlags1 - u8 sysSrvTraceControl; // 0x75 - u8 nSpus; // 0x76 - atomic_t wklFlagReceiver; // 0x77 - atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) - u8 x7A[6]; // 0x7A + atomic_t wklReadyCount1[0x10]; // 0x00 Number of SPUs requested by each workload (0..15 wids). + atomic_t wklIdleSpuCountOrReadyCount2[0x10]; // 0x10 SPURS1: Number of idle SPUs requested by each workload (0..15 wids). SPURS2: Number of SPUs requested by each workload (16..31 wids). + u8 wklCurrentContention[0x10]; // 0x20 Number of SPUs used by each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + u8 wklPendingContention[0x10]; // 0x30 Number of SPUs that are pending to context switch to the workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. 
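
The packed 4-bit encoding just described for 32-workload (SPURS2) mode - byte index wid % 16, low nibble for wids 0..15, high nibble for wids 16..31 - can be captured in a pair of helpers like these (an illustration with invented names, not code from the commit):

#include <cstdint>

// Read one workload's 4-bit contention value from a 16-byte packed array.
std::uint8_t readPackedContention(const std::uint8_t packed[16], unsigned wid)
{
    const std::uint8_t b = packed[wid % 16];
    return wid < 16 ? (b & 0x0F) : (b >> 4);
}

// Write one workload's 4-bit contention value into a 16-byte packed array.
void writePackedContention(std::uint8_t packed[16], unsigned wid, std::uint8_t value)
{
    std::uint8_t& b = packed[wid % 16];
    b = wid < 16 ? static_cast<std::uint8_t>((b & 0xF0) | (value & 0x0F))
                 : static_cast<std::uint8_t>((b & 0x0F) | ((value & 0x0F) << 4));
}
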
+ u8 wklMinContention[0x10]; // 0x40 Min SPUs required for each workload. SPURS1: index = wid. SPURS2: Unused. + atomic_t wklMaxContention[0x10]; // 0x50 Max SPUs that may be allocated to each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + CellSpursWorkloadFlag wklFlag; // 0x60 + atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) + atomic_t sysSrvMessage; // 0x72 + u8 sysSrvIdling; // 0x73 + u8 flags1; // 0x74 Type is SpursFlags1 + u8 sysSrvTraceControl; // 0x75 + u8 nSpus; // 0x76 + atomic_t wklFlagReceiver; // 0x77 + atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) + u8 x7A[6]; // 0x7A atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_exitent, preparing, runnable, shutting_down, removable, invalid} u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) @@ -764,7 +765,7 @@ struct SpursKernelMgmtData { be_t spuNum; // 0x1C8 be_t dmaTagId; // 0x1CC vm::bptr wklCurrentAddr; // 0x1D0 - be_t x1D8; // 0x1D8 + be_t wklCurrentUniqueId; // 0x1D8 u32 wklCurrentId; // 0x1DC be_t yieldToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 @@ -774,6 +775,8 @@ struct SpursKernelMgmtData { u8 x1EB; // 0x1EB - This might be spuIdling be_t x1EC; // 0x1EC - This might be wklEnable1 be_t x1EE; // 0x1EE - This might be wklEnable2 + u8 x1F0[0x220 - 0x1F0]; // 0x1F0 + u8 wklUniqueId[0x10]; // 0x220 }; s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); From 66641160422e704616065c717f815c60cb647499 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 1 Jan 2015 01:51:22 +0530 Subject: [PATCH 09/29] SPURS: System service workload - initial commit --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 251 ++++++++++++++++++++--- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 189 ++++++++++------- rpcs3/cellSpursSpu.cpp | 220 ++++++++++++++++++++ rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 + 5 files changed, 561 insertions(+), 103 deletions(-) create mode 100644 rpcs3/cellSpursSpu.cpp diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 557ff274ac..bdf0d97101 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -101,7 +101,7 @@ s64 spursInit( } spurs->m.xCC = 0; spurs->m.xCD = 0; - spurs->m.xCE = 0; + spurs->m.sysSrvMsgUpdateTrace = 0; for (u32 i = 0; i < 8; i++) { spurs->m.xC0[i] = -1; @@ -225,8 +225,7 @@ s64 spursInit( // the system service message bit for this SPU is set. if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->x1EB = 0; + mgmt->spuIdling = 0; if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit @@ -239,7 +238,7 @@ s64 spursInit( u16 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 x1EC = mgmt->x1EC & (0x8000 >> i); + u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); @@ -249,11 +248,11 @@ s64 spursInit( // For a workload to be considered for scheduling: // 1. Its priority must not be 0 // 2. 
The number of SPUs used by it must be less than the max contention for that workload - // 3. The bit in 0x1EC for the wokload must be set + // 3. The workload should be in runnable state // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (x1EC && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) + if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { @@ -287,7 +286,7 @@ s64 spursInit( } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { @@ -391,7 +390,7 @@ s64 spursInit( if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->x1EB = 0; + mgmt->spuIdling = 0; if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit @@ -405,7 +404,7 @@ s64 spursInit( for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { auto j = i & 0x0F; - u8 x1ECx1EE = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->x1EC & (0x8000 >> j) : mgmt->x1EE & (0x8000 >> j); + u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); @@ -415,11 +414,11 @@ s64 spursInit( // For a workload to be considered for scheduling: // 1. Its priority must be greater than 0 // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The bit in 0x1EC/0x1EE for the wokload must be set + // 3. The workload should be in runnable state // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload receiver - if (x1ECx1EE && priority > 0 && maxContention > contention[i]) + if (runnable && priority > 0 && maxContention > contention[i]) { if (wklFlag || wklSignal || readyCount > contention[i]) { @@ -445,7 +444,7 @@ s64 spursInit( } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { @@ -653,7 +652,7 @@ s64 spursInit( bool do_break = false; for (u32 i = 0; i < 16; i++) { - if (spurs->m.wklStat1[i].read_relaxed() == 2 && + if (spurs->m.wklState1[i].read_relaxed() == 2 && spurs->m.wklInfo1[i].priority.ToBE() != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) @@ -671,7 +670,7 @@ s64 spursInit( } if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++) { - if (spurs->m.wklStat2[i].read_relaxed() == 2 && + if (spurs->m.wklState2[i].read_relaxed() == 2 && spurs->m.wklInfo2[i].priority.ToBE() != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) @@ -756,7 +755,7 @@ s64 spursInit( } } - spurs->m.unk22 = 0; + spurs->m.traceBuffer = 0; // can also use cellLibprof if available (omitted) // some unknown subroutine @@ -1439,13 +1438,16 @@ s32 spursAddWorkload( { assert((spurs->m.wklCurrentContention[wnum] & 0xf) == 0); assert((spurs->m.wklPendingContention[wnum] & 0xf) == 0); - spurs->m.wklStat1[wnum].write_relaxed(1); - spurs->m.wklD1[wnum] = 0; - spurs->m.wklE1[wnum] = 0; + spurs->m.wklState1[wnum].write_relaxed(1); + spurs->m.wklStatus1[wnum] = 0; + spurs->m.wklEvent1[wnum] = 0; spurs->m.wklInfo1[wnum].addr = pm; spurs->m.wklInfo1[wnum].arg = data; spurs->m.wklInfo1[wnum].size = size; - spurs->m.wklInfo1[wnum].priority = *(be_t*)priorityTable; + for (u32 i = 0; i < 8; i++) + { + spurs->m.wklInfo1[wnum].priority[i] = priorityTable[i]; + } spurs->m.wklH1[wnum].nameClass = nameClass; spurs->m.wklH1[wnum].nameInstance = nameInstance; memset(spurs->m.wklF1[wnum].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1454,7 +1456,7 @@ s32 spursAddWorkload( { spurs->m.wklF1[wnum].hook = hook; spurs->m.wklF1[wnum].hookArg = hookArg; - spurs->m.wklE1[wnum] |= 2; + spurs->m.wklEvent1[wnum] |= 2; } if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0) { @@ -1467,13 +1469,16 @@ s32 spursAddWorkload( { assert((spurs->m.wklCurrentContention[index] & 0xf0) == 0); assert((spurs->m.wklPendingContention[index] & 0xf0) == 0); - spurs->m.wklStat2[index].write_relaxed(1); - spurs->m.wklD2[index] = 0; - spurs->m.wklE2[index] = 0; + spurs->m.wklState2[index].write_relaxed(1); + spurs->m.wklStatus2[index] = 0; + spurs->m.wklEvent2[index] = 0; spurs->m.wklInfo2[index].addr = pm; spurs->m.wklInfo2[index].arg = data; spurs->m.wklInfo2[index].size = size; - spurs->m.wklInfo2[index].priority = *(be_t*)priorityTable; + for (u32 i = 0; i < 8; i++) + { + spurs->m.wklInfo2[index].priority[i] = priorityTable[i]; + } spurs->m.wklH2[index].nameClass = nameClass; spurs->m.wklH2[index].nameInstance = nameInstance; memset(spurs->m.wklF2[index].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1482,7 +1487,7 @@ s32 spursAddWorkload( { spurs->m.wklF2[index].hook = hook; spurs->m.wklF2[index].hookArg = hookArg; - spurs->m.wklE2[index] |= 2; + spurs->m.wklEvent2[index] |= 2; } spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); } @@ -1539,8 +1544,8 @@ s32 spursAddWorkload( }); assert(res_wkl <= 31); - spurs->wklStat(wnum).exchange(2); - spurs->m.xBD.exchange(0xff); + spurs->wklState(wnum).exchange(2); + spurs->m.sysSrvMsgUpdateWorkload.exchange(0xff); spurs->m.sysSrvMessage.exchange(0xff); return CELL_OK; } @@ -1871,7 +1876,7 @@ s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } - if (spurs->m.exception.ToBE() || spurs->wklStat(wid).read_relaxed() != 2) + if (spurs->m.exception.ToBE() || spurs->wklState(wid).read_relaxed() != 2) { return 
CELL_SPURS_POLICY_MODULE_ERROR_STAT; } @@ -3364,6 +3369,190 @@ s64 cellSpursSemaphoreGetTasksetAddress() #endif } +bool spursIsLibProfLoaded() +{ + return false; +} + +void spursTraceStatusUpdate(vm::ptr spurs) +{ + LV2_LOCK(0); + + if (spurs->m.xCC != 0) + { + spurs->m.xCD = 1; + spurs->m.sysSrvMsgUpdateTrace = (1 << spurs->m.nSpus) - 1; + spurs->m.sysSrvMessage.write_relaxed(0xFF); + sys_semaphore_wait(spurs->m.semPrv, 0); + } +} + +s64 spursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode, u32 updateStatus) +{ + if (!spurs || !buffer) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align || buffer.addr() % CellSpursTraceInfo::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (size < CellSpursTraceInfo::size || mode & ~(CELL_SPURS_TRACE_MODE_FLAG_MASK)) + { + return CELL_SPURS_CORE_ERROR_INVAL; + } + + if (spurs->m.traceBuffer != 0) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.traceDataSize = size - CellSpursTraceInfo::size; + for (u32 i = 0; i < 8; i++) + { + buffer->spu_thread[i] = spurs->m.spus[i]; + buffer->count[i] = 0; + } + + buffer->spu_thread_grp = spurs->m.spuTG; + buffer->nspu = spurs->m.nSpus; + spurs->m.traceBuffer = buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 1 : 0); + spurs->m.traceMode = mode; + + u32 spuTraceDataCount = (spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus; + for (u32 i = 0, j = 8; i < 6; i++) + { + spurs->m.x908[i] = j; + j += spuTraceDataCount; + } + + spurs->m.sysSrvTraceControl = 0; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode) +{ + if (spursIsLibProfLoaded()) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + return spursTraceInitialize(spurs, buffer, size, mode, 1); +} + +s64 spursTraceStart(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 1; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStart(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStart(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 spursTraceStop(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 2; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStop(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStop(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 cellSpursTraceFinalize(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return 
CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 0; + spurs->m.traceMode = 0; + spurs->m.traceBuffer = 0; + spursTraceStatusUpdate(spurs); + return CELL_OK; +} + void cellSpurs_init(Module *pxThis) { cellSpurs = pxThis; @@ -3521,5 +3710,11 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, _cellSpursSemaphoreInitialize); REG_FUNC(cellSpurs, cellSpursSemaphoreGetTasksetAddress); + // Trace + REG_FUNC(cellSpurs, cellSpursTraceInitialize); + REG_FUNC(cellSpurs, cellSpursTraceStart); + REG_FUNC(cellSpurs, cellSpursTraceStop); + REG_FUNC(cellSpurs, cellSpursTraceFinalize); + // TODO: some trace funcs } \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 379679207f..1307de72e9 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -114,34 +114,6 @@ enum RangeofEventQueuePortNumbers CELL_SPURS_DYNAMIC_PORT_RANGE_BOTTOM = 63, }; -enum SPURSTraceTypes -{ - CELL_SPURS_TRACE_TAG_LOAD = 0x2a, - CELL_SPURS_TRACE_TAG_MAP = 0x2b, - CELL_SPURS_TRACE_TAG_START = 0x2c, - CELL_SPURS_TRACE_TAG_STOP = 0x2d, - CELL_SPURS_TRACE_TAG_USER = 0x2e, - CELL_SPURS_TRACE_TAG_GUID = 0x2f, -}; - -// SPURS task defines. -enum TaskConstants -{ - CELL_SPURS_MAX_TASK = 128, - CELL_SPURS_TASK_TOP = 0x3000, - CELL_SPURS_TASK_BOTTOM = 0x40000, - CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, - CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, -}; - -class SPURSManager; -class SPURSManagerEventFlag; -class SPURSManagerTaskset; - -struct CellSpurs; - enum SpursAttrFlags : u32 { SAF_NONE = 0x0, @@ -168,6 +140,68 @@ enum SpursFlags1 : u8 SF1_EXIT_IF_NO_WORK = 0x80, }; +enum SpursWorkloadConstants +{ + // Workload states + SPURS_WKL_STATE_NON_EXISTENT = 0, + SPURS_WKL_STATE_PREPARING = 1, + SPURS_WKL_STATE_RUNNABLE = 2, + SPURS_WKL_STATE_SHUTTING_DOWN = 3, + SPURS_WKL_STATE_REMOVABLE = 4, + SPURS_WKL_STATE_INVALID = 5, +}; + +enum CellSpursModulePollStatus +{ + CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, + CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, + CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 +}; + +enum SpursTraceConstants +{ + // Trace tag types + CELL_SPURS_TRACE_TAG_KERNEL = 0x20, + CELL_SPURS_TRACE_TAG_SERVICE = 0x21, + CELL_SPURS_TRACE_TAG_TASK = 0x22, + CELL_SPURS_TRACE_TAG_JOB = 0x23, + CELL_SPURS_TRACE_TAG_OVIS = 0x24, + CELL_SPURS_TRACE_TAG_LOAD = 0x2a, + CELL_SPURS_TRACE_TAG_MAP = 0x2b, + CELL_SPURS_TRACE_TAG_START = 0x2c, + CELL_SPURS_TRACE_TAG_STOP = 0x2d, + CELL_SPURS_TRACE_TAG_USER = 0x2e, + CELL_SPURS_TRACE_TAG_GUID = 0x2f, + + // Service incident + CELL_SPURS_TRACE_SERVICE_INIT = 0x01, + CELL_SPURS_TRACE_SERVICE_WAIT = 0x02, + CELL_SPURS_TRACE_SERVICE_EXIT = 0x03, + + // Trace mode flags + CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER = 0x1, + CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP = 0x2, + CELL_SPURS_TRACE_MODE_FLAG_MASK = 0x3, +}; + +// SPURS task constants +enum SpursTaskConstants +{ + CELL_SPURS_MAX_TASK = 128, + CELL_SPURS_TASK_TOP = 0x3000, + CELL_SPURS_TASK_BOTTOM = 0x40000, + CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, + CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, +}; + +class SPURSManager; +class SPURSManagerEventFlag; +class SPURSManagerTaskset; + +struct CellSpurs; + struct CellSpursAttribute { static const uint align = 8; @@ -215,12 +249,6 @@ struct CellSpursWorkloadFlag typedef 
void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg); -enum CellSpursModulePollStatus { - CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, - CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, - CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 -}; - // Core CellSpurs structures struct CellSpurs { @@ -259,7 +287,7 @@ struct CellSpurs be_t arg; // spu argument be_t size; atomic_t uniqueId; // The unique id is the same for all workloads with the same addr - be_t priority; + be_t priority[8]; }; static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); @@ -294,36 +322,41 @@ struct CellSpurs CellSpursWorkloadFlag wklFlag; // 0x60 atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) atomic_t sysSrvMessage; // 0x72 - u8 sysSrvIdling; // 0x73 + u8 spuIdling; // 0x73 u8 flags1; // 0x74 Type is SpursFlags1 u8 sysSrvTraceControl; // 0x75 u8 nSpus; // 0x76 atomic_t wklFlagReceiver; // 0x77 atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) u8 x7A[6]; // 0x7A - atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_exitent, preparing, runnable, shutting_down, removable, invalid} - u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) - u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) + atomic_t wklState1[0x10]; // 0x80 SPURS_WKL_STATE_* + u8 wklStatus1[0x10]; // 0x90 + u8 wklEvent1[0x10]; // 0xA0 atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) atomic_t wklMskB; // 0xB4 - System service - Available module id u8 xB8[5]; // 0xB8 - 0xBC - Syetem service exit barrier - atomic_t xBD; // 0xBD - System service message - update workload - u8 xBE[2]; // 0xBE - 0xBF - System service message - terminate + atomic_t sysSrvMsgUpdateWorkload; // 0xBD + u8 xBE[2]; // 0xBE + u8 sysSrvMsgTerminate; // 0xBF u8 xC0[8]; // 0xC0 - System workload - u8 xC8; // 0xC8 - System service - on spu + u8 sysSrvOnSpu; // 0xC8 u8 spuPort; // 0xC9 - SPU port for system service u8 xCA; // 0xCA u8 xCB; // 0xCB u8 xCC; // 0xCC u8 xCD; // 0xCD - u8 xCE; // 0xCE - System service message - update trace + u8 sysSrvMsgUpdateTrace; // 0xCE u8 xCF; // 0xCF - atomic_t wklStat2[0x10]; // 0xD0 - Workload state (16*u8) - u8 wklD2[0x10]; // 0xE0 - Workload status (16*u8) - u8 wklE2[0x10]; // 0xF0 - Workload event (16*u8) + atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_* + u8 wklStatus2[0x10]; // 0xE0 + u8 wklEvent2[0x10]; // 0xF0 _sub_str1 wklF1[0x10]; // 0x100 - be_t unk22; // 0x900 - SPURS trace buffer - u8 unknown7[0x980 - 0x908]; // 0x908 - Per SPU trace info ??? 
(8*u32) 0x950 - SPURS trace mode (u32) + be_t traceBuffer; // 0x900 + be_t x908[6]; // 0x908 - Indices to traceData (a guess) + u8 unknown7[0x948 - 0x920]; // 0x920 + be_t traceDataSize; // 0x948 + be_t traceMode; // 0x950 + u8 unknown8[0x980 - 0x954]; // 0x954 be_t semPrv; // 0x980 be_t unk11; // 0x988 be_t unk12; // 0x98C @@ -371,15 +404,15 @@ struct CellSpurs } c; }; - __forceinline atomic_t& wklStat(const u32 wid) + __forceinline atomic_t& wklState(const u32 wid) { if (wid & 0x10) { - return m.wklStat2[wid & 0xf]; + return m.wklState2[wid & 0xf]; } else { - return m.wklStat1[wid & 0xf]; + return m.wklState1[wid & 0xf]; } } @@ -526,25 +559,21 @@ struct CellSpursExceptionInfo struct CellSpursTraceInfo { - be_t spu_thread[8]; - be_t count[8]; - be_t spu_thread_grp; - be_t nspu; - //u8 padding[]; -}; + static const u32 size = 0x80; + static const u32 align = 16; -struct CellTraceHeader -{ - u8 tag; - u8 length; - u8 cpu; - u8 thread; - be_t time; + be_t spu_thread[8]; // 0x00 + be_t count[8]; // 0x20 + be_t spu_thread_grp; // 0x40 + be_t nspu; // 0x44 + //u8 padding[]; }; struct CellSpursTracePacket { - struct header_struct + static const u32 size = 16; + + struct { u8 tag; u8 length; @@ -553,23 +582,29 @@ struct CellSpursTracePacket be_t time; } header; - struct data_struct + union { - struct load_struct + struct + { + be_t incident; + be_t reserved; + } service; + + struct { be_t ea; be_t ls; be_t size; } load; - struct map_struct + struct { be_t offset; be_t ls; be_t size; } map; - struct start_struct + struct { s8 module[4]; be_t level; @@ -578,6 +613,7 @@ struct CellSpursTracePacket be_t user; be_t guid; + be_t stop; } data; }; @@ -771,12 +807,15 @@ struct SpursKernelMgmtData { be_t selectWorkloadAddr; // 0x1E4 u8 x1E8; // 0x1E8 u8 x1E9; // 0x1E9 - u8 x1EA; // 0x1EA - u8 x1EB; // 0x1EB - This might be spuIdling - be_t x1EC; // 0x1EC - This might be wklEnable1 - be_t x1EE; // 0x1EE - This might be wklEnable2 - u8 x1F0[0x220 - 0x1F0]; // 0x1F0 - u8 wklUniqueId[0x10]; // 0x220 + u8 sysSrvInitialised; // 0x1EA + u8 spuIdling; // 0x1EB + be_t wklRunnable1; // 0x1EC + be_t wklRunnable2; // 0x1EE + u8 x1F0[0x210 - 0x1F0]; // 0x1F0 + be_t traceBuffer; // 0x210 + be_t traceMsgCount; // 0x218 + be_t traceMaxCount; // 0x21C + u8 wklUniqueId[0x10]; // 0x220 }; s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); diff --git a/rpcs3/cellSpursSpu.cpp b/rpcs3/cellSpursSpu.cpp new file mode 100644 index 0000000000..6aad7f7430 --- /dev/null +++ b/rpcs3/cellSpursSpu.cpp @@ -0,0 +1,220 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/Modules/cellSpurs.h" + +/// Output trace information +void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { + // TODO: Implement this +} + +/// Check for execution right requests +unsigned cellSpursModulePollStatus(CellSpursModulePollStatus * status) { + // TODO: Implement this + return 0; +} + +void spursSysServiceCleanup(SPUThread & spu, SpursKernelMgmtData * mgmt) { + // TODO: Implement this +} + +void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { + // TODO: Implement this +} + +void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownMask) { + u32 wklNotifyMask = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownMask & (0x80000000 >> i)) { + mgmt->spurs->m.wklEvent1[i] |= 0x01; + if 
(mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { + wklNotifyMask |= 0x80000000 >> i; + } + } + } + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownMask & (0x8000 >> i)) { + mgmt->spurs->m.wklEvent2[i] |= 0x01; + if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { + wklNotifyMask |= 0x8000 >> i; + } + } + } + + if (wklNotifyMask) { + // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); + } +} + +/// Update workload information in the LS from main memory +void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { + u32 wklShutdownMask = 0; + mgmt->wklRunnable1 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; + mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); + + auto wklStatus = mgmt->spurs->m.wklStatus1[i]; + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable1 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); + } + + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus1[i] & (1 << mgmt->spuNum)) == 0)) { + mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownMask |= 0x80000000 >> i; + } + } + } + + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + mgmt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { + mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; + } + + auto wklStatus = mgmt->spurs->m.wklStatus2[i]; + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable2 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + } + + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus2[i] & (1 << mgmt->spuNum)) == 0)) { + mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownMask |= 0x8000 >> i; + } + } + } + } + + if (wklShutdownMask) { + spursSysServiceUpdateEvent(spu, mgmt, wklShutdownMask); + } +} + +/// Process any messages +void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { + if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + spursSysServiceUpdateWorkload(spu, mgmt); + } + + if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); + } + + if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + // TODO: Rest of the terminate processing + } +} + +void spursSysServiceExitIfRequired() { + // TODO: Implement this +} + +/// Main function for the system service workload +void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { + auto mgmt = vm::get_ptr(spu.ls_offset); + + if (mgmt->spurs.addr() % CellSpurs::align) { + // TODO: Halt + } + + // Initialise the system service if this is the first time its being 
started on this SPU + if (mgmt->sysSrvInitialised != 0) { + mgmt->sysSrvInitialised = 1; + + if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { + // TODO: Halt + } + + mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; + mgmt->traceBuffer = 0; + mgmt->traceMsgCount = -1; + + spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); + spursSysServiceCleanup(spu, mgmt); + + // Trace - SERVICE: INIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + } + + // Trace - START: Module='SYS ' + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_START; + memcpy(pkt.data.start.module, "SYS ", 4); + pkt.data.start.level = 1; // Policy module + pkt.data.start.ls = 0xA00 >> 2; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + while (true) { + spursSysServiceProcessMessages(spu, mgmt); + + if (cellSpursModulePollStatus(nullptr)) { + // Trace - SERVICE: EXIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + // Trace - STOP: GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + break; + } + + if (mgmt->spuIdling == 0) { + continue; + } + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursSysServiceExitIfRequired(); + } +} + +/// Entry point of the system service workload +void spursSysServiceWorkloadEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; + + spu.GPR[1]._u32[3] = 0x3FFD0; + *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; + memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); + + LV2_LOCK(0); + + if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceWorkloadMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system service workload. Need to implement this. 
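+        // A sketch of what this path needs to do, based on the cleanup routine a later
+        // patch in this series adds (spursSysServiceCleanupAfterPreemption): restore the
+        // preempted workload's contention and ready counts before running the system
+        // service workload.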
+ } + + // TODO: Ensure that this function always returns to the SPURS kernel + return; +} diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 8e2629cea3..3599cd2cd3 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -54,6 +54,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 9335c19690..a1e6732cc9 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -653,6 +653,9 @@ Emu\CPU\ARMv7\Modules + + Emu\SysCalls\Modules + From 4a83d43a8f317c0a5f75ddced3b78f20db84d7e8 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Fri, 2 Jan 2015 01:33:36 +0530 Subject: [PATCH 10/29] SPURS: System service workload --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 10 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 12 +- rpcs3/cellSpursSpu.cpp | 162 ++++++++++++++++++++--- 3 files changed, 159 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index bdf0d97101..4cac04ffad 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -104,7 +104,7 @@ s64 spursInit( spurs->m.sysSrvMsgUpdateTrace = 0; for (u32 i = 0; i < 8; i++) { - spurs->m.xC0[i] = -1; + spurs->m.sysSrvWorkload[i] = -1; } // default or system workload: @@ -755,7 +755,7 @@ s64 spursInit( } } - spurs->m.traceBuffer = 0; + spurs->m.traceBuffer.set(0); // can also use cellLibprof if available (omitted) // some unknown subroutine @@ -3418,13 +3418,13 @@ s64 spursTraceInitialize(vm::ptr spurs, vm::ptr b buffer->spu_thread_grp = spurs->m.spuTG; buffer->nspu = spurs->m.nSpus; - spurs->m.traceBuffer = buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 1 : 0); + spurs->m.traceBuffer.set(buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 
1 : 0));
 	spurs->m.traceMode = mode;
 
 	u32 spuTraceDataCount = (spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus;
 	for (u32 i = 0, j = 8; i < 6; i++)
 	{
-		spurs->m.x908[i] = j;
+		spurs->m.traceStartIndex[i] = j;
 		j += spuTraceDataCount;
 	}
 
@@ -3548,7 +3548,7 @@ s64 cellSpursTraceFinalize(vm::ptr spurs)
 
 	spurs->m.sysSrvTraceControl = 0;
 	spurs->m.traceMode = 0;
-	spurs->m.traceBuffer = 0;
+	spurs->m.traceBuffer.set(0);
 	spursTraceStatusUpdate(spurs);
 	return CELL_OK;
 }
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
index 1307de72e9..3d817e4a4d 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
@@ -249,6 +249,8 @@ struct CellSpursWorkloadFlag
 typedef void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg);
 
+struct CellSpursTraceInfo;
+
 // Core CellSpurs structures
 struct CellSpurs
 {
@@ -336,9 +338,9 @@ struct CellSpurs
 		atomic_t wklMskB; // 0xB4 - System service - Available module id
 		u8 xB8[5]; // 0xB8 - 0xBC - System service exit barrier
 		atomic_t sysSrvMsgUpdateWorkload; // 0xBD
-		u8 xBE[2]; // 0xBE
+		u8 xBE; // 0xBE
 		u8 sysSrvMsgTerminate; // 0xBF
-		u8 xC0[8]; // 0xC0 - System workload
+		u8 sysSrvWorkload[8]; // 0xC0
 		u8 sysSrvOnSpu; // 0xC8
 		u8 spuPort; // 0xC9 - SPU port for system service
 		u8 xCA; // 0xCA
@@ -351,8 +353,8 @@ struct CellSpurs
 		u8 wklStatus2[0x10]; // 0xE0
 		u8 wklEvent2[0x10]; // 0xF0
 		_sub_str1 wklF1[0x10]; // 0x100
-		be_t traceBuffer; // 0x900
-		be_t x908[6]; // 0x908 - Indices to traceData (a guess)
+		vm::bptr traceBuffer; // 0x900
+		be_t traceStartIndex[6]; // 0x908
 		u8 unknown7[0x948 - 0x920]; // 0x920
 		be_t traceDataSize; // 0x948
 		be_t traceMode; // 0x950
@@ -810,7 +812,7 @@ struct SpursKernelMgmtData {
 	u8 sysSrvInitialised; // 0x1EA
 	u8 spuIdling; // 0x1EB
 	be_t wklRunnable1; // 0x1EC
-	be_t wklRunnable2; // 0x1EE
+	be_t wklRunnable2; // 0x1EE
 	u8 x1F0[0x210 - 0x1F0]; // 0x1F0
 	be_t traceBuffer; // 0x210
 	be_t traceMsgCount; // 0x218
 	be_t traceMaxCount; // 0x21C
diff --git a/rpcs3/cellSpursSpu.cpp b/rpcs3/cellSpursSpu.cpp
index 6aad7f7430..33c75e8a44 100644
--- a/rpcs3/cellSpursSpu.cpp
+++ b/rpcs3/cellSpursSpu.cpp
@@ -15,14 +15,80 @@ unsigned cellSpursModulePollStatus(CellSpursModulePollStatus * status) {
     return 0;
 }
 
-void spursSysServiceCleanup(SPUThread & spu, SpursKernelMgmtData * mgmt) {
-    // TODO: Implement this
+/// Restore scheduling parameters to the right values after a workload has been preempted by the system service workload
+void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) {
+    if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) {
+        auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum];
+        mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF;
+
+        spursSysServiceUpdateWorkload(spu, mgmt);
+        if (wklId >= CELL_SPURS_MAX_WORKLOAD) {
+            mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10;
+            mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1);
+        } else {
+            mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
+            mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
+        }
+
+        auto wklIdSaved = mgmt->wklCurrentId;
+        mgmt->wklCurrentId = wklId;
+
+        // Trace - STOP: GUID
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+        pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here
+        cellSpursModulePutTrace(&pkt, 
mgmt->dmaTagId);
+
+        mgmt->wklCurrentId = wklIdSaved;
+    }
 }
 
+/// Update the trace count for this SPU in CellSpurs
+void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) {
+    if (mgmt->traceBuffer) {
+        mgmt->spurs->m.traceBuffer->count[mgmt->spuNum] = mgmt->traceMsgCount;
+    }
+}
+
+/// Update trace control in SPU from CellSpurs
 void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) {
-    // TODO: Implement this
+    auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace;
+    mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum);
+    mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum);
+    mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum;
+
+    bool notify = false;
+    if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) {
+        mgmt->spurs->m.xCD = 0;
+        notify = true;
+    }
+
+    if (arg4 && mgmt->spurs->m.xCD != 0) {
+        mgmt->spurs->m.xCD = 0;
+        notify = true;
+    }
+
+    if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) {
+        if (mgmt->traceMsgCount != 0xFF || mgmt->traceBuffer == 0 || mgmt->spurs->m.traceBuffer.addr() == 0) {
+            spursSysServiceUpdateTraceCount(spu, mgmt);
+        } else {
+            mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum];
+        }
+
+        mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4);
+        mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0];
+        if (mgmt->traceBuffer == 0) {
+            mgmt->traceMsgCount = 0;
+        }
+    }
+
+    if (notify) {
+        // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0);
+    }
 }
 
+/// Update events in CellSpurs
 void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownMask) {
     u32 wklNotifyMask = 0;
     for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
@@ -48,7 +114,7 @@ void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32
     }
 }
 
-/// Update workload information in the LS from main memory
+/// Update workload information in the SPU LS from CellSpurs
 void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) {
     u32 wklShutdownMask = 0;
     mgmt->wklRunnable1 = 0;
@@ -65,7 +131,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt)
         }
 
         if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
-            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus1[i] & (1 << mgmt->spuNum)) == 0)) {
+            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) {
                 mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
                 wklShutdownMask |= 0x80000000 >> i;
             }
@@ -88,7 +154,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt)
         }
 
         if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
-            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus2[i] & (1 << mgmt->spuNum)) == 0)) {
+            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) {
                 mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
                 wklShutdownMask |= 0x8000 >> i;
             }
@@ -103,23 +169,82 @@
 /// Process any messages
 void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) {
+    // Process update workload message
     if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << 
mgmt->spuNum)) { mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); spursSysServiceUpdateWorkload(spu, mgmt); } + // Process update trace message if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); } + // Process terminate request if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); // TODO: Rest of the terminate processing } } -void spursSysServiceExitIfRequired() { - // TODO: Implement this +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { + while (true) { + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (mgmt->spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } + } + + bool shouldExit = nIdlingSpus != mgmt->spurs->m.nSpus ? false : mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; + bool foundSchedulableWorkload = false; + if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + foundSchedulableWorkload = true; + } else { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if ((mgmt->wklRunnable1 & (0x8000 >> i)) && + (mgmt->priority[i] & 0x0F) != 0 && + (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0x0F) > (mgmt->spurs->m.wklCurrentContention[i] & 0x0F)) { + foundSchedulableWorkload = true; + break; + } + } + + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS && foundSchedulableWorkload == false) { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if ((mgmt->wklRunnable2 & (0x8000 >> i)) && + (mgmt->priority[i] & 0xF0) != 0 && + (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0xF0) > (mgmt->spurs->m.wklCurrentContention[i] & 0xF0)) { + foundSchedulableWorkload = true; + break; + } + } + } + } + + if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && shouldExit == false && foundSchedulableWorkload == false) { + // TODO: Wait for events + } + + if (shouldExit || foundSchedulableWorkload == false) { + mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum; + } else { + mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); + } + + if (shouldExit == false && foundSchedulableWorkload == false) { + continue; + } + + if (shouldExit == false) { + return; + } + + break; + } + + // TODO: exit spu thread group } /// Main function for the system service workload @@ -127,15 +252,15 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { auto mgmt = vm::get_ptr(spu.ls_offset); if (mgmt->spurs.addr() % CellSpurs::align) { - // TODO: Halt + assert(0); } // Initialise the system service if this is the first time its being started on this SPU - if (mgmt->sysSrvInitialised != 0) { + if (mgmt->sysSrvInitialised == 0) { mgmt->sysSrvInitialised = 1; if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - // TODO: Halt + assert(0); } mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; @@ -143,7 +268,7 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { mgmt->traceMsgCount = -1; spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); - spursSysServiceCleanup(spu, mgmt); + spursSysServiceCleanupAfterPreemption(spu, mgmt); // Trace - SERVICE: INIT CellSpursTracePacket pkt; @@ -163,8 +288,10 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); while (true) { + // Process messages for the system service workload spursSysServiceProcessMessages(spu, mgmt); +poll: if (cellSpursModulePollStatus(nullptr)) { // Trace - SERVICE: 
EXIT CellSpursTracePacket pkt; @@ -181,10 +308,16 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { break; } + // If we reach here it means that either there are more system service messages to be processed + // or there are no workloads that can be scheduled. + + // If the SPU is not idling then process the remaining system service messages if (mgmt->spuIdling == 0) { continue; } + // If we reach here it means that the SPU is idling + // Trace - SERVICE: WAIT CellSpursTracePacket pkt; memset(&pkt, 0, sizeof(pkt)); @@ -192,7 +325,8 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - spursSysServiceExitIfRequired(); + spursSysServiceWaitOrExit(spu, mgmt); + goto poll; } } @@ -206,8 +340,6 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) { *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); - LV2_LOCK(0); - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { spursSysServiceWorkloadMain(spu, pollStatus); } else { From c1df79b7139f21faa0de83bceaa602f3fed46ed9 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 3 Jan 2015 15:59:22 +0530 Subject: [PATCH 11/29] SPURS: Integrate SPURS kernel and system service workload. Also, fixed some bugs. --- rpcs3/Emu/Cell/SPUThread.cpp | 2 +- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 403 +--------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 193 +++-- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 786 ++++++++++++++++++++ rpcs3/cellSpursSpu.cpp | 352 --------- rpcs3/emucore.vcxproj | 2 +- rpcs3/emucore.vcxproj.filters | 2 +- 8 files changed, 897 insertions(+), 845 deletions(-) create mode 100644 rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp delete mode 100644 rpcs3/cellSpursSpu.cpp diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 719cdc8cab..bc6d1d681f 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1029,7 +1029,7 @@ void SPUThread::StopAndSignal(u32 code) case 0x003: { - GPR[3]._u64[1] = m_code3_func(*this); + m_code3_func(*this); break; } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index e7c28ed8ff..4bebaf1baf 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -511,7 +511,7 @@ public: void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } std::function m_custom_task; - std::function m_code3_func; + std::function m_code3_func; public: SPUThread(CPUThreadType type = CPU_THREAD_SPU); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 4cac04ffad..343ca9ecbb 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -21,6 +21,8 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif +void spursKernelMain(SPUThread & spu); + s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptr port, s32 size, u64 name_u64) { #ifdef PRX_DEBUG_XXX @@ -112,7 +114,7 @@ s64 spursInit( spurs->m.wklInfoSysSrv.addr.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4))); spurs->m.wklInfoSysSrv.size = 0x2200; #else - spurs->m.wklInfoSysSrv.addr.set(be_t::make(0x100)); // wrong 64-bit address + spurs->m.wklInfoSysSrv.addr.set(be_t::make(SPURS_IMG_ADDR_SYS_SRV_WORKLOAD)); #endif spurs->m.wklInfoSysSrv.arg = 0; spurs->m.wklInfoSysSrv.uniqueId.write_relaxed(0xff); @@ -170,399 +172,16 @@ s64 
spursInit( name += "CellSpursKernel0"; for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++) { - spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num, isSecond](SPUThread& SPU) + spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num](SPUThread& SPU) { -#ifdef PRX_DEBUG_XXX SPU.GPR[3]._u32[3] = num; SPU.GPR[4]._u64[1] = spurs.addr(); + +#ifdef PRX_DEBUG_XXX return SPU.FastCall(SPU.PC); #endif - // code replacement: - { - const u32 addr = /*SPU.ReadLS32(0x1e0) +*/ 8; //SPU.ReadLS32(0x1e4); - SPU.WriteLS32(addr + 0, 3); // hack for cellSpursModulePollStatus - SPU.WriteLS32(addr + 4, 0x35000000); // bi $0 - SPU.WriteLS32(0x1e4, addr); - - SPU.WriteLS32(SPU.ReadLS32(0x1e0), 2); // hack for cellSpursModuleExit - } - - if (!isSecond) SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // first kernel - { - LV2_LOCK(0); // TODO: lock-free implementation if possible - - auto mgmt = vm::get_ptr(SPU.ls_offset); - - // The first and only argument to this function is a boolean that is set to false if the function - // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. - // If the first argument is true then the shared data is not updated with the result. - const auto isPoll = SPU.GPR[3]._u32[3]; - - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; - - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) - { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) - { - contention[i] += pendingContention[i]; - } - } - } - - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) - { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } - else - { - // Caclulate the scheduling weight for each workload - u16 maxWeight = 0; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); - u8 requestCount = readyCount + idleSpuCount; - - // For a workload to be considered for scheduling: - // 1. 
Its priority must not be 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) - { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) - { - // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: - // 1. Wokload signal set or workload flag or ready count > contention - // 2. Priority of the workload on the SPU - // 3. Is the workload the last selected workload - // 4. Minimum contention of the workload - // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) - // 6. Is the workload executable same as the currently loaded executable - // 7. The workload id (lesser the number, more the weight) - u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; - weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; - weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; - weight |= 0x01; - - // In case of a tie the lower numbered workload is chosen - if (weight > maxWeight) - { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } - } - } - } - - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) - { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); - - // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s - if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) - { - mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); - } - } - } - - if (!isPoll) - { - // Called by kernel - // Increment the contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - contention[wklSelectedId]++; - } - - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - mgmt->spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } - - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - mgmt->wklLocContention[wklSelectedId] = 1; - } - - mgmt->wklCurrentId = wklSelectedId; - } - else if (wklSelectedId != mgmt->wklCurrentId) - { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - pendingContention[wklSelectedId]++; - } - - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; - } - - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - mgmt->wklLocPendingContention[wklSelectedId] = 1; - } - } - - u64 result = (u64)wklSelectedId << 32; - result |= pollStatus; - return result; - }; - else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel - { - LV2_LOCK(0); // TODO: lock-free implementation if possible - - auto mgmt = vm::get_ptr(SPU.ls_offset); - - // The first and only argument to this function is a boolean that is set to false if the function - // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. - // If the first argument is true then the shared data is not updated with the result. - const auto isPoll = SPU.GPR[3]._u32[3]; - - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD2]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) - { - contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; - contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; - - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) - { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; - pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) - { - contention[i] += pendingContention[i]; - } - } - } - - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. 
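The message-bit handshake described in the comment above is a plain test-and-clear on a per-SPU bitmask. A minimal standalone sketch of the pattern, assuming one bit per SPU (bit n owned by SPU n); the helper name is illustrative, not part of the SPURS API:

#include <cstdint>

// Returns true if a system service message was pending for this SPU,
// acknowledging it by clearing the SPU's bit in the same step.
bool testAndClearSysSrvMessage(uint8_t& sysSrvMessage, unsigned spuNum)
{
    const uint8_t mask = static_cast<uint8_t>(1u << spuNum);
    if ((sysSrvMessage & mask) == 0)
    {
        return false; // nothing pending for this SPU
    }
    sysSrvMessage &= ~mask; // acknowledge
    return true;
}

The emulator performs the same read-modify-write through read_relaxed()/write_relaxed() because the mask is shared between the PPU and every SPU.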
- if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) - { - // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } - else - { - // Caclulate the scheduling weight for each workload - u8 maxWeight = 0; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) - { - auto j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); - - // For a workload to be considered for scheduling: - // 1. Its priority must be greater than 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload receiver - if (runnable && priority > 0 && maxContention > contention[i]) - { - if (wklFlag || wklSignal || readyCount > contention[i]) - { - // The scheduling weight of the workload is equal to the priority of the workload for the SPU. - // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. - // In case of a tie the lower numbered workload is chosen. - u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) - { - weight |= 0x04; - } - - if (weight > maxWeight) - { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } - } - } - } - - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) - { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); - - // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s - if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) - { - mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); - } - } - } - - if (!isPoll) - { - // Called by kernel - // Increment the contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - contention[wklSelectedId]++; - } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) - { - mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } - else if (wklSelectedId != mgmt->wklCurrentId) - { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - pendingContention[wklSelectedId]++; - } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) - { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - } - - u64 result = (u64)wklSelectedId << 32; - result |= pollStatus; - return result; - }; - //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test - //{ - // LV2_LOCK(0); - // SPU.FastCall(0x290); - // u64 vRES = SPU.GPR[3]._u64[1]; - // return vRES; - //}; - - SpursKernelMgmtData * mgmt = vm::get_ptr(SPU.ls_offset); - mgmt->spurs = spurs; - mgmt->spuNum = num; - mgmt->dmaTagId = 0x1F; - - u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - while (true) - { - if (Emu.IsStopped()) - { - cellSpurs->Warning("Spurs Kernel aborted"); - return; - } - - // get current workload info: - auto& wkl = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? 
spurs->m.wklInfo2[wid & 0xf] : spurs->m.wklInfoSysSrv); - - if (mgmt->wklCurrentAddr != wkl.addr) - { - // load executable code: - memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.addr.get_ptr(), wkl.size); - mgmt->wklCurrentAddr = wkl.addr; - mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); - } - - if (!isSecond) SPU.WriteLS16(0x1e8, 0); - - // run workload: - SPU.GPR[1]._u32[3] = 0x3FFB0; - SPU.GPR[3]._u32[3] = 0x100; - SPU.GPR[4]._u64[1] = wkl.arg; - SPU.GPR[5]._u32[3] = pollStatus; - SPU.FastCall(0xa00); - - // check status: - auto status = SPU.SPU.Status.GetValue(); - if (status == SPU_STATUS_STOPPED_BY_STOP) - { - return; - } - else - { - assert(status == SPU_STATUS_RUNNING); - } - - // get workload id: - SPU.GPR[3].clear(); - assert(SPU.m_code3_func); - u64 res = SPU.m_code3_func(SPU); - pollStatus = (u32)(res); - wid = (u32)(res >> 32); - } - + spursKernelMain(SPU); })->GetId(); } @@ -653,7 +272,7 @@ s64 spursInit( for (u32 i = 0; i < 16; i++) { if (spurs->m.wklState1[i].read_relaxed() == 2 && - spurs->m.wklInfo1[i].priority.ToBE() != 0 && + *((u64 *)spurs->m.wklInfo1[i].priority) != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) { @@ -671,7 +290,7 @@ s64 spursInit( if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++) { if (spurs->m.wklState2[i].read_relaxed() == 2 && - spurs->m.wklInfo2[i].priority.ToBE() != 0 && + *((u64 *)spurs->m.wklInfo2[i].priority) != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) { @@ -3383,7 +3002,7 @@ void spursTraceStatusUpdate(vm::ptr spurs) spurs->m.xCD = 1; spurs->m.sysSrvMsgUpdateTrace = (1 << spurs->m.nSpus) - 1; spurs->m.sysSrvMessage.write_relaxed(0xFF); - sys_semaphore_wait(spurs->m.semPrv, 0); + sys_semaphore_wait((u32)spurs->m.semPrv, 0); } } @@ -3421,7 +3040,7 @@ s64 spursTraceInitialize(vm::ptr spurs, vm::ptr b spurs->m.traceBuffer.set(buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 
1 : 0)); spurs->m.traceMode = mode; - u32 spuTraceDataCount = (spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus; + u32 spuTraceDataCount = (u32)((spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus); for (u32 i = 0, j = 8; i < 6; i++) { spurs->m.traceStartIndex[i] = j; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 3d817e4a4d..771406b3af 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -140,7 +140,7 @@ enum SpursFlags1 : u8 SF1_EXIT_IF_NO_WORK = 0x80, }; -enum SpursWorkloadConstants +enum SpursWorkloadConstants : u64 { // Workload states SPURS_WKL_STATE_NON_EXISTENT = 0, @@ -149,6 +149,12 @@ enum SpursWorkloadConstants SPURS_WKL_STATE_SHUTTING_DOWN = 3, SPURS_WKL_STATE_REMOVABLE = 4, SPURS_WKL_STATE_INVALID = 5, + + // GUID + SPURS_GUID_SYS_WKL = 0x1BB841BF38F89D33ull, + + // Image addresses + SPURS_IMG_ADDR_SYS_SRV_WORKLOAD = 0x100, }; enum CellSpursModulePollStatus @@ -199,7 +205,6 @@ enum SpursTaskConstants class SPURSManager; class SPURSManagerEventFlag; class SPURSManagerTaskset; - struct CellSpurs; struct CellSpursAttribute @@ -249,7 +254,65 @@ struct CellSpursWorkloadFlag typedef void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg); -struct CellSpursTraceInfo; +struct CellSpursTraceInfo +{ + static const u32 size = 0x80; + static const u32 align = 16; + + be_t spu_thread[8]; // 0x00 + be_t count[8]; // 0x20 + be_t spu_thread_grp; // 0x40 + be_t nspu; // 0x44 + //u8 padding[]; +}; + +struct CellSpursTracePacket +{ + static const u32 size = 16; + + struct + { + u8 tag; + u8 length; + u8 spu; + u8 workload; + be_t time; + } header; + + union + { + struct + { + be_t incident; + be_t reserved; + } service; + + struct + { + be_t ea; + be_t ls; + be_t size; + } load; + + struct + { + be_t offset; + be_t ls; + be_t size; + } map; + + struct + { + s8 module[4]; + be_t level; + be_t ls; + } start; + + be_t user; + be_t guid; + be_t stop; + } data; +}; // Core CellSpurs structures struct CellSpurs @@ -289,7 +352,7 @@ struct CellSpurs be_t arg; // spu argument be_t size; atomic_t uniqueId; // The unique id is the same for all workloads with the same addr - be_t priority[8]; + u8 priority[8]; }; static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); @@ -311,10 +374,6 @@ struct CellSpurs // real data struct { - // The first 0x80 bytes of the CellSpurs structure is shared by all instances of the SPURS kernel in a SPURS instance and the PPU. - // The SPURS kernel copies this from main memory to the LS (address 0x100) then runs its scheduling algorithm using this as one - // of the inputs. After selecting a new workload, the SPURS kernel updates this and writes it back to main memory. - // The read-modify-write is performed atomically by the SPURS kernel. atomic_t wklReadyCount1[0x10]; // 0x00 Number of SPUs requested by each workload (0..15 wids). atomic_t wklIdleSpuCountOrReadyCount2[0x10]; // 0x10 SPURS1: Number of idle SPUs requested by each workload (0..15 wids). SPURS2: Number of SPUs requested by each workload (16..31 wids). u8 wklCurrentContention[0x10]; // 0x20 Number of SPUs used by each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. 
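The packed 4-bit contention layout described in the comment above can be made concrete with a short sketch. It assumes exactly the encoding the comment states (low nibble for wids 0..15, high nibble for wids 16..31); the helper names are illustrative:

#include <cstdint>

// Read the contention counter for a workload id out of the packed array.
uint8_t contentionFor(const uint8_t packed[16], unsigned wid)
{
    const uint8_t b = packed[wid % 16];
    return wid < 16 ? (b & 0x0F) : (b >> 4);
}

// Adjust it: adding 0x01 touches the low nibble and 0x10 the high nibble,
// which is why the cleanup code in this series subtracts 0x01 or 0x10
// depending on the workload id.
void addContention(uint8_t packed[16], unsigned wid)
{
    packed[wid % 16] += (wid < 16) ? 0x01 : 0x10;
}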
@@ -332,33 +391,34 @@ struct CellSpurs atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) u8 x7A[6]; // 0x7A atomic_t wklState1[0x10]; // 0x80 SPURS_WKL_STATE_* - u8 wklStatus1[0x10]; // 0x90 - u8 wklEvent1[0x10]; // 0xA0 - atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) - atomic_t wklMskB; // 0xB4 - System service - Available module id - u8 xB8[5]; // 0xB8 - 0xBC - Syetem service exit barrier - atomic_t sysSrvMsgUpdateWorkload; // 0xBD - u8 xBE; // 0xBE - u8 sysSrvMsgTerminate; // 0xBF - u8 sysSrvWorkload[8]; // 0xC0 - u8 sysSrvOnSpu; // 0xC8 - u8 spuPort; // 0xC9 - SPU port for system service - u8 xCA; // 0xCA - u8 xCB; // 0xCB - u8 xCC; // 0xCC - u8 xCD; // 0xCD - u8 sysSrvMsgUpdateTrace; // 0xCE - u8 xCF; // 0xCF - atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_* - u8 wklStatus2[0x10]; // 0xE0 - u8 wklEvent2[0x10]; // 0xF0 - _sub_str1 wklF1[0x10]; // 0x100 - vm::bptr traceBuffer; // 0x900 - be_t traceStartIndex[6]; // 0x908 - u8 unknown7[0x948 - 0x920]; // 0x920 - be_t traceDataSize; // 0x948 - be_t traceMode; // 0x950 - u8 unknown8[0x980 - 0x954]; // 0x954 + u8 wklStatus1[0x10]; // 0x90 + u8 wklEvent1[0x10]; // 0xA0 + atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) + atomic_t wklMskB; // 0xB4 - System service - Available module id + u32 xB8; // 0xB8 + u8 sysSrvExitBarrier; // 0xBC + atomic_t sysSrvMsgUpdateWorkload; // 0xBD + u8 xBE; // 0xBE + u8 sysSrvMsgTerminate; // 0xBF + u8 sysSrvWorkload[8]; // 0xC0 + u8 sysSrvOnSpu; // 0xC8 + u8 spuPort; // 0xC9 + u8 xCA; // 0xCA + u8 xCB; // 0xCB + u8 xCC; // 0xCC + u8 xCD; // 0xCD + u8 sysSrvMsgUpdateTrace; // 0xCE + u8 xCF; // 0xCF + atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_* + u8 wklStatus2[0x10]; // 0xE0 + u8 wklEvent2[0x10]; // 0xF0 + _sub_str1 wklF1[0x10]; // 0x100 + vm::bptr traceBuffer; // 0x900 + be_t traceStartIndex[6]; // 0x908 + u8 unknown7[0x948 - 0x920]; // 0x920 + be_t traceDataSize; // 0x948 + be_t traceMode; // 0x950 + u8 unknown8[0x980 - 0x954]; // 0x954 be_t semPrv; // 0x980 be_t unk11; // 0x988 be_t unk12; // 0x98C @@ -559,66 +619,6 @@ struct CellSpursExceptionInfo be_t option; }; -struct CellSpursTraceInfo -{ - static const u32 size = 0x80; - static const u32 align = 16; - - be_t spu_thread[8]; // 0x00 - be_t count[8]; // 0x20 - be_t spu_thread_grp; // 0x40 - be_t nspu; // 0x44 - //u8 padding[]; -}; - -struct CellSpursTracePacket -{ - static const u32 size = 16; - - struct - { - u8 tag; - u8 length; - u8 spu; - u8 workload; - be_t time; - } header; - - union - { - struct - { - be_t incident; - be_t reserved; - } service; - - struct - { - be_t ea; - be_t ls; - be_t size; - } load; - - struct - { - be_t offset; - be_t ls; - be_t size; - } map; - - struct - { - s8 module[4]; - be_t level; - be_t ls; - } start; - - be_t user; - be_t guid; - be_t stop; - } data; -}; - // Exception handlers. //typedef void (*CellSpursGlobalExceptionEventHandler)(vm::ptr spurs, vm::ptr info, // u32 id, vm::ptr arg); @@ -793,7 +793,6 @@ struct CellSpursTaskBinInfo // The SPURS kernel data store. This resides at 0x00 of the LS. 
struct SpursKernelMgmtData { - u8 unk0[0x100]; // 0x00 u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 u8 wklLocPendingContention[0x10]; // 0x190 @@ -804,7 +803,7 @@ struct SpursKernelMgmtData { be_t dmaTagId; // 0x1CC vm::bptr wklCurrentAddr; // 0x1D0 be_t wklCurrentUniqueId; // 0x1D8 - u32 wklCurrentId; // 0x1DC + be_t wklCurrentId; // 0x1DC be_t yieldToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 u8 x1E8; // 0x1E8 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp new file mode 100644 index 0000000000..643aed981c --- /dev/null +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -0,0 +1,786 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/Modules.h" +#include "Emu/SysCalls/Modules/cellSpurs.h" + +// +// SPURS utility functions +// +void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); + +// +// SPURS Kernel functions +// +void spursKernelSelectWorkload(SPUThread & spu); +void spursKernelSelectWorkload2(SPUThread & spu); + +// +// SPURS system service workload functions +// +void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4); +void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet); +void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceWorkloadEntry(SPUThread & spu); + +extern Module *cellSpurs; + +////////////////////////////////////////////////////////////////////////////// +// SPURS utility functions +////////////////////////////////////////////////////////////////////////////// + +/// Output trace information +void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { + // TODO: Implement this +} + +/// Check for execution right requests +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + + spu.GPR[3]._u32[3] = 1; + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + spursKernelSelectWorkload2(spu); + } else { + spursKernelSelectWorkload(spu); + } + + auto result = spu.GPR[3]._u64[1]; + if (status) { + *status = (u32)result; + } + + u32 wklId = result >> 32; + return wklId == mgmt->wklCurrentId ? 0 : 1; +} + +////////////////////////////////////////////////////////////////////////////// +// SPURS kernel functions +////////////////////////////////////////////////////////////////////////////// + +/// Select a workload to run +void spursKernelSelectWorkload(SPUThread & spu) { + LV2_LOCK(0); // TODO: lock-free implementation if possible + + auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + + // The first and only argument to this function is a boolean that is set to false if the function + // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. + // If the first argument is true then the shared data is not updated with the result. 
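+    // The result is returned in GPR[3]: the upper 32 bits hold the id of the selected
+    // workload and the lower 32 bits hold the CELL_SPURS_MODULE_POLL_STATUS_* bits.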
+    const auto isPoll = spu.GPR[3]._u32[3];
+
+    // Calculate the contention (number of SPUs used) for each workload
+    u8 contention[CELL_SPURS_MAX_WORKLOAD];
+    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
+    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+        contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];
+
+        // If this is a poll request then the number of SPUs pending to context switch is also added to the contention,
+        // presumably to prevent unnecessary jumps to the kernel
+        if (isPoll) {
+            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
+            if (i != mgmt->wklCurrentId) {
+                contention[i] += pendingContention[i];
+            }
+        }
+    }
+
+    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    u32 pollStatus = 0;
+
+    // The system service workload has the highest priority. Select the system service workload if
+    // the system service message bit for this SPU is set.
+    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+        mgmt->spuIdling = 0;
+        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            // Clear the message bit
+            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+        }
+    } else {
+        // Calculate the scheduling weight for each workload
+        u16 maxWeight = 0;
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+            u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+            u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
+            u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+            u8 requestCount = readyCount + idleSpuCount;
+
+            // For a workload to be considered for scheduling:
+            // 1. Its priority must not be 0
+            // 2. The number of SPUs used by it must be less than the max contention for that workload
+            // 3. The workload should be in runnable state
+            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+            //    OR the workload must be signalled
+            //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+            if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+                if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+                    // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+                    // 1. Workload signal set or workload flag or ready count > contention
+                    // 2. Priority of the workload on the SPU
+                    // 3. Is the workload the last selected workload
+                    // 4. Minimum contention of the workload
+                    // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
+                    // 6. Is the workload executable same as the currently loaded executable
+                    // 7. The workload id (lesser the number, more the weight)
+                    u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+                    weight |= (u16)(mgmt->priority[i] & 0x7F) << 8;
+                    weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
+                    weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+                    weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+                    weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentUniqueId ? 0x02 : 0x00;
+                    weight |= 0x01;
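+
+                    // (Resulting bit layout, following the list above:
+                    //    bit 15    - signal/flag set or ready count > contention
+                    //    bits 8-14 - priority of the workload on this SPU
+                    //    bit 7     - workload was the last selected workload
+                    //    bit 6     - contention is below the workload's minimum contention
+                    //    bits 2-5  - number of unused SPUs
+                    //    bit 1     - workload executable is the one currently loaded
+                    //    bit 0     - always set)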
+
+                    // In case of a tie the lower numbered workload is chosen
+                    if (weight > maxWeight) {
+                        wklSelectedId = i;
+                        maxWeight = weight;
+                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+                    }
+                }
+            }
+        }
+
+        // Not sure what this does. Possibly mark the SPU as idle/in use.
+        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+            // Clear workload signal for the selected workload
+            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+            // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
+                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+            }
+        }
+    }
+
+    if (!isPoll) {
+        // Called by kernel
+        // Increment the contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            contention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            mgmt->spurs->m.wklCurrentContention[i] = contention[i];
+            mgmt->wklLocContention[i] = 0;
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            mgmt->wklLocContention[wklSelectedId] = 1;
+        }
+
+        mgmt->wklCurrentId = wklSelectedId;
+    } else if (wklSelectedId != mgmt->wklCurrentId) {
+        // Not called by kernel but a context switch is required
+        // Increment the pending contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            pendingContention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i];
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            mgmt->wklLocPendingContention[wklSelectedId] = 1;
+        }
+    }
+
+    u64 result = (u64)wklSelectedId << 32;
+    result |= pollStatus;
+    spu.GPR[3]._u64[1] = result;
+}
+
+/// Select a workload to run
+void spursKernelSelectWorkload2(SPUThread & spu) {
+    LV2_LOCK(0); // TODO: lock-free implementation if possible
+
+    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
+
+    // The first and only argument to this function is a boolean that is set to false if the function
+    // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus.
+    // If the first argument is true then the shared data is not updated with the result.
+    const auto isPoll = spu.GPR[3]._u32[3];
+
+    // Calculate the contention (number of SPUs used) for each workload
+    u8 contention[CELL_SPURS_MAX_WORKLOAD2];
+    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
+    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+        contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
+        contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4;
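+        // (Unpacks the 4-bit encoding sketched earlier: wids 0..15 use the low nibble
+        // of wklCurrentContention[wid % 16], wids 16..31 the high nibble.)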
+
+        // If this is a poll request then the number of SPUs pending to context switch is also added to the contention,
+        // presumably to prevent unnecessary jumps to the kernel
+        if (isPoll) {
+            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F];
+            pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4;
+            if (i != mgmt->wklCurrentId) {
+                contention[i] += pendingContention[i];
+            }
+        }
+    }
+
+    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    u32 pollStatus = 0;
+
+    // The system service workload has the highest priority. Select the system service workload if
+    // the system service message bit for this SPU is set.
+    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+        // Not sure what this does. Possibly mark the SPU as in use.
+        mgmt->spuIdling = 0;
+        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            // Clear the message bit
+            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+        }
+    } else {
+        // Calculate the scheduling weight for each workload
+        u8 maxWeight = 0;
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+            auto j = i & 0x0F;
+            u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
+            u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
+            u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+            u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+            u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+            // For a workload to be considered for scheduling:
+            // 1. Its priority must be greater than 0
+            // 2. The number of SPUs used by it must be less than the max contention for that workload
+            // 3. The workload should be in runnable state
+            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+            //    OR the workload must be signalled
+            //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+            if (runnable && priority > 0 && maxContention > contention[i]) {
+                if (wklFlag || wklSignal || readyCount > contention[i]) {
+                    // The scheduling weight of the workload is equal to the priority of the workload for the SPU.
+                    // The current workload is given a slightly higher weight presumably to reduce the number of context switches.
+                    // In case of a tie the lower numbered workload is chosen.
+                    u8 weight = priority << 4;
+                    if (mgmt->wklCurrentId == i) {
+                        weight |= 0x04;
+                    }
+
+                    if (weight > maxWeight) {
+                        wklSelectedId = i;
+                        maxWeight = weight;
+                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+                    }
+                }
+            }
+        }
+
+        // Not sure what this does. Possibly mark the SPU as idle/in use.
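+        // (CellSpurs::spuIdling mirrors this flag per SPU; spursSysServiceWaitOrExit
+        // counts those bits to decide whether every SPU is idle and, together with
+        // SF1_EXIT_IF_NO_WORK, whether the SPU thread group should exit.)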
+        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+            // Clear workload signal for the selected workload
+            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+            // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
+                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+            }
+        }
+    }
+
+    if (!isPoll) {
+        // Called by kernel
+        // Increment the contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            contention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+            mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+            mgmt->wklLocContention[i] = 0;
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+        mgmt->wklCurrentId = wklSelectedId;
+    } else if (wklSelectedId != mgmt->wklCurrentId) {
+        // Not called by kernel but a context switch is required
+        // Increment the pending contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            pendingContention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+    }
+
+    u64 result = (u64)wklSelectedId << 32;
+    result |= pollStatus;
+    spu.GPR[3]._u64[1] = result;
+}
+
+/// Entry point of the SPURS kernel
+void spursKernelMain(SPUThread & spu) {
+    SpursKernelMgmtData * mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
+    mgmt->spuNum = spu.GPR[3]._u32[3];
+    mgmt->dmaTagId = 0x1F;
+    mgmt->spurs.set(spu.GPR[4]._u64[1]);
+    mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    mgmt->wklCurrentUniqueId = 0x20;
+
+    bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+    mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808;
+    mgmt->selectWorkloadAddr = 0x290;
+    spu.WriteLS32(mgmt->yieldToKernelAddr, 2); // hack for cellSpursModuleExit
+    spu.WriteLS32(mgmt->selectWorkloadAddr, 3); // hack for cellSpursModulePollStatus
+    spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0
+    spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload;
+
+    u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    u32 pollStatus = 0;
+    while (true) {
+        if (Emu.IsStopped()) {
+            cellSpurs->Warning("Spurs Kernel aborted");
+            return;
+        }
+
+        // Get current workload info
+        auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ?
mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv); + + if (mgmt->wklCurrentAddr != wkl.addr) { + if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) { + // Load executable code + memcpy(vm::get_ptr(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size); + } + mgmt->wklCurrentAddr = wkl.addr; + mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); + } + + if (!isSecond) { + mgmt->x1E8 = 0; + mgmt->x1E9 = 0; + } + + // Run workload + spu.GPR[1]._u32[3] = 0x3FFB0; + spu.GPR[3]._u32[3] = 0x100; + spu.GPR[4]._u64[1] = wkl.arg; + spu.GPR[5]._u32[3] = pollStatus; + switch (mgmt->wklCurrentAddr.addr()) { + case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: + spursSysServiceWorkloadEntry(spu); + break; + default: + spu.FastCall(0xA00); + break; + } + + // Check status + auto status = spu.SPU.Status.GetValue(); + if (status == SPU_STATUS_STOPPED_BY_STOP) { + return; + } else { + assert(status == SPU_STATUS_RUNNING); + } + + // Select next workload to run + spu.GPR[3].clear(); + if (isSecond) { + spursKernelSelectWorkload2(spu); + } else { + spursKernelSelectWorkload(spu); + } + u64 res = spu.GPR[3]._u64[1]; + pollStatus = (u32)(res); + wid = (u32)(res >> 32); + } +} + +////////////////////////////////////////////////////////////////////////////// +// SPURS system workload functions +////////////////////////////////////////////////////////////////////////////// + +/// Restore scheduling parameters after a workload has been preempted by the system service workload +void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { + if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) { + auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum]; + mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; + + spursSysServiceUpdateWorkload(spu, mgmt); + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); + } else { + mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; + mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + } + + // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace + // uses the current worload id to determine the workload to which the trace belongs + auto wklIdSaved = mgmt->wklCurrentId; + mgmt->wklCurrentId = wklId; + + // Trace - STOP: GUID + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + mgmt->wklCurrentId = wklIdSaved; + } +} + +/// Update the trace count for this SPU in CellSpurs +void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) { + if (mgmt->traceBuffer) { + auto traceInfo = vm::ptr::make((u32)(mgmt->traceBuffer - (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4))); + traceInfo->count[mgmt->spuNum] = mgmt->traceMsgCount; + } +} + +/// Update trace control in SPU from CellSpurs +void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { + auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace; + mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); + mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum); + mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum; + + bool notify = false; + if 
(((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) { + mgmt->spurs->m.xCD = 0; + notify = true; + } + + if (arg4 && mgmt->spurs->m.xCD != 0) { + mgmt->spurs->m.xCD = 0; + notify = true; + } + + // Get trace parameters from CellSpurs and store them in the LS + if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { + if (mgmt->traceMsgCount != 0xFF || mgmt->spurs->m.traceBuffer.addr() == 0) { + spursSysServiceUpdateTraceCount(spu, mgmt); + } else { + mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum]; + } + + mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4); + mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0]; + if (mgmt->traceBuffer == 0) { + mgmt->traceMsgCount = 0; + } + } + + if (notify) { + // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0); + } +} + +/// Update events in CellSpurs +void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { + // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed + // workloads that have a shutdown completion hook registered + u32 wklNotifyBitSet = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownBitSet & (0x80000000u >> i)) { + mgmt->spurs->m.wklEvent1[i] |= 0x01; + if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { + wklNotifyBitSet |= 0x80000000u >> i; + } + } + + if (wklShutdownBitSet & (0x8000 >> i)) { + mgmt->spurs->m.wklEvent2[i] |= 0x01; + if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { + wklNotifyBitSet |= 0x8000 >> i; + } + } + } + + if (wklNotifyBitSet) { + // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); + } +} + +/// Update workload information in the SPU from CellSpurs +void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { + u32 wklShutdownBitSet = 0; + mgmt->wklRunnable1 = 0; + mgmt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + // Copy the priority of the workload for this SPU and its unique id to the LS + mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; + mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); + + // Update workload status and runnable flag based on the workload state + auto wklStatus = mgmt->spurs->m.wklStatus1[i]; + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable1 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) { + mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + // Copy the priority of the workload for this SPU to the LS + if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { + mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; + } + + // Update workload status and runnable flag based on the workload state + wklStatus = mgmt->spurs->m.wklStatus2[i]; + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable2 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) { + mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } + } + } + } + + if (wklShutdownBitSet) { + spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); + } +} + +/// Process any messages +void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { + // Process update workload message + if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + spursSysServiceUpdateWorkload(spu, mgmt); + } + + // Process update trace message + if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); + } + + // Process terminate request + if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + // TODO: Rest of the terminate processing + } +} + +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { + while (true) { + // Find the number of SPUs that are idling in this SPURS instance + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (mgmt->spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } + } + + bool allSpusIdle = nIdlingSpus == mgmt->spurs->m.nSpus ? true: false; + bool exitIfNoWork = mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false;
+
+        // Check if any workloads can be scheduled
+        bool foundReadyWorkload = false;
+        if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+            foundReadyWorkload = true;
+        } else {
+            if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+                    u32 j = i & 0x0F;
+                    u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
+                    u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
+                    u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+                    u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklCurrentContention[j] & 0x0F : mgmt->spurs->m.wklCurrentContention[j] >> 4;
+                    u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+                    u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+                    if (runnable && priority > 0 && maxContention > contention) {
+                        if (wklFlag || wklSignal || readyCount > contention) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            } else {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                    u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+                    u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+                    u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
+                    u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+                    u8 requestCount = readyCount + idleSpuCount;
+
+                    if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > mgmt->spurs->m.wklCurrentContention[i]) {
+                        if (wklFlag || wklSignal || (readyCount != 0 && requestCount > mgmt->spurs->m.wklCurrentContention[i])) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events.
+        if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && (allSpusIdle == false || exitIfNoWork == false) && foundReadyWorkload == false) {
+            // The system service blocks by making a reservation and waiting on the reservation lost event. This is unfortunately
+            // not yet completely implemented in rpcs3. So we busy wait here.
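+            // (The real kernel takes a lock-line reservation on the shared CellSpurs
+            // area and then reads the SPU event status channel, which blocks until the
+            // reservation is lost, i.e. until the PPU or another SPU writes to that
+            // area. The commented-out ReadChannel call below is a placeholder for it.)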
+            //u128 r;
+            //spu.ReadChannel(r, 0);
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+
+        if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) {
+            mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum;
+        } else {
+            mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum);
+        }
+
+        if (allSpusIdle == false || exitIfNoWork == false) {
+            if (foundReadyWorkload == true) {
+                return;
+            }
+        } else {
+            // TODO: exit spu thread group
+        }
+    }
+}
+
+/// Main function for the system service workload
+void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) {
+    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
+
+    if (mgmt->spurs.addr() % CellSpurs::align) {
+        assert(0);
+    }
+
+    // Initialise the system service if this is the first time it's being started on this SPU
+    if (mgmt->sysSrvInitialised == 0) {
+        mgmt->sysSrvInitialised = 1;
+
+        if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) {
+            assert(0);
+        }
+
+        mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum;
+        mgmt->traceBuffer = 0;
+        mgmt->traceMsgCount = -1;
+
+        spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0);
+        spursSysServiceCleanupAfterPreemption(spu, mgmt);
+
+        // Trace - SERVICE: INIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
+        cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+    }
+
+    // Trace - START: Module='SYS '
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
+    memcpy(pkt.data.start.module, "SYS ", 4);
+    pkt.data.start.level = 1; // Policy module
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+
+    while (true) {
+        // Process messages for the system service workload
+        spursSysServiceProcessMessages(spu, mgmt);
+
+poll:
+        if (cellSpursModulePollStatus(spu, nullptr)) {
+            // Trace - SERVICE: EXIT
+            CellSpursTracePacket pkt;
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
+            cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+
+            // Trace - STOP: GUID
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+            pkt.data.stop = SPURS_GUID_SYS_WKL;
+            cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+            break;
+        }
+
+        // If we reach here it means that either there are more system service messages to be processed
+        // or there are no workloads that can be scheduled.
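+        // (mgmt->spuIdling is the LS-local flag set by the workload selection functions
+        // when the system service was chosen because nothing else was schedulable.)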
+ + // If the SPU is not idling then process the remaining system service messages + if (mgmt->spuIdling == 0) { + continue; + } + + // If we reach here it means that the SPU is idling + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursSysServiceWaitOrExit(spu, mgmt); + goto poll; + } +} + +/// Entry point of the system service workload +void spursSysServiceWorkloadEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; + + spu.GPR[1]._u32[3] = 0x3FFD0; + *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; + memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); + + if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceWorkloadMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system service workload. Need to implement this. + } + + // TODO: Ensure that this function always returns to the SPURS kernel + return; +} diff --git a/rpcs3/cellSpursSpu.cpp b/rpcs3/cellSpursSpu.cpp deleted file mode 100644 index 33c75e8a44..0000000000 --- a/rpcs3/cellSpursSpu.cpp +++ /dev/null @@ -1,352 +0,0 @@ -#include "stdafx.h" -#include "Emu/Memory/Memory.h" -#include "Emu/System.h" -#include "Emu/Cell/SPUThread.h" -#include "Emu/SysCalls/Modules/cellSpurs.h" - -/// Output trace information -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { - // TODO: Implement this -} - -/// Check for execution right requests -unsigned cellSpursModulePollStatus(CellSpursModulePollStatus * status) { - // TODO: Implement this - return 0; -} - -/// Restore scheduling paraneters to the right values after a workload has been preempted by the system service workload -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != -1) { - auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum]; - mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = -1; - - spursSysServiceUpdateWorkload(spu, mgmt); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); - } - - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; - - // Trace - STOP: GUID - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - mgmt->wklCurrentId = wklIdSaved; - } -} - -/// Updatre the trace count for this SPU in CellSpurs -void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->traceBuffer) { - mgmt->spurs->m.traceBuffer->count[mgmt->spuNum] = mgmt->traceMsgCount; - } -} - -/// Update trace control in SPU from CellSpurs -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - auto sysSrvMsgUpdateTrace = 
mgmt->spurs->m.sysSrvMsgUpdateTrace; - mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum; - - bool notify = false; - if ((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) { - mgmt->spurs->m.xCD = 0; - notify = true; - } - - if (arg4 && mgmt->spurs->m.xCD != 0) { - mgmt->spurs->m.xCD = 0; - notify = true; - } - - if ((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum) != 0) || (arg3 != 0)) { - if (mgmt->traceMsgCount != 0xFF || mgmt->traceBuffer == 0 || mgmt->spurs->m.traceBuffer.addr() == 0) { - spursSysServiceUpdateTraceCount(spu, mgmt); - } else { - mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum]; - } - - mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0]; - if (mgmt->traceBuffer == 0) { - mgmt->traceMsgCount = 0; - } - } - - if (notify) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0); - } -} - -/// Update events in CellSpurs -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownMask) { - u32 wklNotifyMask = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (wklShutdownMask & (0x80000000 >> i)) { - mgmt->spurs->m.wklEvent1[i] |= 0x01; - if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { - wklNotifyMask |= 0x80000000 >> i; - } - } - } - - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (wklShutdownMask & (0x8000 >> i)) { - mgmt->spurs->m.wklEvent2[i] |= 0x01; - if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { - wklNotifyMask |= 0x8000 >> i; - } - } - } - - if (wklNotifyMask) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); - } -} - -/// Update workload information in the SPU LS from CellSpurs -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { - u32 wklShutdownMask = 0; - mgmt->wklRunnable1 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); - - auto wklStatus = mgmt->spurs->m.wklStatus1[i]; - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } - - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) { - mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownMask |= 0x80000000 >> i; - } - } - } - - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { - mgmt->wklRunnable2 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; - } - - auto wklStatus = mgmt->spurs->m.wklStatus2[i]; - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; - } else { - mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); - } - - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) { - mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownMask |= 0x8000 >> i; - } - } - } - } - - if (wklShutdownMask) { - spursSysServiceUpdateEvent(spu, mgmt, wklShutdownMask); - } -} - -/// Process any messages -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - // Process update workload message - if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); - spursSysServiceUpdateWorkload(spu, mgmt); - } - - // Process update trace message - if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { - spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); - } - - // Process terminate request - if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); - // TODO: Rest of the terminate processing - } -} - -/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { - while (true) { - u32 nIdlingSpus = 0; - for (u32 i = 0; i < 8; i++) { - if (mgmt->spurs->m.spuIdling & (1 << i)) { - nIdlingSpus++; - } - } - - bool shouldExit = nIdlingSpus != mgmt->spurs->m.nSpus ? false : mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; - bool foundSchedulableWorkload = false; - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - foundSchedulableWorkload = true; - } else { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if ((mgmt->wklRunnable1 & (0x8000 >> i)) && - (mgmt->priority[i] & 0x0F) != 0 && - (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0x0F) > (mgmt->spurs->m.wklCurrentContention[i] & 0x0F)) { - foundSchedulableWorkload = true; - break; - } - } - - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS && foundSchedulableWorkload == false) { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if ((mgmt->wklRunnable2 & (0x8000 >> i)) && - (mgmt->priority[i] & 0xF0) != 0 && - (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0xF0) > (mgmt->spurs->m.wklCurrentContention[i] & 0xF0)) { - foundSchedulableWorkload = true; - break; - } - } - } - } - - if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && shouldExit == false && foundSchedulableWorkload == false) { - // TODO: Wait for events - } - - if (shouldExit || foundSchedulableWorkload == false) { - mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum; - } else { - mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); - } - - if (shouldExit == false && foundSchedulableWorkload == false) { - continue; - } - - if (shouldExit == false) { - return; - } - - break; - } - - // TODO: exit spu thread group -} - -/// Main function for the system service workload -void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { - auto mgmt = vm::get_ptr(spu.ls_offset); - - if (mgmt->spurs.addr() % CellSpurs::align) { - assert(0); - } - - // Initialise the system service if this is the first time its being started on this SPU - if (mgmt->sysSrvInitialised == 0) { - mgmt->sysSrvInitialised = 1; - - if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - assert(0); - } - - mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; - mgmt->traceBuffer = 0; - mgmt->traceMsgCount = -1; - - spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); - spursSysServiceCleanupAfterPreemption(spu, mgmt); - - // Trace - SERVICE: INIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - } - - // Trace - START: Module='SYS ' - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_START; - memcpy(pkt.data.start.module, "SYS ", 4); - pkt.data.start.level = 1; // Policy module - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - while (true) { - // Process messages for the system service workload - spursSysServiceProcessMessages(spu, mgmt); - -poll: - if (cellSpursModulePollStatus(nullptr)) { - // Trace - SERVICE: EXIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - // Trace - STOP: GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - break; - } - - // If we reach here it means that either there are more system service messages to be processed - // or there are no workloads that can be scheduled. 
- - // If the SPU is not idling then process the remaining system service messages - if (mgmt->spuIdling == 0) { - continue; - } - - // If we reach here it means that the SPU is idling - - // Trace - SERVICE: WAIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursSysServiceWaitOrExit(spu, mgmt); - goto poll; - } -} - -/// Entry point of the system service workload -void spursSysServiceWorkloadEntry(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); - auto arg = spu.GPR[4]._u64[1]; - auto pollStatus = spu.GPR[5]._u32[3]; - - spu.GPR[1]._u32[3] = 0x3FFD0; - *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; - memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); - - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - spursSysServiceWorkloadMain(spu, pollStatus); - } else { - // TODO: If we reach here it means the current workload was preempted to start the - // system service workload. Need to implement this. - } - - // TODO: Ensure that this function always returns to the SPURS kernel - return; -} diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 3599cd2cd3..dbac3af5be 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -54,7 +54,7 @@ - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index a1e6732cc9..3166265186 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -653,7 +653,7 @@ Emu\CPU\ARMv7\Modules - + Emu\SysCalls\Modules From b01c17d1a0048e71fb232c462e0c20430255720f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 5 Jan 2015 21:54:03 +0530 Subject: [PATCH 12/29] SPURS: Add locks in the system service workload --- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 2d696666aa..06ff2553be 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -585,6 +585,8 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) /// Process any messages void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { + LV2_LOCK(0); + // Process update workload message if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); @@ -606,6 +608,8 @@ void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) /// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { while (true) { + Emu.GetCoreMutex().lock(); + // Find the number of SPUs that are idling in this SPURS instance u32 nIdlingSpus = 0; for (u32 i = 0; i < 8; i++) { @@ -665,7 +669,10 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { // not yet completely implemented in rpcs3. So we busy wait here. 
//u128 r; //spu.ReadChannel(r, 0); + + Emu.GetCoreMutex().unlock(); std::this_thread::sleep_for(std::chrono::milliseconds(1)); + Emu.GetCoreMutex().lock(); } if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) { @@ -674,6 +681,8 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); } + Emu.GetCoreMutex().unlock(); + if (allSpusIdle == false || exitIfNoWork == false) { if (foundReadyWorkload == true) { return; @@ -696,6 +705,7 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { if (mgmt->sysSrvInitialised == 0) { mgmt->sysSrvInitialised = 1; + LV2_LOCK(0); if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { assert(0); } From 17f34f512777cc56b923beb18316722d47227dea Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 21 Jan 2015 00:47:20 +0530 Subject: [PATCH 13/29] SPURS: Implement cellSpursSendSignal, cellSpursSendWorkloadSignal and some cellSpursEventFlag functions --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 370 +++++++++++++++++++++-- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 59 +++- 2 files changed, 399 insertions(+), 30 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 10c2348d4b..a9a50e2bbe 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1460,13 +1460,63 @@ s64 cellSpursGetWorkloadFlag(vm::ptr spurs, vm::ptr spurs, u32 workloadId) { + cellSpurs->Warning("%s(spurs=0x%x, workloadId=0x%x)", __FUNCTION__, spurs.addr(), workloadId); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0xA658, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!spurs) + { + return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; + } + + if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0)) + { + return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; + } + + if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0) + { + return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; + } + + if (spurs->m.exception) + { + return CELL_SPURS_POLICY_MODULE_ERROR_STAT; + } + + u8 state; + if (workloadId >= CELL_SPURS_MAX_WORKLOAD) + { + state = spurs->m.wklState2[workloadId & 0x0F].read_relaxed(); + } + else + { + state = spurs->m.wklState1[workloadId].read_relaxed(); + } + + if (state != SPURS_WKL_STATE_RUNNABLE) + { + return CELL_SPURS_POLICY_MODULE_ERROR_STAT; + } + + // TODO: Make the below block execute atomically + if (workloadId >= CELL_SPURS_MAX_WORKLOAD) + { + spurs->m.wklSignal2 |= be_t::make(0x8000 >> (workloadId & 0x0F)); + } + else + { + spurs->m.wklSignal1 |= be_t::make(0x8000 >> workloadId); + } + return CELL_OK; #endif } @@ -1611,64 +1661,250 @@ s64 cellSpursUnsetExceptionEventHandler() s64 _cellSpursEventFlagInitialize(vm::ptr spurs, vm::ptr taskset, vm::ptr eventFlag, u32 flagClearMode, u32 flagDirection) { -#ifdef PRX_DEBUG cellSpurs->Warning("_cellSpursEventFlagInitialize(spurs_addr=0x%x, taskset_addr=0x%x, eventFlag_addr=0x%x, flagClearMode=%d, flagDirection=%d)", spurs.addr(), taskset.addr(), eventFlag.addr(), flagClearMode, flagDirection); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1564C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (taskset.addr() == 0 || spurs.addr() == 0) 
+ { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align || eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset.addr() && taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (flagDirection > CELL_SPURS_EVENT_FLAG_LAST || flagClearMode > CELL_SPURS_EVENT_FLAG_CLEAR_LAST) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + memset(eventFlag.get_ptr(), 0, CellSpursEventFlag::size); + eventFlag->m.direction = flagDirection; + eventFlag->m.clearMode = flagClearMode; + eventFlag->m.spuPort = -1; + + if (taskset.addr()) + { + eventFlag->m.addr = taskset.addr(); + } + else + { + eventFlag->m.isIwl = 1; + eventFlag->m.addr = spurs.addr(); + } + return CELL_OK; #endif } s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagAttachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x157B8, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!eventFlag) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + if (eventFlag->m.spuPort != -1) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + vm::ptr spurs; + if (eventFlag->m.isIwl == 1) + { + spurs.set((u32)eventFlag->m.addr); + } + else + { + auto taskset = vm::ptr::make((u32)eventFlag->m.addr); + spurs.set((u32)taskset->m.spurs.addr()); + } + + u32 eventQueueId; + auto _port = vm::ptr::make((u32)Memory.Alloc(1, 1)); + auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, _port, 1, *((u64 *)"_spuEvF")); + auto port = *_port; + Memory.Free(_port.addr()); + if (rc != CELL_OK) + { + return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF)); + } + + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + auto _eventPortId = vm::ptr::make((u32)Memory.Alloc(4, 4)); + rc = sys_event_port_create(_eventPortId, SYS_EVENT_PORT_LOCAL, 0); + auto eventPortId = *_eventPortId; + Memory.Free(_eventPortId.addr()); + if (rc == CELL_OK) + { + rc = sys_event_port_connect_local(eventPortId, eventQueueId); + if (rc == CELL_OK) + { + eventFlag->m.eventPortId = eventPortId; + goto success; + } + + sys_event_port_destroy(eventPortId); + } + + // TODO: Implement the following + // if (spursDetachLv2EventQueue(spurs, port, 1) == CELL_OK) + // { + // sys_event_queue_destroy(eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE); + // } + return (rc & 0x0FFF0000) == 0x00410000 ? 
rc : (0x80410900 | (rc & 0xFF)); + } + +success: + eventFlag->m.eventQueueId = eventQueueId; + eventFlag->m.spuPort = port; return CELL_OK; #endif } s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagDetachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15998, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!eventFlag) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + if (eventFlag->m.spuPort == -1) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (eventFlag->m.x04 || eventFlag->m.x06 & 0xFF00) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + auto port = eventFlag->m.spuPort; + eventFlag->m.spuPort = -1; // TODO: Make this execute atomically + + vm::ptr spurs; + if (eventFlag->m.isIwl == 1) + { + spurs.set((u32)eventFlag->m.addr); + } + else + { + auto taskset = vm::ptr::make((u32)eventFlag->m.addr); + spurs.set((u32)taskset->m.spurs.addr()); + } + + if(eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + sys_event_port_disconnect(eventFlag->m.eventPortId); + sys_event_port_destroy(eventFlag->m.eventPortId); + } + + s64 rc = CELL_OK; + // TODO: Implement the following + // auto rc = spursDetachLv2EventQueue(spurs, port, 1); + // if (rc == CELL_OK) + // { + // rc = sys_event_queue_destroy(eventFlag->m.eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE); + // } + + if (rc != CELL_OK) + { + return (rc & 0x0FFF0000) == 0x00410000 ? 
rc : (0x80410900 | (rc & 0xFF)); + } + return CELL_OK; #endif } -s64 cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode) +s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode, u32 block) { -#ifdef PRX_DEBUG - cellSpurs->Warning("cellSpursEventFlagWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=%d)", eventFlag.addr(), mask.addr(), mode); - return GetCurrentPPUThread().FastCall2(libsre + 0x15E68, libsre_rtoc); -#else UNIMPLEMENTED_FUNC(cellSpurs); return CELL_OK; +} + +s64 cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode) +{ + cellSpurs->Warning("cellSpursEventFlagWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=%d)", eventFlag.addr(), mask.addr(), mode); + +#ifdef PRX_DEBUG + return GetCurrentPPUThread().FastCall2(libsre + 0x15E68, libsre_rtoc); +#else + return _cellSpursEventFlagWait(eventFlag, mask, mode, 1/*block*/); #endif } s64 cellSpursEventFlagClear(vm::ptr eventFlag, u16 bits) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagClear(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15E9C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + eventFlag->m.bits &= ~bits; // TODO: Make this execute atomically return CELL_OK; #endif } s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagSet(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15F04, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -1678,44 +1914,77 @@ s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) s64 cellSpursEventFlagTryWait(vm::ptr eventFlag, vm::ptr mask, u32 mode) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagTryWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=0x%x)", eventFlag.addr(), mask.addr(), mode); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15E70, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; + return _cellSpursEventFlagWait(eventFlag, mask, mode, 0/*block*/); #endif } s64 cellSpursEventFlagGetDirection(vm::ptr eventFlag, vm::ptr direction) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagGetDirection(eventFlag_addr=0x%x, direction_addr=0x%x)", eventFlag.addr(), direction.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x162C4, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || direction.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + *direction = eventFlag->m.direction; return CELL_OK; #endif } s64 cellSpursEventFlagGetClearMode(vm::ptr eventFlag, vm::ptr clear_mode) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagGetClearMode(eventFlag_addr=0x%x, clear_mode_addr=0x%x)", eventFlag.addr(), clear_mode.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x16310, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || clear_mode.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + *clear_mode = eventFlag->m.clearMode; return 
CELL_OK; #endif } s64 cellSpursEventFlagGetTasksetAddress(vm::ptr eventFlag, vm::ptr taskset) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagGetTasksetAddress(eventFlag_addr=0x%x, taskset_addr=0x%x)", eventFlag.addr(), taskset.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1635C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || taskset.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + taskset.set(eventFlag->m.isIwl ? 0 : eventFlag->m.addr); return CELL_OK; #endif } @@ -2302,7 +2571,54 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) cellSpurs->Warning("_cellSpursSendSignal(taskset_addr=0x%x, taskID=%d)", taskset.addr(), taskID); return GetCurrentPPUThread().FastCall2(libsre + 0x124CC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskID >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + auto word = taskID >> 5; + auto mask = 0x80000000u >> (taskID & 0x1F); + auto disabled = taskset->m.enabled_set[word] & mask ? false : true; + auto running = taskset->m.running_set[word]; + auto ready = taskset->m.ready_set[word]; + auto ready2 = taskset->m.ready2_set[word]; + auto waiting = taskset->m.waiting_set[word]; + auto signalled = taskset->m.signal_received_set[word]; + auto enabled = taskset->m.enabled_set[word]; + auto invalid = (ready & ready2) || (running & waiting) || ((running | ready | ready2 | waiting | signalled) & ~enabled) || disabled; + + if (invalid) + { + return CELL_SPURS_TASK_ERROR_SRCH; + } + + auto shouldSignal = waiting & ~signalled & mask ? 
true : false; + taskset->m.signal_received_set[word] |= mask; // TODO: Make this execute atomically + if (shouldSignal) + { + cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); + auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr::make((u32)taskset->m.spurs.addr())); + if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (rc != CELL_OK) + { + assert(0); + } + } + return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index dab014f898..32798e6006 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -199,6 +199,28 @@ enum SpursTaskConstants CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, }; +enum CellSpursEventFlagWaitMode +{ + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1 +}; + +enum CellSpursEventFlagClearMode +{ + CELL_SPURS_EVENT_FLAG_CLEAR_AUTO = 0, + CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL = 1, + CELL_SPURS_EVENT_FLAG_CLEAR_LAST = CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL, +}; + +enum CellSpursEventFlagDirection +{ + CELL_SPURS_EVENT_FLAG_SPU2SPU, + CELL_SPURS_EVENT_FLAG_SPU2PPU, + CELL_SPURS_EVENT_FLAG_PPU2SPU, + CELL_SPURS_EVENT_FLAG_ANY2ANY, + CELL_SPURS_EVENT_FLAG_LAST = CELL_SPURS_EVENT_FLAG_ANY2ANY, +}; + class SPURSManager; class SPURSManagerEventFlag; class SPURSManagerTaskset; @@ -519,7 +541,38 @@ struct CellSpursWorkloadAttribute struct CellSpursEventFlag { - SPURSManagerEventFlag *eventFlag; + static const u32 align = 128; + static const u32 size = 128; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct _CellSpursEventFlag + { + be_t bits; // 0x00 + be_t x02; // 0x02 + be_t x04; // 0x04 + be_t x06; // 0x06 + be_t x08; // 0x08 + be_t x0A; // 0x0A + u8 spuPort; // 0x0C + u8 isIwl; // 0x0D + u8 direction; // 0x0E + u8 clearMode; // 0x0F + u16 x10[16]; // 0x10 + u8 x30[0x70 - 0x30]; // 0x30 + be_t addr; // 0x70 + be_t eventPortId; // 0x78 + be_t eventQueueId; // 0x7C + } m; + + static_assert(sizeof(_CellSpursEventFlag) == size, "Wrong _CellSpursEventFlag size"); + + SPURSManagerEventFlag *eventFlag; + }; }; union CellSpursTaskArgument @@ -559,7 +612,7 @@ struct CellSpursTaskset { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 - be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t ready2_set[4]; // 0x20 - TODO: Find out what this is be_t enabled_set[4]; // 0x30 be_t signal_received_set[4]; // 0x40 be_t waiting_set[4]; // 0x50 @@ -668,7 +721,7 @@ struct CellSpursTaskset2 { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 - be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t ready2_set[4]; // 0x20 - TODO: Find out what this is be_t enabled_set[4]; // 0x30 be_t signal_received_set[4]; // 0x40 be_t waiting_set[4]; // 0x50 From 8717bdffa9e58b56ae31476cb003aab9033e0f79 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Fri, 23 Jan 2015 03:01:46 +0530 Subject: [PATCH 14/29] SPURS: Implement cellSpursEventFlagWait and cellSpursEventFlagSet. 
Also modify implementation to make use of vm::var --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 293 +++++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 24 +- 2 files changed, 259 insertions(+), 58 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 25bf599222..1c15206a46 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -27,6 +27,8 @@ extern u32 libsre_rtoc; #endif void spursKernelMain(SPUThread & spu); +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); +s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptr port, s32 size, u64 name_u64) { @@ -1302,7 +1304,7 @@ s64 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr spurs, vm::ptr wid, vm::ptr attr) +s64 cellSpursAddWorkloadWithAttribute(vm::ptr spurs, const vm::ptr wid, vm::ptr attr) { cellSpurs->Warning("%s(spurs_addr=0x%x, wid_addr=0x%x, attr_addr=0x%x)", __FUNCTION__, spurs.addr(), wid.addr(), attr.addr()); #ifdef PRX_DEBUG_XXX @@ -1507,7 +1509,6 @@ s64 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 workloadId) return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } - // TODO: Make the below block execute atomically if (workloadId >= CELL_SPURS_MAX_WORKLOAD) { spurs->m.wklSignal2 |= be_t::make(0x8000 >> (workloadId & 0x0F)); @@ -1750,10 +1751,8 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) } u32 eventQueueId; - auto _port = vm::ptr::make((u32)Memory.Alloc(1, 1)); - auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, _port, 1, *((u64 *)"_spuEvF")); - auto port = *_port; - Memory.Free(_port.addr()); + vm::var port; + auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, port, 1, *((u64 *)"_spuEvF")); if (rc != CELL_OK) { return (rc & 0x0FFF0000) == 0x00410000 ? 
rc : (0x80410900 | (rc & 0xFF)); @@ -1761,20 +1760,18 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { - auto _eventPortId = vm::ptr::make((u32)Memory.Alloc(4, 4)); - rc = sys_event_port_create(_eventPortId, SYS_EVENT_PORT_LOCAL, 0); - auto eventPortId = *_eventPortId; - Memory.Free(_eventPortId.addr()); + vm::var> eventPortId; + rc = sys_event_port_create(vm::ptr::make(eventPortId.addr()), SYS_EVENT_PORT_LOCAL, 0); if (rc == CELL_OK) { - rc = sys_event_port_connect_local(eventPortId, eventQueueId); + rc = sys_event_port_connect_local(eventPortId.value(), eventQueueId); if (rc == CELL_OK) { eventFlag->m.eventPortId = eventPortId; goto success; } - sys_event_port_destroy(eventPortId); + sys_event_port_destroy(eventPortId.value()); } // TODO: Implement the following @@ -1819,13 +1816,13 @@ s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) return CELL_SPURS_TASK_ERROR_STAT; } - if (eventFlag->m.x04 || eventFlag->m.x06 & 0xFF00) + if (eventFlag->m.x04 || eventFlag->m.x07) { return CELL_SPURS_TASK_ERROR_BUSY; } auto port = eventFlag->m.spuPort; - eventFlag->m.spuPort = -1; // TODO: Make this execute atomically + eventFlag->m.spuPort = -1; vm::ptr spurs; if (eventFlag->m.isIwl == 1) @@ -1863,7 +1860,121 @@ s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode, u32 block) { - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || mask.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (mode > CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + if (block && eventFlag->m.spuPort == -1) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (eventFlag->m.x04 || eventFlag->m.x07) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + u16 bits = eventFlag->m.bits & *mask; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; + if (mode != CELL_SPURS_EVENT_FLAG_AND) + { + tmp &= eventFlag->m.x0A; + } + + int i = 15; + while (tmp) + { + if (tmp & 0x1) + { + if (eventFlag->m.x10[i] & *mask && eventFlag->m.x10[i] != *mask) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + } + + tmp >>= 1; + i--; + } + } + + bool recv; + if ((*mask & ~bits) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && bits)) + { + if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) + { + eventFlag->m.bits &= ~bits; + } + + recv = false; + } + else + { + if (block == 0) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + eventFlag->m.x06 = 0; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + u8 i = 0; + u16 tmp = eventFlag->m.x08; + while (tmp & 0x01) + { + tmp >>= 1; + i++; + } + + if (i == 16) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + eventFlag->m.x06 = (15 - i) << 4; + } + + eventFlag->m.x06 |= mode; + eventFlag->m.x04 = *mask; + recv = true; + } + + if (recv) { + vm::var data; + auto rc = sys_event_queue_receive(eventFlag->m.eventQueueId, data, 0); + if (rc != CELL_OK) + { + assert(0); + } + + u8 i = 0; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + i = eventFlag->m.x06 >> 4; + } + + bits = eventFlag->m.x30[i]; + eventFlag->m.x07 = 0; + } 
+ + *mask = bits; return CELL_OK; } @@ -1895,7 +2006,7 @@ s64 cellSpursEventFlagClear(vm::ptr eventFlag, u16 bits) return CELL_SPURS_TASK_ERROR_ALIGN; } - eventFlag->m.bits &= ~bits; // TODO: Make this execute atomically + eventFlag->m.bits &= ~bits; return CELL_OK; #endif } @@ -1907,7 +2018,110 @@ s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15F04, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_PPU2SPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + u16 tmp1 = 0; + auto send = false; + u16 tmp3 = 0; + u16 tmp4 = 0; + + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.x04) + { + u16 tmp = (eventFlag->m.bits | bits) & eventFlag->m.x04; + if ((eventFlag->m.x04 & ~tmp) == 0 || ((eventFlag->m.x06 & 0x0F) == 0 && tmp != 0)) + { + eventFlag->m.x07 = 1; + eventFlag->m.x04 = 0; + tmp1 = tmp; + send = true; + tmp3 = eventFlag->m.x06 >> 4; + tmp4 = tmp; + } + } + + u16 i = 15; + u16 j = 0; + u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; + u16 tmp5 = 0; + u16 x30[16]; + while (tmp) + { + if (tmp & 0x0001) + { + u16 tmp6 = (eventFlag->m.bits | bits) & eventFlag->m.x10[i]; + if ((eventFlag->m.x10[i] & ~tmp6) == 0 || (((eventFlag->m.x0A >> j) & 0x01) == 0 && (eventFlag->m.x10[i] & ~tmp6) != 0)) + { + tmp4 |= tmp6; + tmp5 |= 1 << j; + x30[j] = tmp6; + } + } + + tmp >>= 1; + i--; + j++; + } + + eventFlag->m.bits |= bits; + eventFlag->m.x02 |= tmp5; + if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) + { + eventFlag->m.bits &= ~tmp4; + } + + if (send) + { + eventFlag->m.x30[tmp3] = tmp1; + if (sys_event_port_send(eventFlag->m.eventPortId, 0, 0, 0) != CELL_OK) + { + assert(0); + } + } + + if (tmp5) + { + for (auto i = 0; i < 16; i++) + { + if (tmp5 & (0x8000 >> i)) + { + eventFlag->m.x30[i] = x30[i]; + vm::var taskset; + if (eventFlag->m.isIwl) + { + cellSpursLookUpTasksetAddress(vm::ptr::make((u32)eventFlag->m.addr), vm::ptr::make(taskset.addr()), eventFlag->m.x60[i]); + } + else + { + taskset.value() = (u32)eventFlag->m.addr; + } + + auto rc = _cellSpursSendSignal(vm::ptr::make(taskset.addr()), eventFlag->m.x50[i]); + if (rc == CELL_SPURS_TASK_ERROR_INVAL || rc == CELL_SPURS_TASK_ERROR_STAT) + { + return CELL_SPURS_TASK_ERROR_FATAL; + } + + if (rc != CELL_OK) + { + assert(0); + } + } + } + } + return CELL_OK; #endif } @@ -2306,30 +2520,26 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks taskset->m.enable_clear_ls = enable_clear_ls > 0 ? 
1 : 0; taskset->m.size = size; - auto wkl_attr = vm::ptr::make((u32)Memory.Alloc(CellSpursWorkloadAttribute::size, CellSpursWorkloadAttribute::align)); + vm::var wkl_attr; _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(16) /*pm*/, 0x1E40 /*pm_size*/, taskset.addr(), priority, 8 /*min_contention*/, max_contention); // TODO: Check return code - auto cls = vm::ptr::make((u32)Memory.Alloc(1, 1)); - *((char *)cls.get_ptr()) = 0; - cellSpursWorkloadAttributeSetName(wkl_attr, cls, name); + cellSpursWorkloadAttributeSetName(wkl_attr, vm::ptr::make(0), name); // TODO: Check return code // TODO: cellSpursWorkloadAttributeSetShutdownCompletionEventHook(wkl_attr, hook, taskset); // TODO: Check return code - auto wid = vm::ptr::make((u32)Memory.Alloc(4, 4)); - cellSpursAddWorkloadWithAttribute(spurs, wid, vm::ptr::make(wkl_attr.addr())); + vm::var> wid; + cellSpursAddWorkloadWithAttribute(spurs, vm::ptr::make(wid.addr()), vm::ptr::make(wkl_attr.addr())); // TODO: Check return code taskset->m.x72 = 0x80; - taskset->m.wid = *wid; + taskset->m.wid = wid.value(); // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset); // TODO: Check return code - Memory.Free(wkl_attr.addr()); - Memory.Free(wid.addr()); return CELL_OK; } @@ -2439,14 +2649,13 @@ s64 cellSpursShutdownTaskset(vm::ptr taskset) u32 _cellSpursGetSdkVersion() { - static u32 sdk_version = -2; + static s32 sdk_version = -2; if (sdk_version == -2) { - auto version = vm::ptr::make((u32)Memory.Alloc(sizeof(u32), sizeof(u32))); - sys_process_get_sdk_version(sys_process_getpid(), version); - sdk_version = *version; - Memory.Free(version.addr()); + vm::var> version; + sys_process_get_sdk_version(sys_process_getpid(), vm::ptr::make(version.addr())); + sdk_version = version.value(); } return sdk_version; @@ -2522,7 +2731,6 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: // TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000 - // TODO: Make the following block execute atomically u32 tmp_task_id; for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++) { @@ -2603,7 +2811,7 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) } auto shouldSignal = waiting & ~signalled & mask ? 
true : false; - taskset->m.signal_received_set[word] |= mask; // TODO: Make this execute atomically + taskset->m.signal_received_set[word] |= mask; if (shouldSignal) { cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); @@ -2872,11 +3080,11 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc); #else - auto free_attr = false; + vm::ptr tmp_attr; + if (!attr) { - attr.set(Memory.Alloc(CellSpursTasksetAttribute2::size, CellSpursTasksetAttribute2::align)); - free_attr = true; + attr.set(tmp_attr.addr()); _cellSpursTasksetAttribute2Initialize(attr, 0); } @@ -2885,10 +3093,6 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), CellSpursTaskset2::size, (u8)attr->m.enable_clear_ls); if (rc != CELL_OK) { - if (free_attr) - { - Memory.Free(attr.addr()); - } return rc; } @@ -2898,11 +3102,6 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr } // TODO: Implement rest of the function - - if (free_attr) - { - Memory.Free(attr.addr()); - } return CELL_OK; #endif } @@ -3028,7 +3227,7 @@ s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset #endif } -s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, id=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), id); @@ -3660,6 +3859,4 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursTraceStart); REG_FUNC(cellSpurs, cellSpursTraceStop); REG_FUNC(cellSpurs, cellSpursTraceFinalize); - - // TODO: some trace funcs } \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index ecbfc03efc..1946675c54 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -201,8 +201,9 @@ enum SpursTaskConstants enum CellSpursEventFlagWaitMode { - CELL_SPURS_EVENT_FLAG_OR = 0, - CELL_SPURS_EVENT_FLAG_AND = 1 + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1, + CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST = CELL_SPURS_EVENT_FLAG_AND, }; enum CellSpursEventFlagClearMode @@ -552,18 +553,21 @@ struct CellSpursEventFlag // Real data struct _CellSpursEventFlag { - be_t bits; // 0x00 - be_t x02; // 0x02 - be_t x04; // 0x04 - be_t x06; // 0x06 - be_t x08; // 0x08 - be_t x0A; // 0x0A + be_t bits; // 0x00 Bit mask of event set bits + be_t x02; // 0x02 Bit mask of SPU thread slots whose conditions have met + be_t x04; // 0x04 Wait mask for PPU thread + u8 x06; // 0x06 Top 4 bits: Bit number for PPU thread. 
Bottom 4 bits: Wait mode of PPU thread + u8 x07; // 0x07 Set to 1 if the blocked PPU thread's condition has been met + be_t x08; // 0x08 Bit mask of used wait slots + be_t x0A; // 0x0A Bit mask of used wait slots whose wait mode is AND u8 spuPort; // 0x0C u8 isIwl; // 0x0D u8 direction; // 0x0E u8 clearMode; // 0x0F - u16 x10[16]; // 0x10 - u8 x30[0x70 - 0x30]; // 0x30 + be_t x10[16]; // 0x10 Wait mask for SPU threads + be_t x30[16]; // 0x30 Received event flag mask for SPU threads + u8 x50[16]; // 0x50 Task id of waiting SPU threads + u8 x60[16]; // 0x60 Workload Ids of waiting SPU threads be_t addr; // 0x70 be_t eventPortId; // 0x78 be_t eventQueueId; // 0x7C From 173fb060cb5dea6c008aabf371e26566eb31a77d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 24 Jan 2015 00:17:37 +0530 Subject: [PATCH 15/29] SPURS: Improve the readability of the event flag functions --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 180 ++++++++++++++--------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 43 +++--- 2 files changed, 134 insertions(+), 89 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 1c15206a46..c8fc774ad8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1696,7 +1696,7 @@ s64 _cellSpursEventFlagInitialize(vm::ptr spurs, vm::ptrm.direction = flagDirection; eventFlag->m.clearMode = flagClearMode; - eventFlag->m.spuPort = -1; + eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT; if (taskset.addr()) { @@ -1734,7 +1734,7 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) return CELL_SPURS_TASK_ERROR_PERM; } - if (eventFlag->m.spuPort != -1) + if (eventFlag->m.spuPort != CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT) { return CELL_SPURS_TASK_ERROR_STAT; } @@ -1755,6 +1755,7 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, port, 1, *((u64 *)"_spuEvF")); if (rc != CELL_OK) { + // Return rc if it's an error code from SPURS otherwise convert the error code to a SPURS task error code return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF)); } @@ -1779,6 +1780,8 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) // { // sys_event_queue_destroy(eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE); // } + + // Return rc if it's an error code from SPURS otherwise convert the error code to a SPURS task error code return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF)); } @@ -1811,18 +1814,18 @@ s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) return CELL_SPURS_TASK_ERROR_PERM; } - if (eventFlag->m.spuPort == -1) + if (eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT) { return CELL_SPURS_TASK_ERROR_STAT; } - if (eventFlag->m.x04 || eventFlag->m.x07) + if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv) { return CELL_SPURS_TASK_ERROR_BUSY; } auto port = eventFlag->m.spuPort; - eventFlag->m.spuPort = -1; + eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT; vm::ptr spurs; if (eventFlag->m.isIwl == 1)
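For reference, the conversion annotated in the two comments above, and repeated in every attach/detach error path, folds an arbitrary LV2 return code into the SPURS task error space. A minimal standalone sketch (spursConvertLv2TaskError is an illustrative name, not a function in this patch):

// Sketch of the inline ternary used by the event flag functions: codes that
// are already in the 0x..41.... SPURS range pass through unchanged, anything
// else keeps only its low byte and is rebased into the 0x804109xx task error
// range.
static s64 spursConvertLv2TaskError(s64 rc)
{
    if ((rc & 0x0FFF0000) == 0x00410000)
    {
        return rc;
    }

    return 0x80410900 | (rc & 0xFF);
}

For example, a hypothetical LV2 failure 0x80010005 would map to 0x80410905, while CELL_SPURS_TASK_ERROR_* values are returned unchanged.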
rc : (0x80410900 | (rc & 0xFF)); } @@ -1880,83 +1884,104 @@ s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr return CELL_SPURS_TASK_ERROR_PERM; } - if (block && eventFlag->m.spuPort == -1) + if (block && eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT) { return CELL_SPURS_TASK_ERROR_STAT; } - if (eventFlag->m.x04 || eventFlag->m.x07) + if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv) { return CELL_SPURS_TASK_ERROR_BUSY; } - u16 bits = eventFlag->m.bits & *mask; + u16 relevantEvents = eventFlag->m.events & *mask; if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { + // Make sure the wait mask and mode specified does not conflict with that of the already waiting tasks. + // Conflict scenarios: + // OR vs OR - A conflict never occurs + // OR vs AND - A conflict occurs if the masks for the two tasks overlap + // AND vs AND - A conflict occurs if the masks for the two tasks are not the same + + // Determine the set of all already waiting tasks whose wait mode/mask can possibly conflict with the specified wait mode/mask. + // This set is equal to 'set of all tasks waiting' - 'set of all tasks whose wait conditions have been met'. + // If the wait mode is OR, we prune the set of all tasks that are waiting in OR mode from the set since a conflict cannot occur + // with an already waiting task in OR mode. + u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv; + if (mode == CELL_SPURS_EVENT_FLAG_OR) { - u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; - if (mode != CELL_SPURS_EVENT_FLAG_AND) - { - tmp &= eventFlag->m.x0A; + relevantWaitSlots &= eventFlag->m.spuTaskWaitMode; } - int i = 15; - while (tmp) + int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1; + while (relevantWaitSlots) { - if (tmp & 0x1) + if (relevantWaitSlots & 0x0001) { - if (eventFlag->m.x10[i] & *mask && eventFlag->m.x10[i] != *mask) + if (eventFlag->m.spuTaskWaitMask[i] & *mask && eventFlag->m.spuTaskWaitMask[i] != *mask) { return CELL_SPURS_TASK_ERROR_AGAIN; } } - tmp >>= 1; + relevantWaitSlots >>= 1; i--; } }
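// A restatement of the conflict rule implemented by the loop above (an
// illustration, not code from this patch): once the slots that can never
// conflict have been pruned, a new wait conflicts with an occupied slot
// exactly when the slot's mask overlaps the new mask without being identical
// to it:
//
//     conflict = (slotMask & newMask) != 0 && slotMask != newMask
//
// e.g. newMask 0x00F0 vs slotMask 0x0030 overlaps but differs, so the wait
// fails with CELL_SPURS_TASK_ERROR_AGAIN, while slotMask 0x0F00 (disjoint)
// or 0x00F0 (identical) can coexist with it.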
+ // There is no need to block if all bits required by the wait operation have already been set or + // if the wait mode is OR and at least one of the bits required by the wait operation has been set. bool recv; - if ((*mask & ~bits) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && bits)) + if ((*mask & ~relevantEvents) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && relevantEvents)) { + // If the clear flag is AUTO then clear the bits consumed by this thread if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) { - eventFlag->m.bits &= ~bits; + eventFlag->m.events &= ~relevantEvents; } recv = false; } else { + // If we reach here it means that the conditions for this thread have not been met. + // If this is a try wait operation then do not block but return an error code. if (block == 0) { return CELL_SPURS_TASK_ERROR_BUSY; } - eventFlag->m.x06 = 0; + eventFlag->m.ppuWaitSlotAndMode = 0; if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { - u8 i = 0; - u16 tmp = eventFlag->m.x08; - while (tmp & 0x01) + // Find an unused wait slot + int i = 0; + u16 spuTaskUsedWaitSlots = eventFlag->m.spuTaskUsedWaitSlots; + while (spuTaskUsedWaitSlots & 0x0001) { - tmp >>= 1; + spuTaskUsedWaitSlots >>= 1; i++; } - if (i == 16) + if (i == CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS) { + // Event flag has no empty wait slots return CELL_SPURS_TASK_ERROR_BUSY; } - eventFlag->m.x06 = (15 - i) << 4; + // Mark the found wait slot as used by this thread + eventFlag->m.ppuWaitSlotAndMode = (CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1 - i) << 4; } - eventFlag->m.x06 |= mode; - eventFlag->m.x04 = *mask; - recv = true; + // Save the wait mask and mode for this thread + eventFlag->m.ppuWaitSlotAndMode |= mode; + eventFlag->m.ppuWaitMask = *mask; + recv = true; } + u16 receivedEventFlag; if (recv) { + // Block till something happens vm::var data; auto rc = sys_event_queue_receive(eventFlag->m.eventQueueId, data, 0); if (rc != CELL_OK) @@ -1964,17 +1989,17 @@ s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr assert(0); } - u8 i = 0; + int i = 0; if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { - i = eventFlag->m.x06 >> 4; + i = eventFlag->m.ppuWaitSlotAndMode >> 4; } - bits = eventFlag->m.x30[i]; - eventFlag->m.x07 = 0; + receivedEventFlag = eventFlag->m.pendingRecvTaskEvents[i]; + eventFlag->m.ppuPendingRecv = 0; } - *mask = bits; + *mask = receivedEventFlag; return CELL_OK; } @@ -2006,7 +2031,7 @@ s64 cellSpursEventFlagClear(vm::ptr eventFlag, u16 bits) return CELL_SPURS_TASK_ERROR_ALIGN; } - eventFlag->m.bits &= ~bits; + eventFlag->m.events &= ~bits; return CELL_OK; #endif } @@ -2033,82 +2058,95 @@ s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) return CELL_SPURS_TASK_ERROR_PERM; } - u16 tmp1 = 0; - auto send = false; - u16 tmp3 = 0; - u16 tmp4 = 0; - - if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.x04) + u16 ppuEventFlag = 0; + bool send = false; + int ppuWaitSlot = 0; + u16 eventsToClear = 0; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.ppuWaitMask) { - u16 tmp = (eventFlag->m.bits | bits) & eventFlag->m.x04; - if ((eventFlag->m.x04 & ~tmp) == 0 || ((eventFlag->m.x06 & 0x0F) == 0 && tmp != 0)) + u16 ppuRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.ppuWaitMask; + + // Unblock the waiting PPU thread if either all the bits being waited by the thread have been set or + // if the wait mode of the thread is OR and at least one bit the thread is waiting on has been set + if ((eventFlag->m.ppuWaitMask & ~ppuRelevantEvents) == 0 || + ((eventFlag->m.ppuWaitSlotAndMode & 0x0F) == CELL_SPURS_EVENT_FLAG_OR && ppuRelevantEvents != 0)) { - eventFlag->m.x07 = 1; - eventFlag->m.x04 = 0; - tmp1 = tmp; - send = true; - tmp3 = eventFlag->m.x06 >> 4; - tmp4 = tmp; + eventFlag->m.ppuPendingRecv = 1; + eventFlag->m.ppuWaitMask = 0; + ppuEventFlag = ppuRelevantEvents; + eventsToClear = ppuRelevantEvents; + ppuWaitSlot = eventFlag->m.ppuWaitSlotAndMode >> 4; + send = true; } } - u16 i = 15; - u16 j = 0; - u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; - u16 tmp5 = 0; - u16 x30[16]; + int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1; + int j = 0; + u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv; + u16 spuTaskPendingRecv = 0;
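// A note on the two numberings in play below (an illustration, not from the
// patch): the per-slot arrays (spuTaskWaitMask, pendingRecvTaskEvents,
// waitingTaskId) are indexed with i, while the 16-bit bookkeeping masks are
// scanned LSB first with the counter j, where j = 15 - i. Slot i therefore
// corresponds to mask bit (0x8000 >> i), which is the form the wake-up loop
// at the end of this function tests; the scan below instead shifts
// relevantWaitSlots right one bit per iteration and tests bit 0.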
+ u16 pendingRecvTaskEvents[16]; while (relevantWaitSlots) { - if (tmp & 0x0001) + if (relevantWaitSlots & 0x0001) { - u16 tmp6 = (eventFlag->m.bits | bits) & eventFlag->m.x10[i]; - if ((eventFlag->m.x10[i] & ~tmp6) == 0 || (((eventFlag->m.x0A >> j) & 0x01) == 0 && (eventFlag->m.x10[i] & ~tmp6) != 0)) + u16 spuTaskRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.spuTaskWaitMask[i]; + + // Unblock the waiting SPU task if either all the bits being waited by the task have been set or + // if the wait mode of the task is OR and at least one bit the thread is waiting on has been set + if ((eventFlag->m.spuTaskWaitMask[i] & ~spuTaskRelevantEvents) == 0 || + (((eventFlag->m.spuTaskWaitMode >> j) & 0x0001) == CELL_SPURS_EVENT_FLAG_OR && spuTaskRelevantEvents != 0)) { - tmp4 |= tmp6; - tmp5 |= 1 << j; - x30[j] = tmp6; + eventsToClear |= spuTaskRelevantEvents; + spuTaskPendingRecv |= 1 << j; + pendingRecvTaskEvents[j] = spuTaskRelevantEvents; } } - tmp >>= 1; + relevantWaitSlots >>= 1; i--; j++; } - eventFlag->m.bits |= bits; - eventFlag->m.x02 |= tmp5; + eventFlag->m.events |= bits; + eventFlag->m.spuTaskPendingRecv |= spuTaskPendingRecv; + + // If the clear flag is AUTO then clear the bits consumed by all tasks marked to be unblocked if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) { - eventFlag->m.bits &= ~tmp4; + eventFlag->m.events &= ~eventsToClear; } if (send) { - eventFlag->m.x30[tmp3] = tmp1; + // Signal the PPU thread to be woken up + eventFlag->m.pendingRecvTaskEvents[ppuWaitSlot] = ppuEventFlag; if (sys_event_port_send(eventFlag->m.eventPortId, 0, 0, 0) != CELL_OK) { assert(0); } } - if (tmp5) + if (spuTaskPendingRecv) { - for (auto i = 0; i < 16; i++) + // Signal each SPU task whose conditions have been met to be woken up + for (int i = 0; i < CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS; i++) { - if (tmp5 & (0x8000 >> i)) + if (spuTaskPendingRecv & (0x8000 >> i)) { - eventFlag->m.x30[i] = x30[i]; + eventFlag->m.pendingRecvTaskEvents[i] = pendingRecvTaskEvents[i]; vm::var taskset; if (eventFlag->m.isIwl) { - cellSpursLookUpTasksetAddress(vm::ptr::make((u32)eventFlag->m.addr), vm::ptr::make(taskset.addr()), eventFlag->m.x60[i]); + cellSpursLookUpTasksetAddress(vm::ptr::make((u32)eventFlag->m.addr), + vm::ptr::make(taskset.addr()), + eventFlag->m.waitingTaskWklId[i]); } else { taskset.value() = (u32)eventFlag->m.addr; } - auto rc = _cellSpursSendSignal(vm::ptr::make(taskset.addr()), eventFlag->m.x50[i]); + auto rc = _cellSpursSendSignal(vm::ptr::make(taskset.addr()), eventFlag->m.waitingTaskId[i]); if (rc == CELL_SPURS_TASK_ERROR_INVAL || rc == CELL_SPURS_TASK_ERROR_STAT) { return CELL_SPURS_TASK_ERROR_FATAL;
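A worked example of the unblock rules above, with made-up values: suppose events = 0x0001, one waiter in AND mode with mask 0x0003, one waiter in OR mode with mask 0x0100, and cellSpursEventFlagSet is called with bits = 0x0102.

// AND waiter: (events | bits) & mask = 0x0103 & 0x0003 = 0x0003
//             mask & ~0x0003 == 0      -> every required bit is set, unblock
// OR waiter:  (events | bits) & mask = 0x0103 & 0x0100 = 0x0100
//             at least one bit is set  -> unblock
// Under CELL_SPURS_EVENT_FLAG_CLEAR_AUTO, eventsToClear = 0x0003 | 0x0100,
// so after events |= bits the flag ends at 0x0103 & ~0x0103 = 0x0000.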
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 1946675c54..172bd43b23 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -201,8 +201,9 @@ enum SpursTaskConstants enum CellSpursEventFlagWaitMode { - CELL_SPURS_EVENT_FLAG_OR = 0, - CELL_SPURS_EVENT_FLAG_AND = 1 + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1, + CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST = CELL_SPURS_EVENT_FLAG_AND, }; enum CellSpursEventFlagClearMode @@ -552,18 +553,21 @@ struct CellSpursEventFlag // Real data struct _CellSpursEventFlag { - be_t bits; // 0x00 Bit mask of event set bits - be_t x02; // 0x02 Bit mask of SPU thread slots whose conditions have met - be_t x04; // 0x04 Wait mask for PPU thread - u8 x06; // 0x06 Top 4 bits: Bit number for PPU thread. Bottom 4 bits: Wait mode of PPU thread - u8 x07; // 0x07 Set to 1 if the blocked PPU thread's condition has been met - be_t x08; // 0x08 Bit mask of used wait slots - be_t x0A; // 0x0A Bit mask of used wait slots whose wait mode is AND - u8 spuPort; // 0x0C - u8 isIwl; // 0x0D - u8 direction; // 0x0E - u8 clearMode; // 0x0F - be_t x10[16]; // 0x10 Wait mask for SPU threads - be_t x30[16]; // 0x30 Received event flag mask for SPU threads - u8 x50[16]; // 0x50 Task id of waiting SPU threads - u8 x60[16]; // 0x60 Workload Ids of waiting SPU threads - be_t addr; // 0x70 - be_t eventPortId; // 0x78 - be_t eventQueueId; // 0x7C + be_t events; // 0x00 Event bits + be_t spuTaskPendingRecv; // 0x02 A bit is set to 1 when the conditions of the SPU task using the slot are met and back to 0 when the SPU task unblocks + be_t ppuWaitMask; // 0x04 Wait mask for blocked PPU thread + u8 ppuWaitSlotAndMode; // 0x06 Top 4 bits: Wait slot number of the blocked PPU thread, Bottom 4 bits: Wait mode of the blocked PPU thread + u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is ublocked + be_t spuTaskUsedWaitSlots; // 0x08 A bit is set to 1 if the wait slot corresponding to the bit is used by an SPU task and 0 otherwise + be_t spuTaskWaitMode; // 0x0A A bit is set to 1 if the wait mode for the SPU task corresponding to the bit is AND and 0 otherwise + u8 spuPort; // 0x0C + u8 isIwl; // 0x0D + u8 direction; // 0x0E + u8 clearMode; // 0x0F + be_t spuTaskWaitMask[16]; // 0x10 Wait mask for blocked SPU tasks + be_t pendingRecvTaskEvents[16]; // 0x30 The value of event flag when the wait condition for the thread/task was met + u8 waitingTaskId[16]; // 0x50 Task id of waiting SPU threads + u8 waitingTaskWklId[16]; // 0x60 Workload id of waiting SPU threads + be_t addr; // 0x70 + be_t eventPortId; // 0x78 + be_t eventQueueId; // 0x7C } m; static_assert(sizeof(_CellSpursEventFlag) == size, "Wrong _CellSpursEventFlag size");
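The offsets documented in the struct above can be checked mechanically. A self-contained sketch (an illustration, not part of the patch; plain fixed-width types stand in for the emulator's big-endian wrappers, which have the same size):

#include <cstddef>
#include <cstdint>

// Mirrors the _CellSpursEventFlag layout documented in patch 15.
struct EventFlagLayout
{
    uint16_t events;                    // 0x00
    uint16_t spuTaskPendingRecv;        // 0x02
    uint16_t ppuWaitMask;               // 0x04
    uint8_t  ppuWaitSlotAndMode;        // 0x06
    uint8_t  ppuPendingRecv;            // 0x07
    uint16_t spuTaskUsedWaitSlots;      // 0x08
    uint16_t spuTaskWaitMode;           // 0x0A
    uint8_t  spuPort;                   // 0x0C
    uint8_t  isIwl;                     // 0x0D
    uint8_t  direction;                 // 0x0E
    uint8_t  clearMode;                 // 0x0F
    uint16_t spuTaskWaitMask[16];       // 0x10
    uint16_t pendingRecvTaskEvents[16]; // 0x30
    uint8_t  waitingTaskId[16];         // 0x50
    uint8_t  waitingTaskWklId[16];      // 0x60
    uint64_t addr;                      // 0x70
    uint32_t eventPortId;               // 0x78
    uint32_t eventQueueId;              // 0x7C
};

static_assert(offsetof(EventFlagLayout, spuTaskWaitMask) == 0x10, "unexpected layout");
static_assert(offsetof(EventFlagLayout, addr) == 0x70, "unexpected layout");
static_assert(sizeof(EventFlagLayout) == 0x80, "must span exactly one 128-byte line");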
From 430aa9af8912c0c00944e62a91f309153b15dd69 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 24 Jan 2015 00:41:29 +0530 Subject: [PATCH 16/29] SPURS: Implement cellSpursGetWorkloadData and cellSpursLookUpTasksetAddress --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 59 +++++++++++++++++++++--- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index c8fc774ad8..294331a6b3 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1469,12 +1469,12 @@ s64 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 workloadId) #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0xA658, libsre_rtoc); #else - if (!spurs) + if (spurs.addr() == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; } - if (spurs.addr() % CellSpurs::align) + if (spurs.addr() % CellSpurs::align) { return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; } @@ -1522,13 +1522,47 @@ s64 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 workloadId) #endif } -s64 cellSpursGetWorkloadData() +s64 cellSpursGetWorkloadData(vm::ptr spurs, vm::ptr data, u32 workloadId) { + cellSpurs->Warning("%s(spurs_addr=0x%x, data=0x%x, workloadId=%d)", __FUNCTION__, spurs.addr(), data.addr(), workloadId); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0xA78C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (spurs.addr() == 0 || data.addr() == 0) + { + return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; + } + + if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0)) + { + return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; + } + + if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0) + { + return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; + } + + if (spurs->m.exception) + { + return CELL_SPURS_POLICY_MODULE_ERROR_STAT; + } + + if (workloadId >= CELL_SPURS_MAX_WORKLOAD) + { + *data = spurs->m.wklInfo2[workloadId & 0x0F].arg; + } + else + { + *data = spurs->m.wklInfo1[workloadId].arg; + } + return CELL_OK; #endif } @@ -3272,7 +3306,20 @@ s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr> data; + auto rc = cellSpursGetWorkloadData(spurs, vm::ptr::make(data.addr()), id); + if (rc != CELL_OK) + { + // Convert policy module error code to a task error code + return rc ^ 0x100; + } + + taskset.set((u32)data.value()); return CELL_OK; #endif }
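cellSpursGetWorkloadData above encodes the split workload table: ids 0-15 index wklInfo1 directly, while ids 16-31 (valid only when the SF1_32_WORKLOADS flag is set) fold into wklInfo2 through their low four bits, and registration is tracked MSB first in wklMskA. A minimal sketch of the two mappings, assuming CELL_SPURS_MAX_WORKLOAD is 16 (the helper names are illustrative):

// Which info table and index a workload id resolves to.
static void spursWorkloadSlot(u32 workloadId, int& table, u32& index)
{
    if (workloadId >= 16) // CELL_SPURS_MAX_WORKLOAD
    {
        table = 2;                 // wklInfo2 holds the second set of 16
        index = workloadId & 0x0F; // 16 -> 0, 17 -> 1, ..., 31 -> 15
    }
    else
    {
        table = 1;                 // wklInfo1
        index = workloadId;
    }
}

// Whether workload id n is registered: bit n counted from the MSB of wklMskA.
static bool spursWorkloadRegistered(u32 wklMskA, u32 n)
{
    return (wklMskA & (0x80000000u >> n)) != 0;
}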
From 2e2f92f4f64a7ddea7a020a907cc1dcb9dfb7aba Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 26 Jan 2015 20:15:58 +0530 Subject: [PATCH 17/29] SPURS: Implement some portions of taskset policy module --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 33 ++++- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 142 +++++++++++++++++++- 3 files changed, 170 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 294331a6b3..99d2d35eea 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1702,7 +1702,7 @@ s64 _cellSpursEventFlagInitialize(vm::ptr spurs, vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention) { - cellSpurs->Warning("%s(attribute=0x%x, revision=%u, skd_version=%u, args=0x%llx, priority=0x%x, max_contention=%u)", + cellSpurs->Warning("%s(attribute=0x%x, revision=%d, sdk_version=%d, args=0x%llx, priority=0x%x, max_contention=%d)", __FUNCTION__, attribute.addr(), revision, sdk_version, args, priority.addr(), max_contention); #ifdef PRX_DEBUG diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 172bd43b23..196fedd3a8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -564,7 +564,7 @@ struct CellSpursEventFlag be_t spuTaskPendingRecv; // 0x02 A bit is set to 1 when the conditions of the SPU task using the slot are met and back to 0 when the SPU task unblocks be_t ppuWaitMask; // 0x04 Wait mask for blocked PPU thread u8 ppuWaitSlotAndMode; // 0x06 Top 4 bits: Wait slot number of the blocked PPU thread, Bottom 4 bits: Wait mode of the blocked PPU thread - u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is ublocked + u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is unblocked be_t spuTaskUsedWaitSlots; // 0x08 A bit is set to 1 if the wait slot corresponding to the bit is used by an SPU task and 0 otherwise be_t spuTaskWaitMode; // 0x0A A bit is set to 1 if the wait mode for the SPU task corresponding to the bit is AND and 0 otherwise u8 spuPort; // 0x0C @@ -852,8 +852,9 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -// The SPURS kernel data store. This resides at 0x00 of the LS. -struct SpursKernelMgmtData { +// The SPURS kernel data store. This resides at 0x100 of the LS. +struct SpursKernelMgmtData +{ u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 u8 wklLocPendingContention[0x10]; // 0x190 @@ -867,8 +868,7 @@ struct SpursKernelMgmtData { be_t wklCurrentId; // 0x1DC be_t yieldToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 - u8 x1E8; // 0x1E8 - u8 x1E9; // 0x1E9 + u8 moduleId[2]; // 0x1E8 u8 sysSrvInitialised; // 0x1EA u8 spuIdling; // 0x1EB be_t wklRunnable1; // 0x1EC @@ -880,5 +880,28 @@ struct SpursKernelMgmtData { u8 wklUniqueId[0x10]; // 0x220 }; +static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKernelMgmtData"); + +// The SPURS taskset policy module data store. This resides at 0x2700 of the LS. +struct SpursTasksetPmMgmtData +{ + u8 tempArea[0x80]; // 0x2700 + u8 x2780[0x27B8 - 0x2780]; // 0x2780 + vm::bptr taskset; // 0x27B8 + be_t kernelMgmt; // 0x27C0 + be_t yieldAddr; // 0x27C4 + be_t x27C8; // 0x27C8 + be_t spuNum; // 0x27CC + be_t dmaTagId; // 0x27D0 + be_t taskId; // 0x27D4 + u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 + u8 moduleId[16]; // 0x2840 + u8 x2850[0x2C80 - 0x2850]; // 0x2850 + be_t contextSaveArea[50]; // 0x2C80 + u8 x2FA0[0x3000 - 0x2FA0]; // 0x2FA0 +}; + +static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 06ff2553be..624be7e805 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -33,6 +33,10 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); void spursSysServiceWorkloadEntry(SPUThread & spu); +// +// SPURS taskset policy module functions +// + extern Module *cellSpurs; ////////////////////////////////////////////////////////////////////////////// @@ -378,8 +382,8 @@ void spursKernelMain(SPUThread & spu) { } if (!isSecond) { - mgmt->x1E8 = 0; - mgmt->x1E9 = 0; + mgmt->moduleId[0] = 0; + mgmt->moduleId[1] = 0; } // Run workload @@ -387,6 +391,7 @@ void spursKernelMain(SPUThread & spu) { spu.GPR[3]._u32[3] = 0x100; spu.GPR[4]._u64[1] = wkl.arg; spu.GPR[5]._u32[3] = pollStatus; + spu.SetPc(0xA00); switch (mgmt->wklCurrentAddr.addr()) { case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: spursSysServiceWorkloadEntry(spu);
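The validity check that opens spursTasksetProcessRequest below enforces the same invariant that _cellSpursSendSignal tests on the PPU side: ready and ready2 are mutually exclusive, a task cannot both run and wait, and no task may appear in any set without being enabled. A sketch of the per-word predicate (illustrative only, not code from this patch):

// One 32-bit word of the taskset bitsets is internally consistent when no
// task is in two mutually exclusive states and every flagged task is enabled.
static bool spursTasksetStateValid(u32 running, u32 ready, u32 ready2,
                                   u32 waiting, u32 signalled, u32 enabled)
{
    if (waiting & running)
    {
        return false; // a task cannot run and wait at the same time
    }

    if (ready & ready2)
    {
        return false; // the two ready sets are mutually exclusive
    }

    // no state bit may be set for a task that is not enabled
    return ((running | ready | ready2 | waiting | signalled) & ~enabled) == 0;
}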
@@ -797,3 +802,136 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) { // TODO: Ensure that this function always returns to the SPURS kernel return; } + +////////////////////////////////////////////////////////////////////////////// +// SPURS taskset policy module functions +////////////////////////////////////////////////////////////////////////////// + +bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) { + auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + // Verify taskset state is valid + for (auto i = 0; i < 4; i ++) { + if ((mgmt->taskset->m.waiting_set[i] & mgmt->taskset->m.running_set[i]) || + (mgmt->taskset->m.ready_set[i] & mgmt->taskset->m.ready2_set[i]) || + ((mgmt->taskset->m.running_set[i] | mgmt->taskset->m.ready_set[i] | + mgmt->taskset->m.ready2_set[i] | mgmt->taskset->m.signal_received_set[i] + mgmt->taskset->m.waiting_set[i]) & ~mgmt->taskset->m.enabled_set[i])) { + assert(0); + } + } + + // TODO: Implement cases + s32 delta = 0; + switch (request + 1) { + case 0: + break; + case 1: + break; + case 2: + break; + case 3: + break; + case 4: + break; + case 5: + break; + case 6: + break; + default: + assert(0); + break; + } + + // Set the ready count of the workload to the number of ready tasks + do { + s32 readyCount = kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD ? + kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed() : + kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed(); + + auto newReadyCount = readyCount + delta > 0xFF ? 0xFF : readyCount + delta < 0 ? 0 : readyCount + delta; + + if (kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD) { + kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(newReadyCount); + } else { + kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(newReadyCount); + } + + delta += readyCount; + } while (delta > 0); + + // TODO: Implement return + return false; +} + +void spursTasksetDispatch() { +} + +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { + if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { + spursTasksetProcessRequest(spu, 6, nullptr, nullptr); + } +} + +bool spursTasksetShouldYield(SPUThread & spu) { + u32 pollStatus; + + if (cellSpursModulePollStatus(spu, &pollStatus)) { + return true; + } + + spursTasksetProcessPollStatus(spu, pollStatus); + return false; +} + +void spursTasksetInit(SPUThread & spu, u32 pollStatus) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + + kernelMgmt->moduleId[0] = 'T'; + kernelMgmt->moduleId[1] = 'K'; + + // Trace - START: Module='TKST' + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START + memcpy(pkt.data.start.module, "TKST", 4); + pkt.data.start.level = 2; + pkt.data.start.ls = 0xA00 >> 2; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursTasksetProcessPollStatus(spu, pollStatus); +} + +void spursTasksetEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + // Check if the function was invoked by the SPURS kernel or because of a syscall + if (spu.PC != 0xA70) { + // Called from kernel + auto kernelMgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; + + memset(mgmt, 0, sizeof(*mgmt)); + mgmt->taskset.set(arg); + memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); + mgmt->kernelMgmt = spu.GPR[3]._u32[3]; + mgmt->yieldAddr = 0xA70; + mgmt->spuNum = kernelMgmt->spuNum; + mgmt->dmaTagId = kernelMgmt->dmaTagId; + mgmt->taskId = 0xFFFFFFFF; + + spursTasksetInit(spu, pollStatus); + // TODO: Dispatch + } + + mgmt->contextSaveArea[0] = spu.GPR[0]; + mgmt->contextSaveArea[1] = spu.GPR[1]; + for (auto i = 0; i < 48; i++) { + mgmt->contextSaveArea[i + 2] = spu.GPR[80 + i]; + } + + // TODO: Process syscall +} From a7728c9067165c23d0e70000a72fefc11a84612a Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 28 Jan 2015 23:48:06 +0530 Subject: [PATCH 18/29] SPURS: Document some parts of taskset policy module --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 40 +++++++++---
rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 68 ++++++++++++++++++--- 3 files changed, 92 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 99d2d35eea..c9795203f8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2821,7 +2821,7 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: } taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr()); - taskset->m.task_info[tmp_task_id].context_save_storage.set((context_addr.addr() & 0xFFFFFFF8) | alloc_ls_blocks); + taskset->m.task_info[tmp_task_id].context_save_storage_and_alloc_ls_blocks = (context_addr.addr() | alloc_ls_blocks); for (u32 i = 0; i < 2; i++) { taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 196fedd3a8..4ccf224ec7 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -181,6 +181,12 @@ enum SpursTraceConstants CELL_SPURS_TRACE_SERVICE_WAIT = 0x02, CELL_SPURS_TRACE_SERVICE_EXIT = 0x03, + // Task incident + CELL_SPURS_TRACE_TASK_DISPATCH = 0x01, + CELL_SPURS_TRACE_TASK_YIELD = 0x03, + CELL_SPURS_TRACE_TASK_WAIT = 0x04, + CELL_SPURS_TRACE_TASK_EXIT = 0x05, + // Trace mode flags CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER = 0x1, CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP = 0x2, @@ -335,6 +341,12 @@ struct CellSpursTracePacket be_t ls; } start; + struct + { + be_t incident; + be_t taskId; + } task; + be_t user; be_t guid; be_t stop; @@ -607,7 +619,7 @@ struct CellSpursTaskset { CellSpursTaskArgument args; vm::bptr elf_addr; - vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + be_t context_save_storage_and_alloc_ls_blocks; // This is (context_save_storage_addr | allocated_ls_blocks) CellSpursTaskLsPattern ls_pattern; }; @@ -716,7 +728,7 @@ struct CellSpursTaskset2 { CellSpursTaskArgument args; vm::bptr elf_addr; - vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + vm::bptr context_save_storage; // This is (context_save_storage_addr | allocated_ls_blocks) CellSpursTaskLsPattern ls_pattern; }; @@ -885,10 +897,11 @@ static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKer // The SPURS taskset policy module data store. This resides at 0x2700 of the LS. 
struct SpursTasksetPmMgmtData { - u8 tempArea[0x80]; // 0x2700 - u8 x2780[0x27B8 - 0x2780]; // 0x2780 + u8 tempAreaTaskset[0x80]; // 0x2700 + u8 tempAreaTaskInfo[0x30]; // 0x2780 + be_t x27B0; // 0x27B0 vm::bptr taskset; // 0x27B8 - be_t kernelMgmt; // 0x27C0 + be_t kernelMgmtAddr; // 0x27C0 be_t yieldAddr; // 0x27C4 be_t x27C8; // 0x27C8 be_t spuNum; // 0x27CC @@ -896,9 +909,20 @@ struct SpursTasksetPmMgmtData be_t taskId; // 0x27D4 u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 u8 moduleId[16]; // 0x2840 - u8 x2850[0x2C80 - 0x2850]; // 0x2850 - be_t contextSaveArea[50]; // 0x2C80 - u8 x2FA0[0x3000 - 0x2FA0]; // 0x2FA0 + u8 stackArea[0x2C80 - 0x2850]; // 0x2850 + be_t savedContextLr; // 0x2C80 + be_t savedContextSp; // 0x2C90 + be_t savedContextR80ToR127[48]; // 0x2CA0 + be_t savedContextFpscr; // 0x2FA0 + be_t savedWriteTagGroupQueryMask; // 0x2FB0 + be_t savedSpuWriteEventMask; // 0x2FB4 + be_t tasksetMgmtAddr; // 0x2FB8 + be_t lowestLoadSegmentAddr; // 0x2FBC + be_t x2FC0; // 0x2FC0 + be_t x2FC8; // 0x2FC8 + be_t x2FD0; // 0x2FD0 + be_t taskExitCode; // 0x2FD4 + u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 624be7e805..e4ebca84a5 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -865,7 +865,57 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 return false; } -void spursTasksetDispatch() { +void spursTasksetDispatch(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + + u32 taskId; + u32 isWaiting; + spursTasksetProcessRequest(spu, 5, &taskId, &isWaiting); + if (taskId >= CELL_SPURS_MAX_TASK) { + // TODO: spursTasksetExit(spu); + } + + mgmt->taskId = taskId; + u64 elfAddr = mgmt->taskset->m.task_info[taskId].elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull; + + // Trace - Task: Incident=dispatch + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK; + pkt.data.task.incident = CELL_SPURS_TRACE_TASK_DISPATCH; + pkt.data.task.taskId = taskId; + cellSpursModulePutTrace(&pkt, 0x1F); + + if (isWaiting == 0) { + } + + if (mgmt->taskset->m.enable_clear_ls) { + memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); + } + + // If the entire LS is saved then there is no need to load the ELF as it will be saved in the context save area + if (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || + (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + // Load the ELF + // TODO: Load ELF + } + + // Load save context from main memory to LS + u64 context_save_storage = mgmt->taskset->m.task_info[taskId].context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + for (auto i = 6; i < 128; i++) { + bool shouldLoad = mgmt->taskset->m.task_info[taskId].ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ?
true : false; + if (shouldLoad) { + memcpy(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP + ((i - 6) << 11)), + vm::get_ptr((u32)context_save_storage + 0x400 + ((i - 6) << 11)), 0x800); + } + } + + // Trace - GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; + pkt.data.guid = 0; // TODO: Put GUID of taskId here + cellSpursModulePutTrace(&pkt, 0x1F); } void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { @@ -917,20 +967,20 @@ void spursTasksetEntry(SPUThread & spu) { memset(mgmt, 0, sizeof(*mgmt)); mgmt->taskset.set(arg); memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); - mgmt->kernelMgmt = spu.GPR[3]._u32[3]; - mgmt->yieldAddr = 0xA70; - mgmt->spuNum = kernelMgmt->spuNum; - mgmt->dmaTagId = kernelMgmt->dmaTagId; - mgmt->taskId = 0xFFFFFFFF; + mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3]; + mgmt->yieldAddr = 0xA70; + mgmt->spuNum = kernelMgmt->spuNum; + mgmt->dmaTagId = kernelMgmt->dmaTagId; + mgmt->taskId = 0xFFFFFFFF; spursTasksetInit(spu, pollStatus); // TODO: Dispatch } - mgmt->contextSaveArea[0] = spu.GPR[0]; - mgmt->contextSaveArea[1] = spu.GPR[1]; + mgmt->savedContextLr = spu.GPR[0]; + mgmt->savedContextSp = spu.GPR[1]; for (auto i = 0; i < 48; i++) { - mgmt->contextSaveArea[i + 2] = spu.GPR[80 + i]; + mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; } // TODO: Process syscall From 62e2d8d9a7b8aaa110673862b80733b4e93b42c7 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 29 Jan 2015 20:20:34 +0530 Subject: [PATCH 19/29] SPURS: Update kernel to use lock line reservations --- rpcs3/Emu/Cell/MFC.h | 8 + rpcs3/Emu/Cell/SPUThread.cpp | 9 +- rpcs3/Emu/Cell/SPUThread.h | 31 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 19 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 19 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 623 +++++++++++--------- rpcs3/stdafx.h | 2 + 7 files changed, 414 insertions(+), 297 deletions(-) diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h index a6c731d3da..0b669deb97 100644 --- a/rpcs3/Emu/Cell/MFC.h +++ b/rpcs3/Emu/Cell/MFC.h @@ -35,6 +35,14 @@ enum MFC_GETLLAR_SUCCESS = 4, }; +// MFC Write Tag Status Update Request Channel (ch23) operations +enum +{ + MFC_TAG_UPDATE_IMMEDIATE = 0, + MFC_TAG_UPDATE_ANY = 1, + MFC_TAG_UPDATE_ALL = 2, +}; + enum { MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF, diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 95f678bf02..fcb9b012e9 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1060,7 +1060,14 @@ void SPUThread::StopAndSignal(u32 code) case 0x003: { - m_code3_func(*this); + auto iter = m_addr_to_hle_function_map.find(PC); + assert(iter != m_addr_to_hle_function_map.end()); + + auto return_to_caller = iter->second(*this); + if (return_to_caller) + { + SetBranch(GPR[0]._u32[3] & 0x3fffc); + } break; } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index f880e5ca6e..d6ecbe64b0 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -290,6 +290,8 @@ public: u32 m_event_mask; u32 m_events; + std::unordered_map> m_addr_to_hle_function_map; + struct IntrTag { u32 enabled; // 1 == true @@ -509,8 +511,35 @@ public: void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); } void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } + void RegisterHleFuncion(u32 addr, std::function function) + { + m_addr_to_hle_function_map[addr] = function; + WriteLS32(addr, 0x00000003); // STOP 3 + } + + void 
UnregisterHleFunction(u32 addr) + { + WriteLS32(addr, 0x00200000); // NOP + m_addr_to_hle_function_map.erase(addr); + } + + void UnregisterHleFunctions(u32 start_addr, u32 end_addr) + { + for (auto iter = m_addr_to_hle_function_map.begin(); iter != m_addr_to_hle_function_map.end();) + { + if (iter->first >= start_addr && iter->first <= end_addr) + { + WriteLS32(iter->first, 0x00200000); // NOP + m_addr_to_hle_function_map.erase(iter++); + } + else + { + iter++; + } + } + } + std::function m_custom_task; - std::function m_code3_func; public: SPUThread(CPUThreadType type = CPU_THREAD_SPU); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index c9795203f8..62349f276d 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -26,7 +26,7 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif -void spursKernelMain(SPUThread & spu); +bool spursKernelMain(SPUThread & spu); s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); @@ -155,7 +155,8 @@ s64 spursInit( assert(!"spu_image_import() failed"); } #else - spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096); + spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096); + spurs->m.spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR; #endif s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL; @@ -179,17 +180,11 @@ s64 spursInit( name += "CellSpursKernel0"; for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++) { - spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num](SPUThread& SPU) - { - SPU.GPR[3]._u32[3] = num; - SPU.GPR[4]._u64[1] = spurs.addr(); - -#ifdef PRX_DEBUG_XXX - return SPU.FastCall(SPU.PC); + auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); +#ifndef PRX_DEBUG_XXX + spu->RegisterHleFuncion(spurs->m.spuImg.entry_point, spursKernelMain); #endif - - spursKernelMain(SPU); - })->GetId(); + spurs->m.spus[num] = spu->GetId(); } if (flags & SAF_SPU_PRINTF_ENABLED) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4ccf224ec7..4d77a06402 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -102,6 +102,12 @@ enum SPURSKernelInterfaces CELL_SPURS_INTERRUPT_VECTOR = 0x0, CELL_SPURS_LOCK_LINE = 0x80, CELL_SPURS_KERNEL_DMA_TAG_ID = 31, + CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, + CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, + CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808, + CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838, + CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, + CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; enum RangeofEventQueuePortNumbers @@ -885,14 +891,23 @@ struct SpursKernelMgmtData u8 spuIdling; // 0x1EB be_t wklRunnable1; // 0x1EC be_t wklRunnable2; // 0x1EE - u8 x1F0[0x210 - 0x1F0]; // 0x1F0 + be_t x1F0; // 0x1F0 + be_t x1F4; // 0x1F4 + be_t x1F8; // 0x1F8 + be_t x1FC; // 0x1FC + be_t x200; // 0x200 + be_t x204; // 0x204 + be_t x208; // 0x208 + be_t x20C; // 0x20C be_t traceBuffer; // 0x210 be_t traceMsgCount; // 0x218 be_t traceMaxCount; // 0x21C u8 wklUniqueId[0x10]; // 0x220 + u8 x230[0x280 - 0x230]; // 0x230 + be_t guid[4]; // 0x280 }; -static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKernelMgmtData"); +static_assert(sizeof(SpursKernelMgmtData) == 
0x190, "Incorrect size for SpursKernelMgmtData"); // The SPURS taskset policy module data store. This resides at 0x2700 of the LS. struct SpursTasksetPmMgmtData diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index e4ebca84a5..898638894c 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -14,11 +14,15 @@ void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); + // // SPURS Kernel functions // -void spursKernelSelectWorkload(SPUThread & spu); -void spursKernelSelectWorkload2(SPUThread & spu); +bool spursKernel1SelectWorkload(SPUThread & spu); +bool spursKernel2SelectWorkload(SPUThread & spu); // // SPURS system service workload functions @@ -31,7 +35,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); -void spursSysServiceWorkloadEntry(SPUThread & spu); +bool spursSysServiceWorkloadEntry(SPUThread & spu); // // SPURS taskset polict module functions @@ -54,9 +58,9 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { spu.GPR[3]._u32[3] = 1; if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { - spursKernelSelectWorkload2(spu); + spursKernel2SelectWorkload(spu); } else { - spursKernelSelectWorkload(spu); + spursKernel1SelectWorkload(spu); } auto result = spu.GPR[3]._u64[1]; @@ -68,14 +72,51 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { return wklId == mgmt->wklCurrentId ? 0 : 1; } +/// Execute a DMA operation +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { + spu.WriteChannel(MFC_LSA, u128::from32r(lsa)); + spu.WriteChannel(MFC_EAH, u128::from32r((u32)(ea >> 32))); + spu.WriteChannel(MFC_EAL, u128::from32r((u32)ea)); + spu.WriteChannel(MFC_Size, u128::from32r(size)); + spu.WriteChannel(MFC_TagID, u128::from32r(tag)); + spu.WriteChannel(MFC_Cmd, u128::from32r(cmd)); + + if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) { + u128 rv; + + spu.ReadChannel(rv, MFC_RdAtomicStat); + return rv._u32[3] ? true : false; + } + + return true; +} + +/// Get the status of DMA operations +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + +/// Wait for DMA operations to complete +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(waitForAll ? 
MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + ////////////////////////////////////////////////////////////////////////////// // SPURS kernel functions ////////////////////////////////////////////////////////////////////////////// /// Select a workload to run -void spursKernelSelectWorkload(SPUThread & spu) { - LV2_LOCK(0); // TODO: lock-free implementation if possible - +bool spursKernel1SelectWorkload(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function @@ -83,140 +124,148 @@ void spursKernelSelectWorkload(SPUThread & spu) { // If the first argument is true then the shared data is not updated with the result. const auto isPoll = spu.GPR[3]._u32[3]; - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; + u32 wklSelectedId; + u32 pollStatus; - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) { - contention[i] += pendingContention[i]; - } - } - } + do { + // DMA and lock the first 0x80 bytes of spurs + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } else { - // Caclulate the scheduling weight for each workload - u16 maxWeight = 0; + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); - u8 requestCount = readyCount + idleSpuCount; + contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; - // For a workload to be considered for scheduling: - // 1. Its priority must not be 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. 
The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { - // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: - // 1. Wokload signal set or workload flag or ready count > contention - // 2. Priority of the workload on the SPU - // 3. Is the workload the last selected workload - // 4. Minimum contention of the workload - // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) - // 6. Is the workload executable same as the currently loaded executable - // 7. The workload id (lesser the number, more the weight) - u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; - weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; - weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; - weight |= 0x01; - - // In case of a tie the lower numbered workload is chosen - if (weight > maxWeight) { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + if (i != mgmt->wklCurrentId) { + contention[i] += pendingContention[i]; } } } - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. 
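+		// Note: on a poll from a different workload the message bit is deliberately left
+		// set, presumably so that the next real entry into the kernel still selects the
+		// system service workload.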
+		if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+			mgmt->spuIdling = 0;
+			if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+				// Clear the message bit
+				spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+			}
+		} else {
+			// Calculate the scheduling weight for each workload
+			u16 maxWeight = 0;
+			for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+				u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+				u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+				u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+				u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+				u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+				u8 requestCount = readyCount + idleSpuCount;
+
+				// For a workload to be considered for scheduling:
+				// 1. Its priority must not be 0
+				// 2. The number of SPUs used by it must be less than the max contention for that workload
+				// 3. The workload should be in runnable state
+				// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+				//    OR the workload must be signalled
+				//    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+				if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+					if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+						// The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+						// 1. Workload signal set or workload flag or ready count > contention
+						// 2. Priority of the workload on the SPU
+						// 3. Is the workload the last selected workload
+						// 4. Minimum contention of the workload
+						// 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
+						// 6. Is the workload executable same as the currently loaded executable
+						// 7. The workload id (lesser the number, more the weight)
+						u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+						weight |= (u16)(mgmt->priority[i] & 0x7F) << 8;
+						weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
+						weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+						weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+						weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00;
+						weight |= 0x01;
+
+						// In case of a tie the lower numbered workload is chosen
+						if (weight > maxWeight) {
+							wklSelectedId = i;
+							maxWeight = weight;
+							pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+							pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+							pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+						}
+					}
+				}
+			}
+
+			// Not sure what this does. Possibly mark the SPU as idle/in use.
+			mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; + + if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + // Clear workload signal for the selected workload + spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); + spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + + // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s + if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) { + spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); + } } } - } - if (!isPoll) { - // Called by kernel - // Increment the contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - contention[wklSelectedId]++; - } + if (!isPoll) { + // Called by kernel + // Increment the contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + contention[wklSelectedId]++; + } - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - mgmt->spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklCurrentContention[i] = contention[i]; + mgmt->wklLocContention[i] = 0; + mgmt->wklLocPendingContention[i] = 0; + } - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocContention[wklSelectedId] = 1; - } + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + mgmt->wklLocContention[wklSelectedId] = 1; + } - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - pendingContention[wklSelectedId]++; - } + mgmt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != mgmt->wklCurrentId) { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + pendingContention[wklSelectedId]++; + } - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; - } + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklPendingContention[i] = pendingContention[i]; + mgmt->wklLocPendingContention[i] = 0; + } - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocPendingContention[wklSelectedId] = 1; + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + mgmt->wklLocPendingContention[wklSelectedId] = 1; + } } - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; spu.GPR[3]._u64[1] = result; + return true; } /// Select a workload to run -void spursKernelSelectWorkload2(SPUThread & spu) { - LV2_LOCK(0); // TODO: lock-free implementation if possible - +bool spursKernel2SelectWorkload(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function @@ -224,202 +273,214 @@ void spursKernelSelectWorkload2(SPUThread & spu) { // If the first argument is true then the shared data is not updated with the result. 
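 	// Note: unlike the first-generation kernel, this variant tracks up to 32 workloads by
 	// packing two 4-bit values per byte; the masks (& 0x0F) and shifts (>> 4) below select
 	// the low nibble (workloads 0-15) or the high nibble (workloads 16-31).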
const auto isPoll = spu.GPR[3]._u32[3]; - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD2]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; - contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; + u32 wklSelectedId; + u32 pollStatus; - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; - pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) { - contention[i] += pendingContention[i]; - } - } - } + do { + // DMA and lock the first 0x80 bytes of spurs + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } else { - // Caclulate the scheduling weight for each workload - u8 maxWeight = 0; + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD2]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - auto j = i & 0x0F; - u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; - // For a workload to be considered for scheduling: - // 1. Its priority must be greater than 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. 
readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload receiver - if (runnable && priority > 0 && maxContention > contention[i]) { - if (wklFlag || wklSignal || readyCount > contention[i]) { - // The scheduling weight of the workload is equal to the priority of the workload for the SPU. - // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. - // In case of a tie the lower numbered workload is chosen. - u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) { - weight |= 0x04; - } - - if (weight > maxWeight) { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; + if (i != mgmt->wklCurrentId) { + contention[i] += pendingContention[i]; } } } - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + // Not sure what this does. Possibly Mark the SPU as in use. + mgmt->spuIdling = 0; + if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + // Clear the message bit + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + } + } else { + // Caclulate the scheduling weight for each workload + u8 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + auto j = i & 0x0F; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? 
spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+				// For a workload to be considered for scheduling:
+				// 1. Its priority must be greater than 0
+				// 2. The number of SPUs used by it must be less than the max contention for that workload
+				// 3. The workload should be in runnable state
+				// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+				//    OR the workload must be signalled
+				//    OR the workload flag is 0 and the workload is configured as the workload receiver
+				if (runnable && priority > 0 && maxContention > contention[i]) {
+					if (wklFlag || wklSignal || readyCount > contention[i]) {
+						// The scheduling weight of the workload is equal to the priority of the workload for the SPU.
+						// The current workload is given a slightly higher weight presumably to reduce the number of context switches.
+						// In case of a tie the lower numbered workload is chosen.
+						u8 weight = priority << 4;
+						if (mgmt->wklCurrentId == i) {
+							weight |= 0x04;
+						}
+
+						if (weight > maxWeight) {
+							wklSelectedId = i;
+							maxWeight = weight;
+							pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+							pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+							pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+						}
+					}
+				}
+			}
+
+			// Not sure what this does. Possibly mark the SPU as idle/in use.
+			mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+			if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+				// Clear workload signal for the selected workload
+				spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+				spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+				// If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+				if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+					spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+				}
+			}
+		}
+
+		if (!isPoll) {
+			// Called by kernel
+			// Increment the contention for the selected workload
+			if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+				contention[wklSelectedId]++;
+			}
+
+			for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+				spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+				mgmt->wklLocContention[i] = 0;
+				mgmt->wklLocPendingContention[i] = 0;
+			}
+
+			mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 
0x10 : 0; + mgmt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != mgmt->wklCurrentId) { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + pendingContention[wklSelectedId]++; + } + + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { + spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); + mgmt->wklLocPendingContention[i] = 0; + } + + mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { - mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - pendingContention[wklSelectedId]++; - } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; spu.GPR[3]._u64[1] = result; + return true; } -/// Entry point of the SPURS kernel -void spursKernelMain(SPUThread & spu) { +/// SPURS kernel main +bool spursKernelMain(SPUThread & spu) { SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); - mgmt->spuNum = spu.GPR[3]._u32[3]; - mgmt->dmaTagId = 0x1F; - mgmt->spurs.set(spu.GPR[4]._u64[1]); - mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - mgmt->wklCurrentUniqueId = 0x20; - bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808; - mgmt->selectWorkloadAddr = 0x290; - spu.WriteLS32(mgmt->yieldToKernelAddr, 2); // hack for cellSpursModuleExit - spu.WriteLS32(mgmt->selectWorkloadAddr, 3); // hack for cellSpursModulePollStatus - spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0 - spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload; + bool isKernel2; + u32 pollStatus; + const CellSpurs::WorkloadInfo * wklInfo; + if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { + // Entry point of SPURS kernel + // Save arguments + mgmt->spuNum = spu.GPR[3]._u32[3]; + mgmt->spurs.set(spu.GPR[4]._u64[1]); - u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - while (true) { - if (Emu.IsStopped()) { - cellSpurs->Warning("Spurs Kernel aborted"); - return; - } + isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - // Get current workload info - auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? 
mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv); + memset(mgmt, 0, sizeof(SpursKernelMgmtData)); - if (mgmt->wklCurrentAddr != wkl.addr) { - if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) { - // Load executable code - memcpy(vm::get_ptr(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size); - } - mgmt->wklCurrentAddr = wkl.addr; - mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); - } - - if (!isSecond) { - mgmt->moduleId[0] = 0; - mgmt->moduleId[1] = 0; - } - - // Run workload - spu.GPR[1]._u32[3] = 0x3FFB0; - spu.GPR[3]._u32[3] = 0x100; - spu.GPR[4]._u64[1] = wkl.arg; - spu.GPR[5]._u32[3] = pollStatus; - spu.SetPc(0xA00); - switch (mgmt->wklCurrentAddr.addr()) { - case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: - spursSysServiceWorkloadEntry(spu); - break; - default: - spu.FastCall(0xA00); - break; - } - - // Check status - auto status = spu.SPU.Status.GetValue(); - if (status == SPU_STATUS_STOPPED_BY_STOP) { - return; + // Initialise the SPURS management area to its initial values + mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; + mgmt->wklCurrentUniqueId = 0x20; + mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + mgmt->yieldToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR; + mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; + if (!isKernel2) { + mgmt->x1F0 = 0xF0020000; + mgmt->x200 = 0x20000; + mgmt->guid[0] = 0x423A3A02; + mgmt->guid[1] = 0x43F43A82; + mgmt->guid[2] = 0x43F26502; + mgmt->guid[3] = 0x420EB382; } else { - assert(status == SPU_STATUS_RUNNING); + mgmt->guid[0] = 0x43A08402; + mgmt->guid[1] = 0x43FB0A82; + mgmt->guid[2] = 0x435E9302; + mgmt->guid[3] = 0x43A3C982; } + spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant + spu.RegisterHleFuncion(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); + spu.RegisterHleFuncion(mgmt->yieldToKernelAddr, spursKernelMain); + spu.RegisterHleFuncion(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); + + // Start the system service workload + spu.RegisterHleFuncion(0xA00, spursSysServiceWorkloadEntry); + wklInfo = &mgmt->spurs->m.wklInfoSysSrv; + pollStatus = 0; + } else if (spu.PC == mgmt->yieldToKernelAddr) { + isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + // Select next workload to run spu.GPR[3].clear(); - if (isSecond) { - spursKernelSelectWorkload2(spu); + if (isKernel2) { + spursKernel2SelectWorkload(spu); } else { - spursKernelSelectWorkload(spu); + spursKernel1SelectWorkload(spu); } - u64 res = spu.GPR[3]._u64[1]; - pollStatus = (u32)(res); - wid = (u32)(res >> 32); + + pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : + (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
&mgmt->spurs->m.wklInfo2[wid & 0xf] : &mgmt->spurs->m.wklInfoSysSrv); + } else { + assert(0); } + + if (!isKernel2) { + mgmt->moduleId[0] = 0; + mgmt->moduleId[1] = 0; + } + + // Run workload + spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; + spu.GPR[1]._u32[3] = 0x3FFB0; + spu.GPR[3]._u32[3] = 0x100; + spu.GPR[4]._u64[1] = wklInfo->arg; + spu.GPR[5]._u32[3] = pollStatus; + spu.SetBranch(0xA00); + return false; } ////////////////////////////////////////////////////////////////////////////// @@ -783,7 +844,7 @@ poll: } /// Entry point of the system service workload -void spursSysServiceWorkloadEntry(SPUThread & spu) { +bool spursSysServiceWorkloadEntry(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); auto arg = spu.GPR[4]._u64[1]; auto pollStatus = spu.GPR[5]._u32[3]; @@ -800,7 +861,7 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) { } // TODO: Ensure that this function always returns to the SPURS kernel - return; + return false; } ////////////////////////////////////////////////////////////////////////////// diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index 825c1c4007..4581c27650 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include "Utilities/GNU.h" From 507638e6d825baa187015d49fbbe98f1db2bedc8 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 31 Jan 2015 00:36:58 +0530 Subject: [PATCH 20/29] SPURS: Update system service workload to use lock line reservations --- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 412 ++++++++++++-------- 3 files changed, 255 insertions(+), 161 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index d6ecbe64b0..ce0036e4a5 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -511,7 +511,7 @@ public: void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); } void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } - void RegisterHleFuncion(u32 addr, std::function function) + void RegisterHleFunction(u32 addr, std::function function) { m_addr_to_hle_function_map[addr] = function; WriteLS32(addr, 0x00000003); // STOP 3 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 62349f276d..83dffb54e7 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -182,7 +182,7 @@ s64 spursInit( { auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); #ifndef PRX_DEBUG_XXX - spu->RegisterHleFuncion(spurs->m.spuImg.entry_point, spursKernelMain); + spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelMain); #endif spurs->m.spus[num] = spu->GetId(); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 898638894c..e6921057c1 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -17,6 +17,7 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); +void spursHalt(); // // SPURS Kernel functions @@ -111,6 +112,12 @@ u32 
spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { return rv._u32[3]; } +// Halt the SPU +void spursHalt(SPUThread & spu) { + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); + spu.Stop(); +} + ////////////////////////////////////////////////////////////////////////////// // SPURS kernel functions ////////////////////////////////////////////////////////////////////////////// @@ -130,7 +137,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD]; @@ -279,7 +286,7 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD2]; @@ -408,8 +415,8 @@ bool spursKernelMain(SPUThread & spu) { SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); bool isKernel2; - u32 pollStatus; - const CellSpurs::WorkloadInfo * wklInfo; + u32 pollStatus; + u64 wklArg; if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { // Entry point of SPURS kernel // Save arguments @@ -440,14 +447,15 @@ bool spursKernelMain(SPUThread & spu) { mgmt->guid[3] = 0x43A3C982; } + // Register SPURS kernel HLE functions spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant - spu.RegisterHleFuncion(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); - spu.RegisterHleFuncion(mgmt->yieldToKernelAddr, spursKernelMain); - spu.RegisterHleFuncion(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); + spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); + spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); + spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - // Start the system service workload - spu.RegisterHleFuncion(0xA00, spursSysServiceWorkloadEntry); - wklInfo = &mgmt->spurs->m.wklInfoSysSrv; + // Register the system HLE service workload entry point + spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + wklArg = mgmt->spurs->m.wklInfoSysSrv.arg; pollStatus = 0; } else if (spu.PC == mgmt->yieldToKernelAddr) { isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; @@ -460,10 +468,11 @@ bool spursKernelMain(SPUThread & spu) { spursKernel1SelectWorkload(spu); } - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : - (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &mgmt->spurs->m.wklInfo2[wid & 0xf] : &mgmt->spurs->m.wklInfoSysSrv); + pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + auto wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
&mgmt->spurs->m.wklInfo2[wid & 0xf] : + &mgmt->spurs->m.wklInfoSysSrv); + wklArg = wklInfo->arg; } else { assert(0); } @@ -477,7 +486,7 @@ bool spursKernelMain(SPUThread & spu) { spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; - spu.GPR[4]._u64[1] = wklInfo->arg; + spu.GPR[4]._u64[1] = wklArg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); return false; @@ -489,33 +498,48 @@ bool spursKernelMain(SPUThread & spu) { /// Restore scheduling parameters after a workload has been preempted by the system service workload void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) { - auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum]; - mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; + u8 wklId; - spursSysServiceUpdateWorkload(spu, mgmt); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + if (spurs->m.sysSrvWorkload[mgmt->spuNum] == 0xFF) { + return; } - // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace - // uses the current worload id to determine the workload to which the trace belongs - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; + wklId = spurs->m.sysSrvWorkload[mgmt->spuNum]; + spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - // Trace - STOP: GUID - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = SPURS_GUID_SYS_WKL; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + spursSysServiceUpdateWorkload(spu, mgmt); - mgmt->wklCurrentId = wklIdSaved; - } + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); + } else { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; + spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace + // uses the current worload id to determine the workload to which the trace belongs + auto wklIdSaved = mgmt->wklCurrentId; + mgmt->wklCurrentId = wklId; + + // Trace - STOP: GUID + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = 
SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + mgmt->wklCurrentId = wklIdSaved; } /// Update the trace count for this SPU in CellSpurs @@ -528,39 +552,52 @@ void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt /// Update trace control in SPU from CellSpurs void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace; - mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum; + bool notify; - bool notify = false; - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) { - mgmt->spurs->m.xCD = 0; - notify = true; - } + u8 sysSrvMsgUpdateTrace; + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - if (arg4 && mgmt->spurs->m.xCD != 0) { - mgmt->spurs->m.xCD = 0; - notify = true; - } + sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; + spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); + spurs->m.xCC &= ~(1 << mgmt->spuNum); + spurs->m.xCC |= arg2 << mgmt->spuNum; + + notify = false; + if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { + spurs->m.xCD = 0; + notify = true; + } + + if (arg4 && spurs->m.xCD != 0) { + spurs->m.xCD = 0; + notify = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); // Get trace parameters from CellSpurs and store them in the LS if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { - if (mgmt->traceMsgCount != 0xFF || mgmt->spurs->m.traceBuffer.addr() == 0) { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); + + if (mgmt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { spursSysServiceUpdateTraceCount(spu, mgmt); } else { - mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum]; + spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, mgmt->dmaTagId); + auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); + mgmt->traceMsgCount = traceBuffer->count[mgmt->spuNum]; } - mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0]; + mgmt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[mgmt->spuNum] << 4); + mgmt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; if (mgmt->traceBuffer == 0) { mgmt->traceMsgCount = 0; } } if (notify) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0); + // TODO: sys_spu_thread_send_event(spurs->m.spuPort, 2, 0); } } @@ -568,81 +605,108 @@ void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set 
of the completed // workloads that have a shutdown completion hook registered - u32 wklNotifyBitSet = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (wklShutdownBitSet & (0x80000000u >> i)) { - mgmt->spurs->m.wklEvent1[i] |= 0x01; - if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { - wklNotifyBitSet |= 0x80000000u >> i; - } - } + u32 wklNotifyBitSet; + u8 spuPort; + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - if (wklShutdownBitSet & (0x8000 >> i)) { - mgmt->spurs->m.wklEvent2[i] |= 0x01; - if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { - wklNotifyBitSet |= 0x8000 >> i; + wklNotifyBitSet = 0; + spuPort = spurs->m.spuPort;; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownBitSet & (0x80000000u >> i)) { + spurs->m.wklEvent1[i] |= 0x01; + if (spurs->m.wklEvent1[i] & 0x02 || spurs->m.wklEvent1[i] & 0x10) { + wklNotifyBitSet |= 0x80000000u >> i; + } + } + + if (wklShutdownBitSet & (0x8000 >> i)) { + spurs->m.wklEvent2[i] |= 0x01; + if (spurs->m.wklEvent2[i] & 0x02 || spurs->m.wklEvent2[i] & 0x10) { + wklNotifyBitSet |= 0x8000 >> i; + } } } - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklNotifyBitSet) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); + // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyMask); } } /// Update workload information in the SPU from CellSpurs void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + } + u32 wklShutdownBitSet = 0; mgmt->wklRunnable1 = 0; mgmt->wklRunnable2 = 0; for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); + // Copy the priority of the workload for this SPU and its unique id to the LS - mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); + mgmt->priority[i] = wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - wklInfo1[i].priority[mgmt->spuNum]; + mgmt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); - // Update workload status and runnable flag based on the workload state - auto wklStatus = mgmt->spurs->m.wklStatus1[i]; - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) { - mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x80000000u >> i; - } - } - - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { // Copy the priority of the workload for this SPU to the LS - if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; + if (wklInfo2[i].priority[mgmt->spuNum]) { + mgmt->priority[i] |= (0x10 - wklInfo2[i].priority[mgmt->spuNum]) << 4; } + } + } + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { // Update workload status and runnable flag based on the workload state - wklStatus = mgmt->spurs->m.wklStatus2[i]; - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; + auto wklStatus = spurs->m.wklStatus1[i]; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable1 |= 0x8000 >> i; } else { - mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); } // If the workload is shutting down and if this is the last SPU from which it is being removed then // add it to the shutdown bit set - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) { - mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x8000 >> i; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { + spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + // Update workload status and runnable flag based on the workload state + wklStatus = spurs->m.wklStatus2[i]; + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable2 |= 0x8000 >> i; + } else { + spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState2[i].read_relaxed() == 
SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { + spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } } } } - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklShutdownBitSet) { spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); @@ -651,57 +715,85 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) /// Process any messages void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - LV2_LOCK(0); + bool updateTrace = false; + bool updateWorkload = false; + bool terminate = false; + + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + // Terminate request + if (spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { + spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + terminate = true; + } + + // Update workload message + if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { + spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + updateWorkload = true; + } + + // Update trace message + if (spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + updateTrace = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); // Process update workload message - if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + if (updateWorkload) { spursSysServiceUpdateWorkload(spu, mgmt); } // Process update trace message - if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + if (updateTrace) { spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); } // Process terminate request - if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + if (terminate) { // TODO: Rest of the terminate processing } } /// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { + // Monitor only lock line reservation lost events + spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); + + bool shouldExit; while (true) { - Emu.GetCoreMutex().lock(); + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Find the number of SPUs that are idling in this SPURS instance u32 nIdlingSpus = 0; for (u32 i = 0; i < 8; i++) { - if (mgmt->spurs->m.spuIdling & (1 << i)) { + if (spurs->m.spuIdling & (1 << i)) { nIdlingSpus++; } } - bool allSpusIdle = nIdlingSpus == mgmt->spurs->m.nSpus ? true: false; - bool exitIfNoWork = mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; + bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; + bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; + shouldExit = allSpusIdle && exitIfNoWork; // Check if any workloads can be scheduled bool foundReadyWorkload = false; - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { foundReadyWorkload = true; } else { - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + if (spurs->m.flags1 & SF1_32_WORKLOADS) { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { u32 j = i & 0x0F; u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklCurrentContention[j] & 0x0F : mgmt->spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); if (runnable && priority > 0 && maxContention > contention) { if (wklFlag || wklSignal || readyCount > contention) { @@ -713,14 +805,14 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { } else { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); u8 requestCount = readyCount + idleSpuCount; - if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > mgmt->spurs->m.wklCurrentContention[i]) { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > mgmt->spurs->m.wklCurrentContention[i])) { + if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) { foundReadyWorkload = true; break; } @@ -729,33 +821,29 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { } } + bool spuIdling = spurs->m.spuIdling & (1 << mgmt->spuNum) ? true : false; + if (foundReadyWorkload && shouldExit == false) { + spurs->m.spuIdling &= ~(1 << mgmt->spuNum); + } else { + spurs->m.spuIdling |= 1 << mgmt->spuNum; + } + // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. - if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && (allSpusIdle == false || exitIfNoWork == false) && foundReadyWorkload == false) { - // The system service blocks by making a reservation and waiting on the reservation lost event. This is unfortunately - // not yet completely implemented in rpcs3. So we busy wait here. - //u128 r; - //spu.ReadChannel(r, 0); - - Emu.GetCoreMutex().unlock(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - Emu.GetCoreMutex().lock(); + if (spuIdling && shouldExit == false && foundReadyWorkload == false) { + // The system service blocks by making a reservation and waiting on the lock line reservation lost event. + u128 r; + spu.ReadChannel(r, SPU_RdEventStat); + spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); } - if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) { - mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum; - } else { - mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); + auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + if (dmaSuccess && (shouldExit || foundReadyWorkload)) { + break; } + } - Emu.GetCoreMutex().unlock(); - - if (allSpusIdle == false || exitIfNoWork == false) { - if (foundReadyWorkload == true) { - return; - } - } else { - // TODO: exit spu thread group - } + if (shouldExit) { + // TODO: exit spu thread group } } @@ -764,22 +852,31 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); if (mgmt->spurs.addr() % CellSpurs::align) { - assert(0); + spursHalt(spu); + return; } // Initialise the system service if this is the first time its being started on this SPU if (mgmt->sysSrvInitialised == 0) { mgmt->sysSrvInitialised = 1; - LV2_LOCK(0); - if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - assert(0); - } + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; - mgmt->traceBuffer = 0; - mgmt->traceMsgCount = -1; + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + // Halt if already initialised + if (spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { + spursHalt(spu); + return; + } + + spurs->m.sysSrvOnSpu |= 
1 << mgmt->spuNum; + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + mgmt->traceBuffer = 0; + mgmt->traceMsgCount = -1; spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); spursSysServiceCleanupAfterPreemption(spu, mgmt); @@ -818,6 +915,8 @@ poll: pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; pkt.data.stop = SPURS_GUID_SYS_WKL; cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); break; } @@ -849,10 +948,6 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu) { auto arg = spu.GPR[4]._u64[1]; auto pollStatus = spu.GPR[5]._u32[3]; - spu.GPR[1]._u32[3] = 0x3FFD0; - *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; - memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { spursSysServiceWorkloadMain(spu, pollStatus); } else { @@ -860,7 +955,6 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu) { // system service workload. Need to implement this. } - // TODO: Ensure that this function always returns to the SPURS kernel return false; } From f7b7c234b7d745e41f35ff8cce2350b623875562 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 31 Jan 2015 01:32:07 +0530 Subject: [PATCH 21/29] SPURS: Implement some SPU thread functions used by the system service module --- rpcs3/Emu/SysCalls/lv2/sys_spu.cpp | 83 ++++++++++++++++++++++++++++++ rpcs3/Emu/SysCalls/lv2/sys_spu.h | 6 +++ 2 files changed, 89 insertions(+) diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp index 29cdb9bd39..b6d29837e2 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp @@ -1099,3 +1099,86 @@ s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value) *value = (u32)t->cfg.value; return CELL_OK; } + +void sys_spu_thread_exit(SPUThread & spu, s32 status) +{ + // Cancel any pending status update requests + u128 r; + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0)); + while (spu.GetChannelCount(MFC_RdTagStat) != 1); + spu.ReadChannel(r, MFC_RdTagStat); + + // Wait for all pending DMA operations to complete + spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL)); + spu.ReadChannel(r, MFC_RdTagStat); + + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status)); + spu.StopAndSignal(0x102); +} + +void sys_spu_thread_group_exit(SPUThread & spu, s32 status) +{ + // Cancel any pending status update requests + u128 r; + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0)); + while (spu.GetChannelCount(MFC_RdTagStat) != 1); + spu.ReadChannel(r, MFC_RdTagStat); + + // Wait for all pending DMA operations to complete + spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL)); + spu.ReadChannel(r, MFC_RdTagStat); + + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status)); + spu.StopAndSignal(0x101); +} + +s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1) +{ + if (spup > 0x3F) + { + return CELL_EINVAL; + } + + if (spu.GetChannelCount(SPU_RdInMbox)) + { + return CELL_EBUSY; + } + + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(data1)); + spu.WriteChannel(SPU_WrOutIntrMbox, u128::from32r((spup << 24) | (data0 & 0x00FFFFFF))); + + u128 r; + spu.ReadChannel(r, SPU_RdInMbox); + return r._u32[3]; +} + +s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status) +{ + if 
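All of these sys_spu_thread_* calls share the same quiesce sequence before signalling the PPU; an annotated copy of the steps, with the comments mirroring the patch's own reading of the channel writes:

```cpp
// MFC quiesce sequence shared by the sys_spu_thread_* calls in this patch.
u128 r;
spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));     // cancel any pending tag-status request
while (spu.GetChannelCount(MFC_RdTagStat) != 1);         // spin until a status is queued
spu.ReadChannel(r, MFC_RdTagStat);                       // consume and discard it

spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));            // select all 32 tag groups
spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));  // report when all complete
spu.ReadChannel(r, MFC_RdTagStat);                       // blocks until every DMA has drained
```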
(spu.GetChannelCount(SPU_RdInMbox)) + { + return CELL_EBUSY; + } + + // Cancel any pending status update requests + u128 r; + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0)); + while (spu.GetChannelCount(MFC_RdTagStat) != 1); + spu.ReadChannel(r, MFC_RdTagStat); + + // Wait for all pending DMA operations to complete + spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL)); + spu.ReadChannel(r, MFC_RdTagStat); + + do + { + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status)); + spu.StopAndSignal(0x120); + spu.ReadChannel(r, SPU_RdInMbox); + } + while (r._u32[3] == CELL_EBUSY); + + return r._u32[3]; +} diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h index e71c606bf7..e129455758 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h @@ -204,3 +204,9 @@ s32 sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr stat); s32 sys_raw_spu_read_puint_mb(u32 id, vm::ptr value); s32 sys_raw_spu_set_spu_cfg(u32 id, u32 value); s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value); + +// SPU Calls +void sys_spu_thread_exit(SPUThread & spu, s32 status); +void sys_spu_thread_group_exit(SPUThread & spu, s32 status); +s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1); +s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status); From d8bed3b0ced0bc219658ad90148f68e1caa6a5bd Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 31 Jan 2015 03:35:03 +0530 Subject: [PATCH 22/29] SPURS: Implement some portions of the taskset PM --- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 6 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 130 ++++++++++++++++---- 2 files changed, 110 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4d77a06402..08fff45abf 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -155,9 +155,11 @@ enum SpursWorkloadConstants : u64 // GUID SPURS_GUID_SYS_WKL = 0x1BB841BF38F89D33ull, + SPURS_GUID_TASKSET_PM = 0x836E915B2E654143ull, // Image addresses SPURS_IMG_ADDR_SYS_SRV_WORKLOAD = 0x100, + SPURS_IMG_ADDR_TASKSET_PM = 0x200, }; enum CellSpursModulePollStatus @@ -209,6 +211,8 @@ enum SpursTaskConstants CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, + CELL_SPURS_TASKSET_PM_ENTRY_ADDR = 0xA00, + CELL_SPURS_TASKSET_PM_SYSCALL_ADDR = 0xA70, }; enum CellSpursEventFlagWaitMode @@ -917,7 +921,7 @@ struct SpursTasksetPmMgmtData be_t x27B0; // 0x27B0 vm::bptr taskset; // 0x27B8 be_t kernelMgmtAddr; // 0x27C0 - be_t yieldAddr; // 0x27C4 + be_t syscallAddr; // 0x27C4 be_t x27C8; // 0x27C8 be_t spuNum; // 0x27CC be_t dmaTagId; // 0x27D0 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index e6921057c1..653d4ca259 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -13,6 +13,7 @@ // void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); +void cellSpursModuleExit(SPUThread & spu); bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); @@ -39,8 +40,16 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); bool 
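Before diving into the module code, it helps to fix the SPU local-storage map these constants imply; every offset below appears in the patches that follow.

```cpp
// SPU local-storage map used by the taskset policy module (LS offsets):
//   0x0100   SPURS kernel context
//   0x0A00   policy module image / entry   (CELL_SPURS_TASKSET_PM_ENTRY_ADDR)
//   0x0A70   taskset "syscall" trampoline  (CELL_SPURS_TASKSET_PM_SYSCALL_ADDR)
//   0x2700   taskset context (SpursTasksetPmMgmtData)
//   0x2780   staging buffer for the current task's TaskInfo
//   CELL_SPURS_TASK_TOP .. CELL_SPURS_TASK_BOTTOM   area for task code and data
```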
spursSysServiceWorkloadEntry(SPUThread & spu); // -// SPURS taskset polict module functions +// SPURS taskset policy module functions // +bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +void spursTasksetExit(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); +bool spursTasksetShouldYield(SPUThread & spu); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +void spursTasksetResumeTask(SPUThread & spu); +bool spursTasksetEntry(SPUThread & spu); extern Module *cellSpurs; @@ -73,6 +82,12 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { return wklId == mgmt->wklCurrentId ? 0 : 1; } +/// Exit current workload +void cellSpursModuleExit(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(mgmt->yieldToKernelAddr); +} + /// Execute a DMA operation bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { spu.WriteChannel(MFC_LSA, u128::from32r(lsa)); @@ -416,7 +431,6 @@ bool spursKernelMain(SPUThread & spu) { bool isKernel2; u32 pollStatus; - u64 wklArg; if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { // Entry point of SPURS kernel // Save arguments @@ -453,9 +467,9 @@ bool spursKernelMain(SPUThread & spu) { spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - // Register the system HLE service workload entry point - spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); - wklArg = mgmt->spurs->m.wklInfoSysSrv.arg; + // DMA in the system service workload info + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfoSysSrv), 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID/*tag*/); + spursDmaWaitForCompletion(spu, 0x80000000); pollStatus = 0; } else if (spu.PC == mgmt->yieldToKernelAddr) { isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; @@ -468,15 +482,38 @@ bool spursKernelMain(SPUThread & spu) { spursKernel1SelectWorkload(spu); } - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - auto wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &mgmt->spurs->m.wklInfo2[wid & 0xf] : - &mgmt->spurs->m.wklInfoSysSrv); - wklArg = wklInfo->arg; + pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + + // DMA in the workload info for the selected workload + auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : + wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : + offsetof(CellSpurs, m.wklInfoSysSrv); + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); } else { assert(0); } + auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0); + if (mgmt->wklCurrentAddr != wklInfo->addr) { + switch (wklInfo->addr.addr()) { + case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: + spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + break; + case SPURS_IMG_ADDR_TASKSET_PM: + spu.RegisterHleFunction(0xA00, spursTasksetEntry); + break; + default: + spursDma(spu, MFC_GET_CMD, wklInfo->addr.addr(), 0xA00/*LSA*/, wklInfo->size, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); + break; + } + + mgmt->wklCurrentAddr = wklInfo->addr; + mgmt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); + } + if (!isKernel2) { mgmt->moduleId[0] = 0; mgmt->moduleId[1] = 0; @@ -486,7 +523,7 @@ bool spursKernelMain(SPUThread & spu) { spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; - spu.GPR[4]._u64[1] = wklArg; + spu.GPR[4]._u64[1] = wklInfo->arg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); return false; @@ -597,7 +634,8 @@ void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 } if (notify) { - // TODO: sys_spu_thread_send_event(spurs->m.spuPort, 2, 0); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); } } @@ -1020,6 +1058,24 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 return false; } +void spursTasksetExit(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + // Trace - STOP + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = 0x54; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP + pkt.data.stop = SPURS_GUID_TASKSET_PM; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + // Not sure why this check exists. Perhaps to check for memory corruption. 
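One plausible reading of the check that follows, inferred from how the marker is written at entry (the patch itself is unsure):

```cpp
// The taskset context at LS 0x2700 begins with a 16-byte marker written by
// spursTasksetEntry: memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16).
// If a task scribbled over the context area, the marker no longer matches
// and the SPU is halted rather than allowed to run on corrupted state.
if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) {
    spursHalt(spu);   // assumption: unrecoverable context corruption
}
```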
+ if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) { + spursHalt(spu); + } + + cellSpursModuleExit(spu); +} + void spursTasksetDispatch(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); @@ -1028,7 +1084,8 @@ void spursTasksetDispatch(SPUThread & spu) { u32 isWaiting; spursTasksetProcessRequest(spu, 5, &taskId, &isWaiting); if (taskId >= CELL_SPURS_MAX_TASK) { - // TODO: spursTasksetExit(spu); + spursTasksetExit(spu); + return; } mgmt->taskId = taskId; @@ -1109,11 +1166,23 @@ void spursTasksetInit(SPUThread & spu, u32 pollStatus) { spursTasksetProcessPollStatus(spu, pollStatus); } -void spursTasksetEntry(SPUThread & spu) { +void spursTasksetResumeTask(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - // Check if the function was invoked by the SPURS kernel or because of a syscall - if (spu.PC != 0xA70) { + // Restore task context + spu.GPR[0] = mgmt->savedContextLr; + spu.GPR[1] = mgmt->savedContextSp; + for (auto i = 0; i < 48; i++) { + spu.GPR[80 + i] = mgmt->savedContextR80ToR127[i]; + } + + spu.SetBranch(spu.GPR[0]._u32[3]); +} + +bool spursTasksetEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + if (spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR) { // Called from kernel auto kernelMgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); auto arg = spu.GPR[4]._u64[1]; @@ -1123,20 +1192,31 @@ void spursTasksetEntry(SPUThread & spu) { mgmt->taskset.set(arg); memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3]; - mgmt->yieldAddr = 0xA70; + mgmt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR; mgmt->spuNum = kernelMgmt->spuNum; mgmt->dmaTagId = kernelMgmt->dmaTagId; mgmt->taskId = 0xFFFFFFFF; + // Register SPURS takset policy module HLE functions + spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000); // TODO: use a symbolic constant + spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry); + spu.RegisterHleFunction(mgmt->syscallAddr, spursTasksetEntry); + spursTasksetInit(spu, pollStatus); - // TODO: Dispatch + spursTasksetDispatch(spu); + } else if (spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR) { + // Save task context + mgmt->savedContextLr = spu.GPR[0]; + mgmt->savedContextSp = spu.GPR[1]; + for (auto i = 0; i < 48; i++) { + mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; + } + + // TODO: Process syscall + spursTasksetResumeTask(spu); + } else { + assert(0); } - mgmt->savedContextLr = spu.GPR[0]; - mgmt->savedContextSp = spu.GPR[1]; - for (auto i = 0; i < 48; i++) { - mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; - } - - // TODO: Process syscall + return false; } From 61342946a4a84271a398f3f48bfd1ed60e91f69d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 1 Feb 2015 02:16:06 +0530 Subject: [PATCH 23/29] SPURS: Implement some portions of taskset pm --- rpcs3/Emu/Cell/SPUInterpreter.h | 5 +- rpcs3/Emu/Cell/SPUThread.h | 9 + rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 34 ++- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 235 +++++++++++++++++--- 4 files changed, 235 insertions(+), 48 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 151eb4e436..8351911153 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -1373,10 +1373,7 @@ private: } void FSCRWR(u32 rt, u32 ra) { - CPU.FPSCR._u32[3] = CPU.GPR[ra]._u32[3] & 0x00000F07; - CPU.FPSCR._u32[2] = CPU.GPR[ra]._u32[2] & 0x00003F07; - CPU.FPSCR._u32[1] 
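Stepping back to spursTasksetEntry in the hunk above: the same HLE handler is registered at two LS addresses and uses the faulting PC to tell the call paths apart. A condensed map of that contract, with register meanings taken from the surrounding code:

```cpp
// spursTasksetEntry, dual-entry dispatch (condensed from the patch above):
//   spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR (0xA00)
//     called by the SPURS kernel with GPR3 = LS offset of the kernel context,
//     GPR4 = the workload argument (the taskset EA), GPR5 = poll status;
//     initialises the taskset context, then dispatches a task.
//   spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR (0xA70)
//     reached from a running task; LR (GPR0), SP (GPR1) and R80..R127 are
//     saved first so the task can later be resumed where it left off.
```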
= CPU.GPR[ra]._u32[1] & 0x00003F07; - CPU.FPSCR._u32[0] = CPU.GPR[ra]._u32[0] & 0x00000F07; + CPU.FPSCR.Write(CPU.GPR[ra]); } void DFTSV(u32 rt, u32 ra, s32 i7) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index ce0036e4a5..cfd45d6c0e 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -248,6 +248,15 @@ public: { _u32[1+slice] |= exceptions; } + + // Write the FPSCR + void Write(u128 & r) + { + _u32[3] = r._u32[3] & 0x00000F07; + _u32[2] = r._u32[2] & 0x00003F07; + _u32[1] = r._u32[1] & 0x00003F07; + _u32[0] = r._u32[0] & 0x00000F07; + } }; union SPU_SNRConfig_hdr diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 08fff45abf..85df5fb582 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -40,6 +40,7 @@ enum { CELL_SPURS_TASK_ERROR_AGAIN = 0x80410901, CELL_SPURS_TASK_ERROR_INVAL = 0x80410902, + CELL_SPURS_TASK_ERROR_NOSYS = 0x80410903, CELL_SPURS_TASK_ERROR_NOMEM = 0x80410904, CELL_SPURS_TASK_ERROR_SRCH = 0x80410905, CELL_SPURS_TASK_ERROR_NOEXEC = 0x80410907, @@ -213,6 +214,17 @@ enum SpursTaskConstants CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, CELL_SPURS_TASKSET_PM_ENTRY_ADDR = 0xA00, CELL_SPURS_TASKSET_PM_SYSCALL_ADDR = 0xA70, + + // Task syscall numbers + CELL_SPURS_TASK_SYSCALL_EXIT = 0, + CELL_SPURS_TASK_SYSCALL_YIELD = 1, + CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL = 2, + CELL_SPURS_TASK_SYSCALL_POLL = 3, + CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG = 4, + + // Task poll status + CELL_SPURS_TASK_POLL_FOUND_TASK = 1, + CELL_SPURS_TASK_POLL_FOUND_WORKLOAD = 2, }; enum CellSpursEventFlagWaitMode @@ -627,10 +639,10 @@ struct CellSpursTaskset struct TaskInfo { - CellSpursTaskArgument args; - vm::bptr elf_addr; - be_t context_save_storage_and_alloc_ls_blocks; // This is (context_save_storage_addr | allocated_ls_blocks) - CellSpursTaskLsPattern ls_pattern; + CellSpursTaskArgument args; // 0x00 + vm::bptr elf_addr; // 0x10 + be_t context_save_storage_and_alloc_ls_blocks; // 0x18 This is (context_save_storage_addr | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; // 0x20 }; static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); @@ -656,7 +668,7 @@ struct CellSpursTaskset u8 x72; // 0x72 u8 last_scheduled_task; // 0x73 be_t wid; // 0x74 - u8 unk1[8]; // 0x78 + be_t x78; // 0x78 TaskInfo task_info[128]; // 0x80 vm::bptr exception_handler; // 0x1880 vm::bptr exception_handler_arg; // 0x1888 @@ -765,7 +777,7 @@ struct CellSpursTaskset2 u8 x72; // 0x72 u8 last_scheduled_task; // 0x73 be_t wid; // 0x74 - u8 unk1[8]; // 0x78 + be_t x78; // 0x78 TaskInfo task_info[128]; // 0x80 vm::bptr exception_handler; // 0x1880 vm::bptr exception_handler_arg; // 0x1888 @@ -773,9 +785,9 @@ struct CellSpursTaskset2 u32 unk2; // 0x1894 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C - u8 unk3[0xE8]; // 0x18A0 - u64 task_exit_code[256]; // 0x1988 - u8 unk4[0x778]; // 0x2188 + u8 unk3[0x1980 - 0x18A0]; // 0x18A0 + be_t task_exit_code[128]; // 0x1980 + u8 unk4[0x2900 - 0x2180]; // 0x2180 } m; static_assert(sizeof(_CellSpursTaskset2) == size, "Wrong _CellSpursTaskset2 size"); @@ -939,8 +951,8 @@ struct SpursTasksetPmMgmtData be_t lowestLoadSegmentAddr; // 0x2FBC be_t x2FC0; // 0x2FC0 be_t x2FC8; // 0x2FC8 - be_t x2FD0; // 0x2FD0 - be_t taskExitCode; // 0x2FD4 + be_t taskExitCode; // 0x2FD0 + be_t x2FD4; // 0x2FD4 u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp 
index 653d4ca259..7118eadbd5 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -18,7 +18,7 @@ void cellSpursModuleExit(SPUThread & spu); bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); -void spursHalt(); +void spursHalt(SPUThread & spu); // // SPURS Kernel functions @@ -44,9 +44,10 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu); // bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); void spursTasksetExit(SPUThread & spu); +void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); void spursTasksetDispatch(SPUThread & spu); void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); -bool spursTasksetShouldYield(SPUThread & spu); +bool spursTasksetPollStatus(SPUThread & spu); void spursTasksetInit(SPUThread & spu, u32 pollStatus); void spursTasksetResumeTask(SPUThread & spu); bool spursTasksetEntry(SPUThread & spu); @@ -1018,6 +1019,8 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 // TODO: Implement cases s32 delta = 0; switch (request + 1) { + case -1: + break; case 0: break; case 1: @@ -1033,7 +1036,7 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 case 6: break; default: - assert(0); + spursHalt(spu); break; } @@ -1076,9 +1079,24 @@ void spursTasksetExit(SPUThread & spu) { cellSpursModuleExit(spu); } +void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); + + spu.GPR[2].clear(); + spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]); + spu.GPR[4]._u64[1] = taskset->m.args; + spu.GPR[4]._u64[0] = taskset->m.spurs.addr(); + for (auto i = 5; i < 128; i++) { + spu.GPR[i].clear(); + } + + spu.SetBranch(mgmt->savedContextLr.value()._u32[3]); +} + void spursTasksetDispatch(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); u32 taskId; u32 isWaiting; @@ -1089,7 +1107,13 @@ void spursTasksetDispatch(SPUThread & spu) { } mgmt->taskId = taskId; - u64 elfAddr = mgmt->taskset->m.task_info[taskId].elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull; + + // DMA in the task info for the selected task + spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), mgmt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); + auto elfAddr = taskInfo->elf_addr.addr().value(); + taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull); // Trace - Task: Incident=dispatch CellSpursTracePacket pkt; @@ -1097,37 +1121,84 @@ void spursTasksetDispatch(SPUThread & spu) { pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK; pkt.data.task.incident = CELL_SPURS_TRACE_TASK_DISPATCH; pkt.data.task.taskId = taskId; - cellSpursModulePutTrace(&pkt, 0x1F); + cellSpursModulePutTrace(&pkt, CELL_SPURS_KERNEL_DMA_TAG_ID); if (isWaiting == 0) { - } + // If we reach here it means that the task is being started and not being resumed + mgmt->lowestLoadSegmentAddr = CELL_SPURS_TASK_TOP; - if 
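For the other side of the handoff, spursTasksetStartTask above fixes the register state a task begins with; the zeroing is what keeps scheduler state from leaking into task code. Summarized:

```cpp
// Task start contract established by spursTasksetStartTask (from the patch):
//   GPR3 = the task's 128-bit CellSpursTaskArgument
//   GPR4 = taskset args and the SPURS instance EA, one per 64-bit half
//   GPR2 and GPR5..GPR127 cleared
// Control then branches to the ELF entry point, which spursTasksetDispatch
// stashed in savedContextLr before calling this function.
```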
(mgmt->taskset->m.enable_clear_ls) { - memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); - } + // TODO: Load elf + // TODO: halt if rc of Load elf != CELL_OK - // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area - if (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || - (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { - // Load the ELF - // TODO: Load ELF - } + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - // Load save context from main memory to LS - u64 context_save_storage = mgmt->taskset->m.task_info[taskId].context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; - for (auto i = 6; i < 128; i++) { - bool shouldLoad = mgmt->taskset->m.task_info[taskId].ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; - if (shouldLoad) { - memcpy(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP + ((i - 6) << 11)), - vm::get_ptr((u32)context_save_storage + 0x400 + ((i - 6) << 11)), 0x800); + mgmt->tasksetMgmtAddr = 0x2700; + mgmt->x2FC0 = 0; + mgmt->taskExitCode = isWaiting; + mgmt->x2FD4 = elfAddr & 5; // TODO: Figure this out + + if ((elfAddr & 5) == 1) { + spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, mgmt->dmaTagId); } - } - // Trace - GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; - pkt.data.guid = 0; // TODO: Put GUID of taskId here - cellSpursModulePutTrace(&pkt, 0x1F); + // Trace - GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; + pkt.data.guid = 0; // TODO: Put GUID of taskId here + cellSpursModulePutTrace(&pkt, 0x1F); + + if (elfAddr & 2) { // TODO: Figure this out + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP); + spu.Stop(); + return; + } + + spursTasksetStartTask(spu, taskInfo->args); + } else { + if (taskset->m.enable_clear_ls) { + memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); + } + + // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well + if (taskInfo->ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || + (taskInfo->ls_pattern.u64[1] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + // Load the ELF + // TODO: Load ELF + // TODO: halt if rc of Load elf != CELL_OK + } + + // Load saved context from main memory to LS + u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + for (auto i = 6; i < 128; i++) { + bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? 
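The shift arithmetic in this restore loop encodes the LS-pattern convention used throughout the task context code; spelled out from the constants on this line:

```cpp
// ls_pattern: a 128-bit mask with one bit per 2 KB (0x800-byte) LS block.
// For a set bit i (the loops start at block 6, as in the code):
//   LS address             = CELL_SPURS_TASK_TOP + ((i - 6) << 11)
//   context-save-area slot = contextSaveStorage + 0x400 + ((i - 6) << 11)
// The loop restores exactly the blocks the task declared as its own.
```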
true : false; + if (shouldLoad) { + // TODO: Combine DMA requests for consecutive blocks into a single request + spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); + } + } + + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + + // Restore saved registers + spu.FPSCR.Write(mgmt->savedContextFpscr.value()); + spu.WriteChannel(MFC_WrTagMask, u128::from32r(mgmt->savedWriteTagGroupQueryMask)); + spu.WriteChannel(SPU_WrEventMask, u128::from32r(mgmt->savedSpuWriteEventMask)); + + // Trace - GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; + pkt.data.guid = 0; // TODO: Put GUID of taskId here + cellSpursModulePutTrace(&pkt, 0x1F); + + if (elfAddr & 2) { // TODO: Figure this out + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP); + spu.Stop(); + return; + } + + spu.GPR[3].clear(); + spursTasksetResumeTask(spu); + } } void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { @@ -1136,7 +1207,7 @@ void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { } } -bool spursTasksetShouldYield(SPUThread & spu) { +bool spursTasksetPollStatus(SPUThread & spu) { u32 pollStatus; if (cellSpursModulePollStatus(spu, &pollStatus)) { @@ -1179,6 +1250,99 @@ void spursTasksetResumeTask(SPUThread & spu) { spu.SetBranch(spu.GPR[0]._u32[3]); } +s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); + + // If the 0x10 bit is set in syscallNum then its the 2nd version of the + // syscall (e.g. cellSpursYield2 instead of cellSpursYield) and so don't wait + // for DMA completion + if ((syscallNum & 0x10) == 0) { + spursDmaWaitForCompletion(spu, 0xFFFFFFFF); + syscallNum &= 0x0F; + } + + s32 rc = 0; + u32 incident = 0; + switch (syscallNum) { + case CELL_SPURS_TASK_SYSCALL_EXIT: + if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out + if (mgmt->x2FD4 != 4) { + spursTasksetProcessRequest(spu, 0, nullptr, nullptr); + } + + auto a = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; + auto b = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8; + // TODO: onTaskExit(a, mgmt->taskId, mgmt->taskExitCode, b) + } + + incident = CELL_SPURS_TRACE_TASK_EXIT; + break; + case CELL_SPURS_TASK_SYSCALL_YIELD: + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 3, nullptr, nullptr)) { + // If we reach here then it means that either another task can be scheduled or another workload can be scheduled + // Save the context of the current task + // TODO: rc = saveContext + if (rc == CELL_OK) { + spursTasksetProcessRequest(spu, 1, nullptr, nullptr); + incident = CELL_SPURS_TRACE_TASK_YIELD; + } + } + break; + case CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL: + if (spursTasksetProcessRequest(spu, -1, nullptr, nullptr)) { + // TODO: rc = saveContext + if (rc == CELL_OK) { + if (spursTasksetProcessRequest(spu, 2, nullptr, nullptr) == false) { + incident = CELL_SPURS_TRACE_TASK_WAIT; + } + } + } + break; + case CELL_SPURS_TASK_SYSCALL_POLL: + rc = spursTasksetPollStatus(spu) ? CELL_SPURS_TASK_POLL_FOUND_WORKLOAD : 0; + rc |= spursTasksetProcessRequest(spu, 3, nullptr, nullptr) ? 
CELL_SPURS_TASK_POLL_FOUND_TASK : 0; + break; + case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG: + if (args == 0) { // TODO: Figure this out + spursHalt(spu); + } + + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 4, nullptr, nullptr) != true) { + // TODO: rc = saveContext + if (rc == CELL_OK) { + spursTasksetProcessRequest(spu, 1, nullptr, nullptr); + incident = CELL_SPURS_TRACE_TASK_WAIT; + } + } + break; + default: + rc = CELL_SPURS_TASK_ERROR_NOSYS; + break; + } + + if (incident) { + // Trace - TASK + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK; + pkt.data.task.incident = incident; + pkt.data.task.taskId = mgmt->taskId; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + // Clear the GUID of the task + memset(vm::get_ptr(spu.ls_offset + mgmt->lowestLoadSegmentAddr), 0, 0x10); + + if (spursTasksetPollStatus(spu)) { + spursTasksetExit(spu); + } + + spursTasksetDispatch(spu); + } + + return rc; +} + bool spursTasksetEntry(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); @@ -1188,6 +1352,7 @@ bool spursTasksetEntry(SPUThread & spu) { auto arg = spu.GPR[4]._u64[1]; auto pollStatus = spu.GPR[5]._u32[3]; + // Initialise memory and save args memset(mgmt, 0, sizeof(*mgmt)); mgmt->taskset.set(arg); memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); @@ -1212,8 +1377,12 @@ bool spursTasksetEntry(SPUThread & spu) { mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; } - // TODO: Process syscall - spursTasksetResumeTask(spu); + spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]); + + // Resume the previously executing task if the syscall did not cause a context switch + if (spu.m_is_branch == false) { + spursTasksetResumeTask(spu); + } } else { assert(0); } From ba6ac5019ea6e022d08060c730ae0cc9af1a0c6d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 2 Feb 2015 01:32:40 +0530 Subject: [PATCH 24/29] SPURS: Implement some portions of taskset policy manager --- rpcs3/Emu/Cell/SPUInterpreter.h | 5 +- rpcs3/Emu/Cell/SPUThread.h | 9 + rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 34 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 16 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 380 ++++++++++++++++---- rpcs3/Loader/ELF32.cpp | 7 +- rpcs3/Loader/ELF32.h | 2 +- 7 files changed, 345 insertions(+), 108 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 8351911153..7c56b4c9ac 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -1316,10 +1316,7 @@ private: void FSCRRD(u32 rt) { - CPU.GPR[rt]._u32[3] = CPU.FPSCR._u32[3]; - CPU.GPR[rt]._u32[2] = CPU.FPSCR._u32[2]; - CPU.GPR[rt]._u32[1] = CPU.FPSCR._u32[1]; - CPU.GPR[rt]._u32[0] = CPU.FPSCR._u32[0]; + CPU.FPSCR.Read(CPU.GPR[rt]); } void FESD(u32 rt, u32 ra) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index cfd45d6c0e..ff5012f146 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -257,6 +257,15 @@ public: _u32[1] = r._u32[1] & 0x00003F07; _u32[0] = r._u32[0] & 0x00000F07; } + + // Read the FPSCR + void Read(u128 & r) + { + r._u32[3] = _u32[3]; + r._u32[2] = _u32[2]; + r._u32[1] = _u32[1]; + r._u32[0] = _u32[0]; + } }; union SPU_SNRConfig_hdr diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 83dffb54e7..b8088daaff 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2602,8 +2602,8 @@ s64 
spursCreateTaskset(vm::ptr spurs, vm::ptr tasks cellSpursAddWorkloadWithAttribute(spurs, vm::ptr::make(wid.addr()), vm::ptr::make(wkl_attr.addr())); // TODO: Check return code - taskset->m.x72 = 0x80; - taskset->m.wid = wid.value(); + taskset->m.wkl_flag_wait_task = 0x80; + taskset->m.wid = wid.value(); // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset); // TODO: Check return code @@ -2801,11 +2801,9 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: u32 tmp_task_id; for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++) { - u32 l = tmp_task_id >> 5; - u32 b = tmp_task_id & 0x1F; - if ((taskset->m.enabled_set[l] & (0x80000000 >> b)) == 0) + if (!taskset->m.enabled.value()._bit[tmp_task_id]) { - taskset->m.enabled_set[l] |= 0x80000000 >> b; + taskset->m.enabled.value()._bit[tmp_task_id] = true; break; } } @@ -2840,10 +2838,10 @@ s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, #endif } -s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) +s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskId) { #ifdef PRX_DEBUG - cellSpurs->Warning("_cellSpursSendSignal(taskset_addr=0x%x, taskID=%d)", taskset.addr(), taskID); + cellSpurs->Warning("_cellSpursSendSignal(taskset_addr=0x%x, taskId=%d)", taskset.addr(), taskId); return GetCurrentPPUThread().FastCall2(libsre + 0x124CC, libsre_rtoc); #else if (!taskset) @@ -2856,29 +2854,23 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) return CELL_SPURS_TASK_ERROR_ALIGN; } - if (taskID >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + if (taskId >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) { return CELL_SPURS_TASK_ERROR_INVAL; } - auto word = taskID >> 5; - auto mask = 0x80000000u >> (taskID & 0x1F); - auto disabled = taskset->m.enabled_set[word] & mask ? false : true; - auto running = taskset->m.running_set[word]; - auto ready = taskset->m.ready_set[word]; - auto ready2 = taskset->m.ready2_set[word]; - auto waiting = taskset->m.waiting_set[word]; - auto signalled = taskset->m.signal_received_set[word]; - auto enabled = taskset->m.enabled_set[word]; - auto invalid = (ready & ready2) || (running & waiting) || ((running | ready | ready2 | waiting | signalled) & ~enabled) || disabled; + auto _0 = be_t::make(u128::from32(0)); + auto disabled = taskset->m.enabled.value()._bit[taskId] ? false : true; + auto invalid = (taskset->m.ready & taskset->m.pending_ready) != _0 || (taskset->m.running & taskset->m.waiting) != _0 || disabled || + ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.waiting | taskset->m.signalled) & be_t::make(~taskset->m.enabled.value())) != _0; if (invalid) { return CELL_SPURS_TASK_ERROR_SRCH; } - auto shouldSignal = waiting & ~signalled & mask ? true : false; - taskset->m.signal_received_set[word] |= mask; + auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? 
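The u128 expression above compresses the whole delivery rule into one line; a standalone restatement, with std::bitset replacing the emulator's be_t masks purely for readability:

```cpp
#include <bitset>

// Signal rule of _cellSpursSendSignal above. Returns true when the workload
// must be kicked (cellSpursSendWorkloadSignal) so the waiter can run.
bool deliverSignal(std::bitset<128>& signalled, const std::bitset<128>& waiting, unsigned taskId)
{
    // Wake only a task that is waiting and has no signal latched yet
    bool shouldSignal = waiting.test(taskId) && !signalled.test(taskId);
    signalled.set(taskId); // the signal is latched in every case
    return shouldSignal;
}
```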
true : false; + ((u128)taskset->m.signalled)._bit[taskId] = true; if (shouldSignal) { cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 85df5fb582..f1a4793e13 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -655,17 +655,17 @@ struct CellSpursTaskset // Real data struct _CellSpursTaskset { - be_t running_set[4]; // 0x00 - be_t ready_set[4]; // 0x10 - be_t ready2_set[4]; // 0x20 - TODO: Find out what this is - be_t enabled_set[4]; // 0x30 - be_t signal_received_set[4]; // 0x40 - be_t waiting_set[4]; // 0x50 + be_t running; // 0x00 + be_t ready; // 0x10 + be_t pending_ready; // 0x20 + be_t enabled; // 0x30 + be_t signalled; // 0x40 + be_t waiting; // 0x50 vm::bptr spurs; // 0x60 be_t args; // 0x68 u8 enable_clear_ls; // 0x70 u8 x71; // 0x71 - u8 x72; // 0x72 + u8 wkl_flag_wait_task; // 0x72 u8 last_scheduled_task; // 0x73 be_t wid; // 0x74 be_t x78; // 0x78 @@ -948,7 +948,7 @@ struct SpursTasksetPmMgmtData be_t savedWriteTagGroupQueryMask; // 0x2FB0 be_t savedSpuWriteEventMask; // 0x2FB4 be_t tasksetMgmtAddr; // 0x2FB8 - be_t lowestLoadSegmentAddr; // 0x2FBC + be_t guidAddr; // 0x2FBC be_t x2FC0; // 0x2FC0 be_t x2FC8; // 0x2FC8 be_t taskExitCode; // 0x2FD0 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 7118eadbd5..c7902f7240 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -7,6 +7,8 @@ #include "Emu/SysCalls/lv2/sys_lwcond.h" #include "Emu/SysCalls/lv2/sys_spu.h" #include "Emu/SysCalls/Modules/cellSpurs.h" +#include "Loader/ELF32.h" +#include "Emu/FS/vfsStreamMemory.h" // // SPURS utility functions @@ -42,7 +44,8 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu); // // SPURS taskset policy module functions // -bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); void spursTasksetExit(SPUThread & spu); void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); void spursTasksetDispatch(SPUThread & spu); @@ -50,6 +53,9 @@ void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); bool spursTasksetPollStatus(SPUThread & spu); void spursTasksetInit(SPUThread & spu, u32 pollStatus); void spursTasksetResumeTask(SPUThread & spu); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); bool spursTasksetEntry(SPUThread & spu); extern Module *cellSpurs; @@ -1001,64 +1007,216 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu) { // SPURS taskset policy module functions ////////////////////////////////////////////////////////////////////////////// -bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) { +enum SpursTasksetRequest { + SPURS_TASKSET_REQUEST_POLL_SIGNAL = -1, + SPURS_TASKSET_REQUEST_DESTROY_TASK = 0, + SPURS_TASKSET_REQUEST_YIELD_TASK = 1, + SPURS_TASKSET_REQUEST_WAIT_SIGNAL = 2, + SPURS_TASKSET_REQUEST_POLL = 3, + SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG = 4, + SPURS_TASKSET_REQUEST_SELECT_TASK = 5, 
+ SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6, +}; + +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) { auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - // Verify taskset state is valid - for (auto i = 0; i < 4; i ++) { - if ((mgmt->taskset->m.waiting_set[i] & mgmt->taskset->m.running_set[i]) || - (mgmt->taskset->m.ready_set[i] & mgmt->taskset->m.ready2_set[i]) || - ((mgmt->taskset->m.running_set[i] | mgmt->taskset->m.ready_set[i] | - mgmt->taskset->m.ready2_set[i] | mgmt->taskset->m.signal_received_set[i] | - mgmt->taskset->m.waiting_set[i]) & ~mgmt->taskset->m.enabled_set[i])) { - assert(0); - } - } - - // TODO: Implement cases - s32 delta = 0; - switch (request + 1) { - case -1: - break; - case 0: - break; - case 1: - break; - case 2: - break; - case 3: - break; - case 4: - break; - case 5: - break; - case 6: - break; - default: - spursHalt(spu); - break; - } - - // Set the ready count of the workload to the number of ready tasks + s32 rc = CELL_OK; + s32 numNewlyReadyTasks; do { - s32 readyCount = kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD ? - kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed() : - kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed(); + spursDma(spu, MFC_GETLLAR_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); - auto newReadyCount = readyCount + delta > 0xFF ? 0xFF : readyCount + delta < 0 ? 0 : readyCount + delta; - - if (kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD) { - kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(newReadyCount); - } else { - kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(newReadyCount); + // Verify taskset state is valid + auto _0 = be_t::make(u128::from32(0)); + if ((taskset->m.waiting & taskset->m.running) != _0 || (taskset->m.ready & taskset->m.pending_ready) != _0 || + ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.signalled | taskset->m.waiting) & be_t::make(~taskset->m.enabled.value())) != _0) { + spursHalt(spu); + return CELL_OK; } - delta += readyCount; - } while (delta > 0); + // Find the number of tasks that have become ready since the last iteration + auto newlyReadyTasks = (taskset->m.signalled | taskset->m.pending_ready).value() & ~taskset->m.ready.value(); + numNewlyReadyTasks = 0; + for (auto i = 0; i < 128; i++) { + if (newlyReadyTasks._bit[i]) { + numNewlyReadyTasks++; + } + } - // TODO: Implement return - return false; + u128 readyButNotRunning; + u8 selectedTaskId; + auto running = taskset->m.running.value(); + auto waiting = taskset->m.waiting.value(); + auto enabled = taskset->m.enabled.value(); + auto signalled = (taskset->m.signalled & (taskset->m.ready | taskset->m.pending_ready)).value(); + auto ready = (taskset->m.signalled | taskset->m.ready | taskset->m.pending_ready).value(); + + switch (request) { + case SPURS_TASKSET_REQUEST_POLL_SIGNAL: + rc = signalled._bit[mgmt->taskId] ? 
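The validation at the top of spursTasksetProcessRequest doubles as documentation of the scheduler's state machine; collected in one place:

```cpp
// Per-taskset scheduler state: six 128-bit sets, bit i == task i.
//   running       - on an SPU right now          ready   - runnable
//   pending_ready - made ready since last pass   enabled - valid task ids
//   signalled     - signal latched               waiting - blocked tasks
// Invariants checked before every request (violation => spursHalt):
//   (running & waiting)       == 0
//   (ready   & pending_ready) == 0
//   (running | ready | pending_ready | signalled | waiting) & ~enabled == 0
```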
1 : 0; + signalled._bit[mgmt->taskId] = false; + break; + case SPURS_TASKSET_REQUEST_DESTROY_TASK: + numNewlyReadyTasks--; + running._bit[mgmt->taskId] = false; + enabled._bit[mgmt->taskId] = false; + signalled._bit[mgmt->taskId] = false; + ready._bit[mgmt->taskId] = false; + break; + case SPURS_TASKSET_REQUEST_YIELD_TASK: + running._bit[mgmt->taskId] = false; + waiting._bit[mgmt->taskId] = true; + break; + case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: + if (signalled._bit[mgmt->taskId]) { + numNewlyReadyTasks--; + running._bit[mgmt->taskId] = false; + waiting._bit[mgmt->taskId] = true; + signalled._bit[mgmt->taskId] = false; + ready._bit[mgmt->taskId] = false; + } + break; + case SPURS_TASKSET_REQUEST_POLL: + readyButNotRunning = ready & ~running; + if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { + readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task)); + } + + rc = readyButNotRunning != _0 ? 1 : 0; + break; + case SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG: + if (taskset->m.wkl_flag_wait_task == 0x81) { + // A workload flag is already pending so consume it + taskset->m.wkl_flag_wait_task = 0x80; + rc = 0; + } else if (taskset->m.wkl_flag_wait_task == 0x80) { + // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag. + taskset->m.wkl_flag_wait_task = mgmt->taskId; + running._bit[mgmt->taskId] = false; + waiting._bit[mgmt->taskId] = true; + rc = 1; + numNewlyReadyTasks--; + } else { + // Another task is already waiting for the workload signal + rc = CELL_SPURS_TASK_ERROR_BUSY; + } + break; + case SPURS_TASKSET_REQUEST_SELECT_TASK: + readyButNotRunning = ready & ~running; + if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { + readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task)); + } + + // Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness. + for (selectedTaskId = taskset->m.last_scheduled_task + 1; selectedTaskId < 128; selectedTaskId++) { + if (readyButNotRunning._bit[selectedTaskId]) { + break; + } + } + + if (selectedTaskId == 128) { + for (selectedTaskId = 0; selectedTaskId < taskset->m.last_scheduled_task + 1; selectedTaskId++) { + if (readyButNotRunning._bit[selectedTaskId]) { + break; + } + } + + if (selectedTaskId == taskset->m.last_scheduled_task + 1) { + selectedTaskId = CELL_SPURS_MAX_TASK; + } + } + + *taskId = selectedTaskId; + *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 
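The workload-flag handshake is split between the WAIT_WKL_FLAG case above and the RECV_WKL_FLAG case just below, so the encoding of wkl_flag_wait_task is worth collecting:

```cpp
// wkl_flag_wait_task encoding (one byte in the taskset):
//   0x80    - idle: no waiter, no pending flag
//   0x81    - flag arrived while nobody was waiting (kept pending)
//   0..127  - id of the single task currently parked on the flag
// WAIT_WKL_FLAG: consumes a pending 0x81, or parks the caller's task id;
//                a second concurrent waiter gets CELL_SPURS_TASK_ERROR_BUSY.
// RECV_WKL_FLAG: wakes a parked task, or records 0x81 for the next waiter.
```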
1 : 0; + if (selectedTaskId != CELL_SPURS_MAX_TASK) { + taskset->m.last_scheduled_task = selectedTaskId; + running._bit[mgmt->taskId] = true; + waiting._bit[mgmt->taskId] = false; + } + break; + case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: + if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { + // There is a task waiting for the workload flag + taskset->m.wkl_flag_wait_task = 0x80; + rc = 1; + numNewlyReadyTasks++; + } else { + // No tasks are waiting for the workload flag + taskset->m.wkl_flag_wait_task = 0x81; + rc = 0; + } + break; + default: + spursHalt(spu); + return CELL_OK; + } + + taskset->m.pending_ready = _0; + taskset->m.running = running; + taskset->m.waiting = waiting; + taskset->m.enabled = enabled; + taskset->m.signalled = signalled; + taskset->m.ready = ready; + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Increment the ready count of the workload by the number of tasks that have become ready + do { + spursDma(spu, MFC_GETLLAR_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + s32 readyCount = kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed(); + readyCount += numNewlyReadyTasks; + readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount; + + if (kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(readyCount); + } else { + spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(readyCount); + } + } while (spursDma(spu, MFC_PUTLLC_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + return rc; +} + +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) { + if (elfAddr == 0 || (elfAddr & 0x0F) != 0) { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + vfsStreamMemory stream(elfAddr); + loader::handlers::elf32 loader; + auto rc = loader.init(stream); + if (rc != loader::handler::ok) { + return CELL_SPURS_TASK_ERROR_NOEXEC; + } + + u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM; + for (auto & phdr : loader.m_phdrs) { + if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) { + break; + } + + if (phdr.data_be.p_type == 1/*PT_LOAD*/) { + if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) { + if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP || + phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) { + return CELL_SPURS_TASK_ERROR_FAULT; + } + + _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? 
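The SELECT_TASK scan is self-contained enough to lift out; this sketch mirrors its logic with std::bitset in place of u128, and assumes (as in the patch) that the flag-waiting task's bit has already been cleared from the input set:

```cpp
#include <bitset>
#include <cstdint>

constexpr uint32_t CELL_SPURS_MAX_TASK = 128;

// Round-robin pick over the ready-but-not-running set, mirroring
// SPURS_TASKSET_REQUEST_SELECT_TASK: start after the last scheduled task
// and wrap around once, so no ready task is starved.
uint32_t selectNextTask(const std::bitset<128>& readyButNotRunning, uint32_t lastScheduled)
{
    for (uint32_t id = lastScheduled + 1; id < 128; id++) {
        if (readyButNotRunning[id]) return id;
    }
    for (uint32_t id = 0; id <= lastScheduled; id++) {
        if (readyButNotRunning[id]) return id;
    }
    return CELL_SPURS_MAX_TASK; // nothing ready to run
}
```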
phdr.data_be.p_vaddr : _lowestLoadAddr; + } + } + } + + loader.load_data(spu.ls_offset, skipWriteableSegments); + *entryPoint = loader.m_ehdr.data_be.e_entry; + if (*lowestLoadAddr) { + *lowestLoadAddr = _lowestLoadAddr; + } + + return CELL_OK; } void spursTasksetExit(SPUThread & spu) { @@ -1100,7 +1258,7 @@ void spursTasksetDispatch(SPUThread & spu) { u32 taskId; u32 isWaiting; - spursTasksetProcessRequest(spu, 5, &taskId, &isWaiting); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_SELECT_TASK, &taskId, &isWaiting); if (taskId >= CELL_SPURS_MAX_TASK) { spursTasksetExit(spu); return; @@ -1125,13 +1283,19 @@ void spursTasksetDispatch(SPUThread & spu) { if (isWaiting == 0) { // If we reach here it means that the task is being started and not being resumed - mgmt->lowestLoadSegmentAddr = CELL_SPURS_TASK_TOP; + mgmt->guidAddr = CELL_SPURS_TASK_TOP; - // TODO: Load elf - // TODO: halt if rc of Load elf != CELL_OK + u32 entryPoint; + u32 lowestLoadAddr; + if (spursTasksetLoadElf(spu, &entryPoint, &lowestLoadAddr, taskInfo->elf_addr.addr(), false) != CELL_OK) { + spursHalt(spu); + return; + } spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + mgmt->savedContextLr.value()._u32[3] = entryPoint; + mgmt->guidAddr = lowestLoadAddr; mgmt->tasksetMgmtAddr = 0x2700; mgmt->x2FC0 = 0; mgmt->taskExitCode = isWaiting; @@ -1163,8 +1327,11 @@ void spursTasksetDispatch(SPUThread & spu) { if (taskInfo->ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || (taskInfo->ls_pattern.u64[1] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { // Load the ELF - // TODO: Load ELF - // TODO: halt if rc of Load elf != CELL_OK + u32 entryPoint; + if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { + spursHalt(spu); + return; + } } // Load saved context from main memory to LS @@ -1203,7 +1370,7 @@ void spursTasksetDispatch(SPUThread & spu) { void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { - spursTasksetProcessRequest(spu, 6, nullptr, nullptr); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr); } } @@ -1250,6 +1417,74 @@ void spursTasksetResumeTask(SPUThread & spu) { spu.SetBranch(spu.GPR[0]._u32[3]); } +s32 spursTasketSaveTaskContext(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); + + spursDmaWaitForCompletion(spu, 0xFFFFFFFF); + + if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) { + return CELL_SPURS_TASK_ERROR_STAT; + } + + u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; + u32 lsBlocks = 0; + for (auto i = 0; i < 128; i++) { + if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) { + lsBlocks++; + } + } + + if (lsBlocks > allocLsBlocks) { + return CELL_SPURS_TASK_ERROR_STAT; + } + + // Make sure the stack is area is specified in the ls pattern + for (auto i = (mgmt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { + if ((taskInfo->ls_pattern.u64[i < 64 ? 
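spursTasketSaveTaskContext packs an address and a block count into one field; the layout below is reconstructed from the masks and offsets this function uses:

```cpp
// context_save_storage_and_alloc_ls_blocks field:
//   high bits : EA of the context save area (128-byte aligned, mask ~0x7F)
//   bits 0..6 : number of LS blocks allocated for this task
// Context save area layout:
//   +0x000 : 0x380 bytes of processor context (GPRs, FPSCR, tag/event masks),
//            DMA'd from the staging area at LS 0x2C80
//   +0x400 : one 0x800-byte slot per LS block, indexed exactly like the
//            ls_pattern bits (block i at +0x400 + ((i - 6) << 11))
```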
1 : 0] & (0x8000000000000000ull >> i)) == 0) { + return CELL_SPURS_TASK_ERROR_STAT; + } + } + + // Get the processor context + u128 r; + spu.FPSCR.Read(r); + mgmt->savedContextFpscr = r; + spu.ReadChannel(r, SPU_RdEventMask); + mgmt->savedSpuWriteEventMask = r._u32[3]; + spu.ReadChannel(r, MFC_RdTagMask); + mgmt->savedWriteTagGroupQueryMask = r._u32[3]; + + // Store the processor context + u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, mgmt->dmaTagId); + + // Save LS context + for (auto i = 6; i < 128; i++) { + bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; + if (shouldStore) { + // TODO: Combine DMA requests for consecutive blocks into a single request + spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); + } + } + + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + return CELL_OK; +} + +void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0); + spursDmaWaitForCompletion(spu, 1); + + spu.GPR[3]._u64[1] = mgmt->taskset.addr(); + spu.GPR[4]._u32[3] = taskId; + spu.GPR[5]._u32[3] = exitCode; + spu.GPR[6]._u64[1] = args; + spu.FastCall(0x10000); +} + s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); @@ -1268,32 +1503,32 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { case CELL_SPURS_TASK_SYSCALL_EXIT: if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out if (mgmt->x2FD4 != 4) { - spursTasksetProcessRequest(spu, 0, nullptr, nullptr); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr); } - auto a = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; - auto b = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8; - // TODO: onTaskExit(a, mgmt->taskId, mgmt->taskExitCode, b) + auto addr = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; + auto args = mgmt->x2FD4 == 4 ? 
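The switch below is easier to follow with the syscall map in one place; the numbers are the CELL_SPURS_TASK_SYSCALL_* constants added earlier in this series:

```cpp
// Taskset syscall dispatch (low nibble; a set 0x10 bit marks the *2 variants,
// e.g. cellSpursYield2, which skip the up-front DMA-completion wait):
//   0  EXIT            destroy the task, then run any registered exit handler
//   1  YIELD           save context if other work is pending, mark yielded
//   2  WAIT_SIGNAL     save context and sleep unless a signal is already latched
//   3  POLL            report ready tasks / runnable workloads, never blocks
//   4  RECV_WKL_FLAG   block until the taskset's workload flag is delivered
//   otherwise          CELL_SPURS_TASK_ERROR_NOSYS
```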
0 : mgmt->x2FC8; + spursTasksetOnTaskExit(spu, addr, mgmt->taskId, mgmt->taskExitCode, args); } incident = CELL_SPURS_TRACE_TASK_EXIT; break; case CELL_SPURS_TASK_SYSCALL_YIELD: - if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 3, nullptr, nullptr)) { + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr)) { // If we reach here then it means that either another task can be scheduled or another workload can be scheduled // Save the context of the current task - // TODO: rc = saveContext + rc = spursTasketSaveTaskContext(spu); if (rc == CELL_OK) { - spursTasksetProcessRequest(spu, 1, nullptr, nullptr); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_YIELD_TASK, nullptr, nullptr); incident = CELL_SPURS_TRACE_TASK_YIELD; } } break; case CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL: - if (spursTasksetProcessRequest(spu, -1, nullptr, nullptr)) { - // TODO: rc = saveContext + if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL_SIGNAL, nullptr, nullptr) == 0) { + rc = spursTasketSaveTaskContext(spu); if (rc == CELL_OK) { - if (spursTasksetProcessRequest(spu, 2, nullptr, nullptr) == false) { + if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_SIGNAL, nullptr, nullptr) == 0) { incident = CELL_SPURS_TRACE_TASK_WAIT; } } @@ -1301,17 +1536,16 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { break; case CELL_SPURS_TASK_SYSCALL_POLL: rc = spursTasksetPollStatus(spu) ? CELL_SPURS_TASK_POLL_FOUND_WORKLOAD : 0; - rc |= spursTasksetProcessRequest(spu, 3, nullptr, nullptr) ? CELL_SPURS_TASK_POLL_FOUND_TASK : 0; + rc |= spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr) ? CELL_SPURS_TASK_POLL_FOUND_TASK : 0; break; case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG: if (args == 0) { // TODO: Figure this out spursHalt(spu); } - if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 4, nullptr, nullptr) != true) { - // TODO: rc = saveContext + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG, nullptr, nullptr) != 1) { + rc = spursTasketSaveTaskContext(spu); if (rc == CELL_OK) { - spursTasksetProcessRequest(spu, 1, nullptr, nullptr); incident = CELL_SPURS_TRACE_TASK_WAIT; } } @@ -1331,7 +1565,7 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); // Clear the GUID of the task - memset(vm::get_ptr(spu.ls_offset + mgmt->lowestLoadSegmentAddr), 0, 0x10); + memset(vm::get_ptr(spu.ls_offset + mgmt->guidAddr), 0, 0x10); if (spursTasksetPollStatus(spu)) { spursTasksetExit(spu); diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp index cb9be62759..a24e64c05f 100644 --- a/rpcs3/Loader/ELF32.cpp +++ b/rpcs3/Loader/ELF32.cpp @@ -247,7 +247,7 @@ namespace loader return ok; } - handler::error_code elf32::load_data(u32 offset) + handler::error_code elf32::load_data(u32 offset, bool skip_writeable) { Elf_Machine machine = (Elf_Machine)(u16)(m_ehdr.is_le() ? 
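The new skip_writeable parameter on elf32::load_data exists for task resume; the rationale below is an inference from how spursTasksetLoadElf calls it, not something the patch states:

```cpp
// Presumed intent of skip_writeable (the added check appears just below):
// when a task is resumed, its writable data is restored from the context
// save area, so reloading PF_W segments from the ELF image would clobber
// the saved state; read-only text/rodata can always be reloaded.
if (skip_writeable == true && (phdr.data_be.p_flags & 2/*PF_W*/) != 0)
{
    continue; // leave writable segments to the context-restore path
}
```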
m_ehdr.data_le.e_machine : m_ehdr.data_be.e_machine); @@ -270,6 +270,11 @@ namespace loader return loading_error; } + if (skip_writeable == true && (phdr.data_be.p_flags & 2/*PF_W*/) != 0) + { + continue; + } + if (filesz) { m_stream->Seek(handler::get_stream_offset() + offset); diff --git a/rpcs3/Loader/ELF32.h b/rpcs3/Loader/ELF32.h index d3d37f543c..6a13b6f7cb 100644 --- a/rpcs3/Loader/ELF32.h +++ b/rpcs3/Loader/ELF32.h @@ -132,7 +132,7 @@ namespace loader error_code init(vfsStream& stream) override; error_code load() override; - error_code load_data(u32 offset); + error_code load_data(u32 offset, bool skip_writeable = false); virtual ~elf32() = default; }; From 2c70f5168a89e081f2bfde4482df94eef2426c7c Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 2 Feb 2015 09:02:38 +0530 Subject: [PATCH 25/29] SPURS: Reorder, rename and some cleanup --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 18 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 1599 ++++++++++--------- 3 files changed, 826 insertions(+), 795 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index b8088daaff..18eed2c944 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -26,7 +26,7 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif -bool spursKernelMain(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); @@ -182,7 +182,7 @@ s64 spursInit( { auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); #ifndef PRX_DEBUG_XXX - spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelMain); + spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelEntry); #endif spurs->m.spus[num] = spu->GetId(); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index f1a4793e13..21f011eda2 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -105,8 +105,8 @@ enum SPURSKernelInterfaces CELL_SPURS_KERNEL_DMA_TAG_ID = 31, CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, - CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808, - CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838, + CELL_SPURS_KERNEL1_EXIT_ADDR = 0x808, + CELL_SPURS_KERNEL2_EXIT_ADDR = 0x838, CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; @@ -886,8 +886,8 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -// The SPURS kernel data store. This resides at 0x100 of the LS. -struct SpursKernelMgmtData +// The SPURS kernel context. This resides at 0x100 of the LS. 
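+//
+// An approximate SPU local-storage map, as used by the HLE policy modules in
+// cellSpursSpu.cpp (gathered from this patch set, not from official docs):
+//
+//   0x0100   SpursKernelContext (this structure)
+//   0x0A00   entry point at which policy modules are (re)started
+//   0x2700   SpursTasksetContext (taskset policy module state)
+//   0x3FFB0  initial stack pointer handed to a workload in GPR[1]
+//   0x3FFE0  staging buffer for the selected workload's wklInfo entry
+//
+// For example, an SPU-side helper would locate this context with something
+// like the following (hypothetical usage, mirroring the .cpp code):
+//
+//   auto ctxt = vm::get_ptr<SpursKernelContext>(spu.ls_offset + 0x100);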
+struct SpursKernelContext { u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 @@ -900,7 +900,7 @@ struct SpursKernelMgmtData vm::bptr wklCurrentAddr; // 0x1D0 be_t wklCurrentUniqueId; // 0x1D8 be_t wklCurrentId; // 0x1DC - be_t yieldToKernelAddr; // 0x1E0 + be_t exitToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 u8 moduleId[2]; // 0x1E8 u8 sysSrvInitialised; // 0x1EA @@ -923,10 +923,10 @@ struct SpursKernelMgmtData be_t guid[4]; // 0x280 }; -static_assert(sizeof(SpursKernelMgmtData) == 0x190, "Incorrect size for SpursKernelMgmtData"); +static_assert(sizeof(SpursKernelContext) == 0x190, "Incorrect size for SpursKernelContext"); -// The SPURS taskset policy module data store. This resides at 0x2700 of the LS. -struct SpursTasksetPmMgmtData +// The SPURS taskset policy module context. This resides at 0x2700 of the LS. +struct SpursTasksetContext { u8 tempAreaTaskset[0x80]; // 0x2700 u8 tempAreaTaskInfo[0x30]; // 0x2780 @@ -956,7 +956,7 @@ struct SpursTasksetPmMgmtData u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; -static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); +static_assert(sizeof(SpursTasksetContext) == 0x900, "Incorrect size for SpursTasksetContext"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index c7902f7240..451bfef848 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -13,7 +13,7 @@ // // SPURS utility functions // -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); void cellSpursModuleExit(SPUThread & spu); @@ -27,36 +27,44 @@ void spursHalt(SPUThread & spu); // bool spursKernel1SelectWorkload(SPUThread & spu); bool spursKernel2SelectWorkload(SPUThread & spu); +void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus); +bool spursKernelWorkloadExit(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); // -// SPURS system service workload functions +// SPURS System Service functions // -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4); -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet); -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); -bool spursSysServiceWorkloadEntry(SPUThread & spu); +bool spursSysServiceEntry(SPUThread & spu); +// TODO: Exit +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt); +// TODO: Deactivate workload +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, 
SpursKernelContext * ctxt, u32 wklShutdownBitSet); +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4); +// TODO: Deactivate trace +// TODO: System workload entry +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt); // -// SPURS taskset policy module functions +// SPURS Taskset Policy Module functions // -s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); -s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); -void spursTasksetExit(SPUThread & spu); +bool spursTasksetEntry(SPUThread & spu); +bool spursTasksetSyscallEntry(SPUThread & spu); +void spursTasksetResumeTask(SPUThread & spu); void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); -void spursTasksetDispatch(SPUThread & spu); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); bool spursTasksetPollStatus(SPUThread & spu); -void spursTasksetInit(SPUThread & spu, u32 pollStatus); -void spursTasksetResumeTask(SPUThread & spu); -s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetExit(SPUThread & spu); void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); -bool spursTasksetEntry(SPUThread & spu); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); extern Module *cellSpurs; @@ -65,16 +73,16 @@ extern Module *cellSpurs; ////////////////////////////////////////////////////////////////////////////// /// Output trace information -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId) { // TODO: Implement this } /// Check for execution right requests u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); spu.GPR[3]._u32[3] = 1; - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + if (ctxt->spurs->m.flags1 & SF1_32_WORKLOADS) { spursKernel2SelectWorkload(spu); } else { spursKernel1SelectWorkload(spu); @@ -86,13 +94,13 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { } u32 wklId = result >> 32; - return wklId == mgmt->wklCurrentId ? 0 : 1; + return wklId == ctxt->wklCurrentId ? 
0 : 1; } /// Exit current workload void cellSpursModuleExit(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); - spu.SetBranch(mgmt->yieldToKernelAddr); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(ctxt->exitToKernelAddr); } /// Execute a DMA operation @@ -134,7 +142,7 @@ u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { return rv._u32[3]; } -// Halt the SPU +/// Halt the SPU void spursHalt(SPUThread & spu) { spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); spu.Stop(); @@ -146,7 +154,7 @@ void spursHalt(SPUThread & spu) { /// Select a workload to run bool spursKernel1SelectWorkload(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. @@ -158,20 +166,20 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD]; u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; + contention[i] = spurs->m.wklCurrentContention[i] - ctxt->wklLocContention[i]; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) { + pendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + if (i != ctxt->wklCurrentId) { contention[i] += pendingContention[i]; } } @@ -180,19 +188,19 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; pollStatus = 0; - // The system service workload has the highest priority. Select the system service workload if + // The system service has the highest priority. Select the system service if // the system service message bit for this SPU is set. - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit - spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); } } else { // Caclulate the scheduling weight for each workload u16 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i); + u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i); u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 
1 : 0 : 0; u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); @@ -206,7 +214,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { + if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: // 1. Wokload signal set or workload flag or ready count > contention @@ -217,11 +225,11 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { // 6. Is the workload executable same as the currently loaded executable // 7. The workload id (lesser the number, more the weight) u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; + weight |= (u16)(ctxt->priority[i] & 0x7F) << 16; + weight |= i == ctxt->wklCurrentId ? 0x80 : 0x00; weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; + weight |= ctxt->wklUniqueId[i] == ctxt->wklCurrentId ? 0x02 : 0x00; weight |= 0x01; // In case of a tie the lower numbered workload is chosen @@ -237,9 +245,9 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); @@ -260,16 +268,16 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocContention[wklSelectedId] = 1; + ctxt->wklLocContention[wklSelectedId] = 1; } - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { + ctxt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != ctxt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { @@ -278,14 +286,14 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocPendingContention[wklSelectedId] = 1; + ctxt->wklLocPendingContention[wklSelectedId] = 1; } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -295,7 +303,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { /// Select a workload to run bool spursKernel2SelectWorkload(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. @@ -307,22 +315,22 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD2]; u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - ctxt->wklLocContention[i & 0x0F]; contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? 
contention[i] & 0x0F : contention[i] >> 4; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - ctxt->wklLocPendingContention[i & 0x0F]; pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) { + if (i != ctxt->wklCurrentId) { contention[i] += pendingContention[i]; } } @@ -331,22 +339,22 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; pollStatus = 0; - // The system service workload has the highest priority. Select the system service workload if + // The system service has the highest priority. Select the system service if // the system service message bit for this SPU is set. - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit - spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); } } else { // Caclulate the scheduling weight for each workload u8 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { auto j = i & 0x0F; - u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; @@ -365,7 +373,7 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. // In case of a tie the lower numbered workload is chosen. u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) { + if (ctxt->wklCurrentId == i) { weight |= 0x04; } @@ -381,9 +389,9 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); @@ -404,13 +412,13 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { + ctxt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + ctxt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != ctxt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { @@ -419,12 +427,12 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -432,81 +440,27 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { return true; } -/// SPURS kernel main -bool spursKernelMain(SPUThread & spu) { - SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); +/// SPURS kernel dispatch workload +void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - bool isKernel2; - u32 pollStatus; - if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { - // Entry point of SPURS kernel - // Save arguments - mgmt->spuNum = spu.GPR[3]._u32[3]; - mgmt->spurs.set(spu.GPR[4]._u64[1]); + auto pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - - memset(mgmt, 0, sizeof(SpursKernelMgmtData)); - - // Initialise the SPURS management area to its initial values - mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; - mgmt->wklCurrentUniqueId = 0x20; - mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - mgmt->yieldToKernelAddr = isKernel2 ? 
CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR; - mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; - if (!isKernel2) { - mgmt->x1F0 = 0xF0020000; - mgmt->x200 = 0x20000; - mgmt->guid[0] = 0x423A3A02; - mgmt->guid[1] = 0x43F43A82; - mgmt->guid[2] = 0x43F26502; - mgmt->guid[3] = 0x420EB382; - } else { - mgmt->guid[0] = 0x43A08402; - mgmt->guid[1] = 0x43FB0A82; - mgmt->guid[2] = 0x435E9302; - mgmt->guid[3] = 0x43A3C982; - } - - // Register SPURS kernel HLE functions - spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant - spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); - spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); - spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - - // DMA in the system service workload info - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfoSysSrv), 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID/*tag*/); - spursDmaWaitForCompletion(spu, 0x80000000); - pollStatus = 0; - } else if (spu.PC == mgmt->yieldToKernelAddr) { - isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - - // Select next workload to run - spu.GPR[3].clear(); - if (isKernel2) { - spursKernel2SelectWorkload(spu); - } else { - spursKernel1SelectWorkload(spu); - } - - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - - // DMA in the workload info for the selected workload - auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : - wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : - offsetof(CellSpurs, m.wklInfoSysSrv); - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - spursDmaWaitForCompletion(spu, 0x80000000); - } else { - assert(0); - } + // DMA in the workload info for the selected workload + auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : + wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : + offsetof(CellSpurs, m.wklInfoSysSrv); + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); + // Load the workload to LS auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0); - if (mgmt->wklCurrentAddr != wklInfo->addr) { + if (ctxt->wklCurrentAddr != wklInfo->addr) { switch (wklInfo->addr.addr()) { case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: - spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + spu.RegisterHleFunction(0xA00, spursSysServiceEntry); break; case SPURS_IMG_ADDR_TASKSET_PM: spu.RegisterHleFunction(0xA00, spursTasksetEntry); @@ -517,22 +471,81 @@ bool spursKernelMain(SPUThread & spu) { break; } - mgmt->wklCurrentAddr = wklInfo->addr; - mgmt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); + ctxt->wklCurrentAddr = wklInfo->addr; + ctxt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); } if (!isKernel2) { - mgmt->moduleId[0] = 0; - mgmt->moduleId[1] = 0; + ctxt->moduleId[0] = 0; + ctxt->moduleId[1] = 0; } // Run workload - spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; + spu.GPR[0]._u32[3] = ctxt->exitToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; spu.GPR[4]._u64[1] = wklInfo->arg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); +} + +/// SPURS kernel workload exit +bool spursKernelWorkloadExit(SPUThread & spu) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + + // Select next workload to run + spu.GPR[3].clear(); + if (isKernel2) { + spursKernel2SelectWorkload(spu); + } else { + spursKernel1SelectWorkload(spu); + } + + spursKernelDispatchWorkload(spu, spu.GPR[3]._u64[1]); + return false; +} + +/// SPURS kernel entry point +bool spursKernelEntry(SPUThread & spu) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + // Save arguments + ctxt->spuNum = spu.GPR[3]._u32[3]; + ctxt->spurs.set(spu.GPR[4]._u64[1]); + + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + + memset(ctxt, 0, sizeof(SpursKernelContext)); + + // Initialise the SPURS context to its initial values + ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; + ctxt->wklCurrentUniqueId = 0x20; + ctxt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + ctxt->exitToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_EXIT_ADDR : CELL_SPURS_KERNEL1_EXIT_ADDR; + ctxt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; + if (!isKernel2) { + ctxt->x1F0 = 0xF0020000; + ctxt->x200 = 0x20000; + ctxt->guid[0] = 0x423A3A02; + ctxt->guid[1] = 0x43F43A82; + ctxt->guid[2] = 0x43F26502; + ctxt->guid[3] = 0x420EB382; + } else { + ctxt->guid[0] = 0x43A08402; + ctxt->guid[1] = 0x43FB0A82; + ctxt->guid[2] = 0x435E9302; + ctxt->guid[3] = 0x43A3C982; + } + + // Register SPURS kernel HLE functions + spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/); + spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry); + spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit); + spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? 
spursKernel2SelectWorkload : spursKernel1SelectWorkload); + + // Start the system service + spursKernelDispatchWorkload(spu, ((u64)CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) << 32); return false; } @@ -540,120 +553,337 @@ bool spursKernelMain(SPUThread & spu) { // SPURS system workload functions ////////////////////////////////////////////////////////////////////////////// -/// Restore scheduling parameters after a workload has been preempted by the system service workload -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - u8 wklId; +/// Entry point of the system service +bool spursSysServiceEntry(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + if (ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system workload. Need to implement this. + } - if (spurs->m.sysSrvWorkload[mgmt->spuNum] == 0xFF) { - return; - } + return false; +} - wklId = spurs->m.sysSrvWorkload[mgmt->spuNum]; - spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { + // Monitor only lock line reservation lost events + spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); - spursSysServiceUpdateWorkload(spu, mgmt); - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + bool shouldExit; + while (true) { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + // Find the number of SPUs that are idling in this SPURS instance + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace - // uses the current worload id to determine the workload to which the trace belongs - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; + bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; + bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; + shouldExit = allSpusIdle && exitIfNoWork; - // Trace - STOP: GUID + // Check if any workloads can be scheduled + bool foundReadyWorkload = false; + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + foundReadyWorkload = true; + } else { + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + u32 j = i & 0x0F; + u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + + if (runnable && priority > 0 && maxContention > contention) { + if (wklFlag || wklSignal || readyCount > contention) { + foundReadyWorkload = true; + break; + } + } + } + } else { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + u8 runnable = ctxt->wklRunnable1 & (0x8000 >> i); + u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 requestCount = readyCount + idleSpuCount; + + if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) { + foundReadyWorkload = true; + break; + } + } + } + } + } + + bool spuIdling = spurs->m.spuIdling & (1 << ctxt->spuNum) ? true : false; + if (foundReadyWorkload && shouldExit == false) { + spurs->m.spuIdling &= ~(1 << ctxt->spuNum); + } else { + spurs->m.spuIdling |= 1 << ctxt->spuNum; + } + + // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. + if (spuIdling && shouldExit == false && foundReadyWorkload == false) { + // The system service blocks by making a reservation and waiting on the lock line reservation lost event. 
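+            // The GETLLAR at the top of this loop created the reservation on
+            // the first 0x80 bytes of the CellSpurs area; reading
+            // SPU_RdEventStat then blocks until the SPU_EVENT_LR (lock line
+            // reservation lost) event fires, i.e. until another SPU or the PPU
+            // writes that line. The PUTLLC below consequently fails and the
+            // loop re-examines the scheduling state with fresh data.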
+            u128 r;
+            spu.ReadChannel(r, SPU_RdEventStat);
+            spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR));
+        }
+
+        auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        if (dmaSuccess && (shouldExit || foundReadyWorkload)) {
+            break;
+        }
+    }
+
+    if (shouldExit) {
+        // TODO: exit spu thread group
+    }
+}
+
+/// Main function for the system service
+void spursSysServiceMain(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+
+    if (ctxt->spurs.addr() % CellSpurs::align) {
+        spursHalt(spu);
+        return;
+    }
+
+    // Initialise the system service if this is the first time it's being started on this SPU
+    if (ctxt->sysSrvInitialised == 0) {
+        ctxt->sysSrvInitialised = 1;
+
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+
+        do {
+            spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+            CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+            // Halt if already initialised
+            if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) {
+                spursHalt(spu);
+                return;
+            }
+
+            spurs->m.sysSrvOnSpu |= 1 << ctxt->spuNum;
+        } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+        ctxt->traceBuffer = 0;
+        ctxt->traceMsgCount = -1;
+        spursSysServiceTraceUpdate(spu, ctxt, 1, 1, 0);
+        spursSysServiceCleanupAfterSystemWorkload(spu, ctxt);
+
+        // Trace - SERVICE: INIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+    }
+
+    // Trace - START: Module='SYS '
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
+    memcpy(pkt.data.start.module, "SYS ", 4);
+    pkt.data.start.level = 1; // Policy module
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    while (true) {
+        // Process requests for the system service
+        spursSysServiceProcessRequests(spu, ctxt);
+
+poll:
+        if (cellSpursModulePollStatus(spu, nullptr)) {
+            // Trace - SERVICE: EXIT
+            CellSpursTracePacket pkt;
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            // Trace - STOP: GUID
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+            pkt.data.stop = SPURS_GUID_SYS_WKL;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+            break;
+        }
+
+        // If we reach here it means that either there are more system service messages to be processed
+        // or there are no workloads that can be scheduled.
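+        // (cellSpursModulePollStatus returns non-zero only when the kernel
+        // selects a workload other than the current one, so for the system
+        // service a zero result means no other workload is ready to run.)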
+ + // If the SPU is not idling then process the remaining system service messages + if (ctxt->spuIdling == 0) { + continue; + } + + // If we reach here it means that the SPU is idling + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); + + spursSysServiceIdleHandler(spu, ctxt); + goto poll; } } -/// Update trace control in SPU from CellSpurs -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - bool notify; +/// Process any requests +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) { + bool updateTrace = false; + bool updateWorkload = false; + bool terminate = false; - u8 sysSrvMsgUpdateTrace; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; - spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - spurs->m.xCC &= ~(1 << mgmt->spuNum); - spurs->m.xCC |= arg2 << mgmt->spuNum; - - notify = false; - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { - spurs->m.xCD = 0; - notify = true; + // Terminate request + if (spurs->m.sysSrvMsgTerminate & (1 << ctxt->spuNum)) { + spurs->m.sysSrvOnSpu &= ~(1 << ctxt->spuNum); + terminate = true; } - if (arg4 && spurs->m.xCD != 0) { - spurs->m.xCD = 0; - notify = true; - } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - // Get trace parameters from CellSpurs and store them in the LS - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); - - if (mgmt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { - spursSysServiceUpdateTraceCount(spu, mgmt); - } else { - spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, mgmt->dmaTagId); - auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); - mgmt->traceMsgCount = traceBuffer->count[mgmt->spuNum]; + // Update workload message + if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << ctxt->spuNum)) { + spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << ctxt->spuNum); + updateWorkload = true; } - mgmt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; - if (mgmt->traceBuffer == 0) { - mgmt->traceMsgCount = 0; + // Update trace message + if (spurs->m.sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) { + updateTrace = true; } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Process update workload message + if (updateWorkload) { + spursSysServiceActivateWorkload(spu, ctxt); } - if (notify) { - auto spurs = 
vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); + // Process update trace message + if (updateTrace) { + spursSysServiceTraceUpdate(spu, ctxt, 1, 0, 0); + } + + // Process terminate request + if (terminate) { + // TODO: Rest of the terminate processing } } -/// Update events in CellSpurs -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { +/// Activate a workload +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + } + + u32 wklShutdownBitSet = 0; + ctxt->wklRunnable1 = 0; + ctxt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); + + // Copy the priority of the workload for this SPU and its unique id to the LS + ctxt->priority[i] = wklInfo1[i].priority[ctxt->spuNum] == 0 ? 0 : 0x10 - wklInfo1[i].priority[ctxt->spuNum]; + ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); + + // Copy the priority of the workload for this SPU to the LS + if (wklInfo2[i].priority[ctxt->spuNum]) { + ctxt->priority[i] |= (0x10 - wklInfo2[i].priority[ctxt->spuNum]) << 4; + } + } + } + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + // Update workload status and runnable flag based on the workload state + auto wklStatus = spurs->m.wklStatus1[i]; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus1[i] |= 1 << ctxt->spuNum; + ctxt->wklRunnable1 |= 0x8000 >> i; + } else { + spurs->m.wklStatus1[i] &= ~(1 << ctxt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { + spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + // Update workload status and runnable flag based on the workload state + wklStatus = spurs->m.wklStatus2[i]; + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus2[i] |= 1 << ctxt->spuNum; + ctxt->wklRunnable2 |= 0x8000 >> i; + } else { + spurs->m.wklStatus2[i] &= ~(1 << ctxt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { + spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } + } + } + } + } 
while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + if (wklShutdownBitSet) { + spursSysServiceUpdateShutdownCompletionEvents(spu, ctxt, wklShutdownBitSet); + } +} + +/// Update shutdown completion events +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet) { // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed // workloads that have a shutdown completion hook registered u32 wklNotifyBitSet; u8 spuPort; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); wklNotifyBitSet = 0; @@ -673,334 +903,117 @@ void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 } } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklNotifyBitSet) { // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyMask); } } -/// Update workload information in the SPU from CellSpurs -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { - auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - } - - u32 wklShutdownBitSet = 0; - mgmt->wklRunnable1 = 0; - mgmt->wklRunnable2 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); - - // Copy the priority of the workload for this SPU and its unique id to the LS - mgmt->priority[i] = wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); - - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); - - // Copy the priority of the workload for this SPU to the LS - if (wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - wklInfo2[i].priority[mgmt->spuNum]) << 4; - } - } - } - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - // Update workload status and runnable flag based on the workload state - auto wklStatus = spurs->m.wklStatus1[i]; - if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } - - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { - spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x80000000u >> i; - } - } - - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - // Update workload status and runnable flag based on the workload state - wklStatus = spurs->m.wklStatus2[i]; - if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; - } else { - spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); - } - - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { - spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x8000 >> i; - } - } - } - } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - if (wklShutdownBitSet) { - spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); +/// Update the trace count for this SPU +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt) { + if (ctxt->traceBuffer) { + auto traceInfo = vm::ptr::make((u32)(ctxt->traceBuffer - (ctxt->spurs->m.traceStartIndex[ctxt->spuNum] << 4))); + traceInfo->count[ctxt->spuNum] = ctxt->traceMsgCount; } } -/// Process any messages -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - bool updateTrace = false; - bool updateWorkload = false; - bool terminate = false; +/// Update trace control +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4) { + bool notify; + u8 sysSrvMsgUpdateTrace; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - // Terminate request - if 
(spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); - terminate = true; + sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; + spurs->m.sysSrvMsgUpdateTrace &= ~(1 << ctxt->spuNum); + spurs->m.xCC &= ~(1 << ctxt->spuNum); + spurs->m.xCC |= arg2 << ctxt->spuNum; + + notify = false; + if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { + spurs->m.xCD = 0; + notify = true; } - // Update workload message - if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); - updateWorkload = true; + if (arg4 && spurs->m.xCD != 0) { + spurs->m.xCD = 0; + notify = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Get trace parameters from CellSpurs and store them in the LS + if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0)) { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); + + if (ctxt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { + spursSysServiceTraceSaveCount(spu, ctxt); + } else { + spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, ctxt->dmaTagId); + auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); + ctxt->traceMsgCount = traceBuffer->count[ctxt->spuNum]; } - // Update trace message - if (spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { - updateTrace = true; + ctxt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[ctxt->spuNum] << 4); + ctxt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; + if (ctxt->traceBuffer == 0) { + ctxt->traceMsgCount = 0; } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - // Process update workload message - if (updateWorkload) { - spursSysServiceUpdateWorkload(spu, mgmt); } - // Process update trace message - if (updateTrace) { - spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); - } - - // Process terminate request - if (terminate) { - // TODO: Rest of the terminate processing + if (notify) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); } } -/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { - // Monitor only lock line reservation lost events - spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); +/// Restore state after executing the system workload +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt) { + u8 wklId; - bool shouldExit; - while (true) { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + if (spurs->m.sysSrvWorkload[ctxt->spuNum] == 0xFF) { + return; + } + + wklId = spurs->m.sysSrvWorkload[ctxt->spuNum]; + 
spurs->m.sysSrvWorkload[ctxt->spuNum] = 0xFF; + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + spursSysServiceActivateWorkload(spu, ctxt); + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - // Find the number of SPUs that are idling in this SPURS instance - u32 nIdlingSpus = 0; - for (u32 i = 0; i < 8; i++) { - if (spurs->m.spuIdling & (1 << i)) { - nIdlingSpus++; - } - } - - bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; - bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; - shouldExit = allSpusIdle && exitIfNoWork; - - // Check if any workloads can be scheduled - bool foundReadyWorkload = false; - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - foundReadyWorkload = true; + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); } else { - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - u32 j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); - - if (runnable && priority > 0 && maxContention > contention) { - if (wklFlag || wklSignal || readyCount > contention) { - foundReadyWorkload = true; - break; - } - } - } - } else { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
- u8 requestCount = readyCount + idleSpuCount;
-
- if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) {
- if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) {
- foundReadyWorkload = true;
- break;
- }
- }
- }
- }
+ spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
+ spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
}
+ } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
- bool spuIdling = spurs->m.spuIdling & (1 << mgmt->spuNum) ? true : false;
- if (foundReadyWorkload && shouldExit == false) {
- spurs->m.spuIdling &= ~(1 << mgmt->spuNum);
- } else {
- spurs->m.spuIdling |= 1 << mgmt->spuNum;
- }
+ // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace
+ // uses the current workload id to determine the workload to which the trace belongs
+ auto wklIdSaved = ctxt->wklCurrentId;
+ ctxt->wklCurrentId = wklId;
- // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events.
- if (spuIdling && shouldExit == false && foundReadyWorkload == false) {
- // The system service blocks by making a reservation and waiting on the lock line reservation lost event.
- u128 r;
- spu.ReadChannel(r, SPU_RdEventStat);
- spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR));
- }
-
- auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
- if (dmaSuccess && (shouldExit || foundReadyWorkload)) {
- break;
- }
- }
-
- if (shouldExit) {
- // TODO: exit spu thread group
- }
-}
-
-/// Main function for the system service workload
-void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) {
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x100);
-
- if (mgmt->spurs.addr() % CellSpurs::align) {
- spursHalt(spu);
- return;
- }
-
- // Initialise the system service if this is the first time its being started on this SPU
- if (mgmt->sysSrvInitialised == 0) {
- mgmt->sysSrvInitialised = 1;
-
- spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
-
- do {
- spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
- CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
-
- // Halt if already initialised
- if (spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) {
- spursHalt(spu);
- return;
- }
-
- spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum;
- } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
-
- mgmt->traceBuffer = 0;
- mgmt->traceMsgCount = -1;
- spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0);
- spursSysServiceCleanupAfterPreemption(spu, mgmt);
-
- // Trace - SERVICE: INIT
- CellSpursTracePacket pkt;
- memset(&pkt, 0, sizeof(pkt));
- pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
- pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
- cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
- }
-
- // Trace - START: Module='SYS '
+ // Trace - STOP: GUID
CellSpursTracePacket pkt;
memset(&pkt, 0, sizeof(pkt));
- pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
- memcpy(pkt.data.start.module, "SYS ",
4); - pkt.data.start.level = 1; // Policy module - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); - while (true) { - // Process messages for the system service workload - spursSysServiceProcessMessages(spu, mgmt); - -poll: - if (cellSpursModulePollStatus(spu, nullptr)) { - // Trace - SERVICE: EXIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - // Trace - STOP: GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = SPURS_GUID_SYS_WKL; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - break; - } - - // If we reach here it means that either there are more system service messages to be processed - // or there are no workloads that can be scheduled. - - // If the SPU is not idling then process the remaining system service messages - if (mgmt->spuIdling == 0) { - continue; - } - - // If we reach here it means that the SPU is idling - - // Trace - SERVICE: WAIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursSysServiceWaitOrExit(spu, mgmt); - goto poll; - } -} - -/// Entry point of the system service workload -bool spursSysServiceWorkloadEntry(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); - auto arg = spu.GPR[4]._u64[1]; - auto pollStatus = spu.GPR[5]._u32[3]; - - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - spursSysServiceWorkloadMain(spu, pollStatus); - } else { - // TODO: If we reach here it means the current workload was preempted to start the - // system service workload. Need to implement this. 
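The restructured system service code above leans on a single concurrency idiom: take a GETLLAR reservation on a 128-byte line of the shared CellSpurs structure, modify the copy in local storage, then PUTLLC it back and retry the whole block if another SPU broke the reservation in between. A minimal sketch of the pattern as it appears in these hunks, assuming the spursDma signature used throughout this file; updateLine is a hypothetical placeholder for the per-call-site modification:

    do {
        // Acquire a reservation on the 128-byte line holding wklState1
        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
        auto spurs = vm::get_ptr<CellSpurs>(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));

        updateLine(spurs); // modify the local copy

        // Conditional store; spursDma reports failure if the reservation was lost, so retry
    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);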
- }
-
- return false;
+ ctxt->wklCurrentId = wklIdSaved;
}
//////////////////////////////////////////////////////////////////////////////
@@ -1018,14 +1031,98 @@ enum SpursTasksetRequest {
SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6,
};
+/// Taskset PM entry point
+bool spursTasksetEntry(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto kernelCtxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
+
+ auto arg = spu.GPR[4]._u64[1];
+ auto pollStatus = spu.GPR[5]._u32[3];
+
+ // Initialise memory and save args
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->taskset.set(arg);
+ memcpy(ctxt->moduleId, "SPURSTASK MODULE", sizeof(ctxt->moduleId));
+ ctxt->kernelMgmtAddr = spu.GPR[3]._u32[3];
+ ctxt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
+ ctxt->spuNum = kernelCtxt->spuNum;
+ ctxt->dmaTagId = kernelCtxt->dmaTagId;
+ ctxt->taskId = 0xFFFFFFFF;
+
+ // Register SPURS taskset policy module HLE functions
+ spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
+ spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
+ spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
+
+ // Initialise the taskset policy module
+ spursTasksetInit(spu, pollStatus);
+
+ // Dispatch
+ spursTasksetDispatch(spu);
+ return false;
+}
+
+/// Entry point into the Taskset PM for task syscalls
+bool spursTasksetSyscallEntry(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+ // Save task context
+ ctxt->savedContextLr = spu.GPR[0];
+ ctxt->savedContextSp = spu.GPR[1];
+ for (auto i = 0; i < 48; i++) {
+ ctxt->savedContextR80ToR127[i] = spu.GPR[80 + i];
+ }
+
+ // Handle the syscall
+ spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
+
+ // Resume the previously executing task if the syscall did not cause a context switch
+ if (spu.m_is_branch == false) {
+ spursTasksetResumeTask(spu);
+ }
+
+ return false;
+}
+
+/// Resume a task
+void spursTasksetResumeTask(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+ // Restore task context
+ spu.GPR[0] = ctxt->savedContextLr;
+ spu.GPR[1] = ctxt->savedContextSp;
+ for (auto i = 0; i < 48; i++) {
+ spu.GPR[80 + i] = ctxt->savedContextR80ToR127[i];
+ }
+
+ spu.SetBranch(spu.GPR[0]._u32[3]);
+}
+
+/// Start a task
+void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+ spu.GPR[2].clear();
+ spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]);
+ spu.GPR[4]._u64[1] = taskset->m.args;
+ spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
+ for (auto i = 5; i < 128; i++) {
+ spu.GPR[i].clear();
+ }
+
+ spu.SetBranch(ctxt->savedContextLr.value()._u32[3]);
+}
+
+/// Process a request and update the state of the taskset
s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) {
- auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100);
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
s32 rc = CELL_OK;
s32 numNewlyReadyTasks;
do {
- spursDma(spu, MFC_GETLLAR_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
+ spursDma(spu, MFC_GETLLAR_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
// Verify taskset state is valid
@@ -1055,27 +1152,27 @@ s32
spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * switch (request) { case SPURS_TASKSET_REQUEST_POLL_SIGNAL: - rc = signalled._bit[mgmt->taskId] ? 1 : 0; - signalled._bit[mgmt->taskId] = false; + rc = signalled._bit[ctxt->taskId] ? 1 : 0; + signalled._bit[ctxt->taskId] = false; break; case SPURS_TASKSET_REQUEST_DESTROY_TASK: numNewlyReadyTasks--; - running._bit[mgmt->taskId] = false; - enabled._bit[mgmt->taskId] = false; - signalled._bit[mgmt->taskId] = false; - ready._bit[mgmt->taskId] = false; + running._bit[ctxt->taskId] = false; + enabled._bit[ctxt->taskId] = false; + signalled._bit[ctxt->taskId] = false; + ready._bit[ctxt->taskId] = false; break; case SPURS_TASKSET_REQUEST_YIELD_TASK: - running._bit[mgmt->taskId] = false; - waiting._bit[mgmt->taskId] = true; + running._bit[ctxt->taskId] = false; + waiting._bit[ctxt->taskId] = true; break; case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: - if (signalled._bit[mgmt->taskId]) { + if (signalled._bit[ctxt->taskId]) { numNewlyReadyTasks--; - running._bit[mgmt->taskId] = false; - waiting._bit[mgmt->taskId] = true; - signalled._bit[mgmt->taskId] = false; - ready._bit[mgmt->taskId] = false; + running._bit[ctxt->taskId] = false; + waiting._bit[ctxt->taskId] = true; + signalled._bit[ctxt->taskId] = false; + ready._bit[ctxt->taskId] = false; } break; case SPURS_TASKSET_REQUEST_POLL: @@ -1093,9 +1190,9 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * rc = 0; } else if (taskset->m.wkl_flag_wait_task == 0x80) { // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag. - taskset->m.wkl_flag_wait_task = mgmt->taskId; - running._bit[mgmt->taskId] = false; - waiting._bit[mgmt->taskId] = true; + taskset->m.wkl_flag_wait_task = ctxt->taskId; + running._bit[ctxt->taskId] = false; + waiting._bit[ctxt->taskId] = true; rc = 1; numNewlyReadyTasks--; } else { @@ -1132,8 +1229,8 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0; if (selectedTaskId != CELL_SPURS_MAX_TASK) { taskset->m.last_scheduled_task = selectedTaskId; - running._bit[mgmt->taskId] = true; - waiting._bit[mgmt->taskId] = false; + running._bit[ctxt->taskId] = true; + waiting._bit[ctxt->taskId] = false; } break; case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: @@ -1159,101 +1256,138 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * taskset->m.enabled = enabled; taskset->m.signalled = signalled; taskset->m.ready = ready; - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); // Increment the ready count of the workload by the number of tasks that have become ready do { - spursDma(spu, MFC_GETLLAR_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - s32 readyCount = kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed(); + s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? 
spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed(); readyCount += numNewlyReadyTasks; readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount; - if (kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { - spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(readyCount); + if (kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].write_relaxed(readyCount); } else { - spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(readyCount); + spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount); } - } while (spursDma(spu, MFC_PUTLLC_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); return rc; } -s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) { - if (elfAddr == 0 || (elfAddr & 0x0F) != 0) { - return CELL_SPURS_TASK_ERROR_INVAL; +/// Process pollStatus received from the SPURS kernel +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { + if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr); } - - vfsStreamMemory stream(elfAddr); - loader::handlers::elf32 loader; - auto rc = loader.init(stream); - if (rc != loader::handler::ok) { - return CELL_SPURS_TASK_ERROR_NOEXEC; - } - - u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM; - for (auto & phdr : loader.m_phdrs) { - if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) { - break; - } - - if (phdr.data_be.p_type == 1/*PT_LOAD*/) { - if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) { - if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP || - phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) { - return CELL_SPURS_TASK_ERROR_FAULT; - } - - _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr; - } - } - } - - loader.load_data(spu.ls_offset, skipWriteableSegments); - *entryPoint = loader.m_ehdr.data_be.e_entry; - if (*lowestLoadAddr) { - *lowestLoadAddr = _lowestLoadAddr; - } - - return CELL_OK; } +/// Check execution rights +bool spursTasksetPollStatus(SPUThread & spu) { + u32 pollStatus; + + if (cellSpursModulePollStatus(spu, &pollStatus)) { + return true; + } + + spursTasksetProcessPollStatus(spu, pollStatus); + return false; +} + +/// Exit the Taskset PM void spursTasksetExit(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); // Trace - STOP CellSpursTracePacket pkt; memset(&pkt, 0, sizeof(pkt)); pkt.header.tag = 0x54; // Its not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP pkt.data.stop = SPURS_GUID_TASKSET_PM; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); // Not sure why this check exists. Perhaps to check for memory corruption. 
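The ready-count update in spursTasksetProcessRequest above adds the number of newly ready tasks and then clamps the result into the unsigned 8-bit range that a workload's ready count occupies. The same saturating update as a standalone sketch, using the codebase's u8/s32 typedefs; clampReadyCount is an illustrative name, not a function from the patch:

    static u8 clampReadyCount(s32 current, s32 delta) {
        s32 count = current + delta;          // delta may be negative when tasks are consumed
        count = count < 0 ? 0 : count;        // never drop below zero
        count = count > 0xFF ? 0xFF : count;  // saturate at the u8 maximum
        return static_cast<u8>(count);
    }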
- if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) {
+ if (memcmp(ctxt->moduleId, "SPURSTASK MODULE", 16) != 0) {
spursHalt(spu);
}
cellSpursModuleExit(spu);
}
-void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700);
- auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+/// Invoked when a task exits
+void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
- spu.GPR[2].clear();
- spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]);
- spu.GPR[4]._u64[1] = taskset->m.args;
- spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
- for (auto i = 5; i < 128; i++) {
- spu.GPR[i].clear();
- }
+ spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0);
+ spursDmaWaitForCompletion(spu, 1);
- spu.SetBranch(mgmt->savedContextLr.value()._u32[3]);
+ spu.GPR[3]._u64[1] = ctxt->taskset.addr();
+ spu.GPR[4]._u32[3] = taskId;
+ spu.GPR[5]._u32[3] = exitCode;
+ spu.GPR[6]._u64[1] = args;
+ spu.FastCall(0x10000);
}
+/// Save the context of a task
+s32 spursTasketSaveTaskContext(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780);
+
+ spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+
+ if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) {
+ return CELL_SPURS_TASK_ERROR_STAT;
+ }
+
+ u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
+ u32 lsBlocks = 0;
+ for (auto i = 0; i < 128; i++) {
+ if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) {
+ lsBlocks++;
+ }
+ }
+
+ if (lsBlocks > allocLsBlocks) {
+ return CELL_SPURS_TASK_ERROR_STAT;
+ }
+
+ // Make sure the stack area is specified in the ls pattern
+ for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) {
+ if ((taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) == 0) {
+ return CELL_SPURS_TASK_ERROR_STAT;
+ }
+ }
+
+ // Get the processor context
+ u128 r;
+ spu.FPSCR.Read(r);
+ ctxt->savedContextFpscr = r;
+ spu.ReadChannel(r, SPU_RdEventMask);
+ ctxt->savedSpuWriteEventMask = r._u32[3];
+ spu.ReadChannel(r, MFC_RdTagMask);
+ ctxt->savedWriteTagGroupQueryMask = r._u32[3];
+
+ // Store the processor context
+ u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+ spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+
+ // Save LS context
+ for (auto i = 6; i < 128; i++) {
+ bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ?
true : false; + if (shouldStore) { + // TODO: Combine DMA requests for consecutive blocks into a single request + spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); + } + } + + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + return CELL_OK; +} + +/// Taskset dispatcher void spursTasksetDispatch(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); u32 taskId; @@ -1264,11 +1398,11 @@ void spursTasksetDispatch(SPUThread & spu) { return; } - mgmt->taskId = taskId; + ctxt->taskId = taskId; // DMA in the task info for the selected task - spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), mgmt->dmaTagId); - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), ctxt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); auto elfAddr = taskInfo->elf_addr.addr().value(); taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull); @@ -1283,7 +1417,7 @@ void spursTasksetDispatch(SPUThread & spu) { if (isWaiting == 0) { // If we reach here it means that the task is being started and not being resumed - mgmt->guidAddr = CELL_SPURS_TASK_TOP; + ctxt->guidAddr = CELL_SPURS_TASK_TOP; u32 entryPoint; u32 lowestLoadAddr; @@ -1292,17 +1426,17 @@ void spursTasksetDispatch(SPUThread & spu) { return; } - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); - mgmt->savedContextLr.value()._u32[3] = entryPoint; - mgmt->guidAddr = lowestLoadAddr; - mgmt->tasksetMgmtAddr = 0x2700; - mgmt->x2FC0 = 0; - mgmt->taskExitCode = isWaiting; - mgmt->x2FD4 = elfAddr & 5; // TODO: Figure this out + ctxt->savedContextLr.value()._u32[3] = entryPoint; + ctxt->guidAddr = lowestLoadAddr; + ctxt->tasksetMgmtAddr = 0x2700; + ctxt->x2FC0 = 0; + ctxt->taskExitCode = isWaiting; + ctxt->x2FD4 = elfAddr & 5; // TODO: Figure this out if ((elfAddr & 5) == 1) { - spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, mgmt->dmaTagId); + spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, ctxt->dmaTagId); } // Trace - GUID @@ -1340,16 +1474,16 @@ void spursTasksetDispatch(SPUThread & spu) { bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? 
true : false; if (shouldLoad) { // TODO: Combine DMA requests for consecutive blocks into a single request - spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); + spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); } } - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); // Restore saved registers - spu.FPSCR.Write(mgmt->savedContextFpscr.value()); - spu.WriteChannel(MFC_WrTagMask, u128::from32r(mgmt->savedWriteTagGroupQueryMask)); - spu.WriteChannel(SPU_WrEventMask, u128::from32r(mgmt->savedSpuWriteEventMask)); + spu.FPSCR.Write(ctxt->savedContextFpscr.value()); + spu.WriteChannel(MFC_WrTagMask, u128::from32r(ctxt->savedWriteTagGroupQueryMask)); + spu.WriteChannel(SPU_WrEventMask, u128::from32r(ctxt->savedSpuWriteEventMask)); // Trace - GUID memset(&pkt, 0, sizeof(pkt)); @@ -1368,125 +1502,9 @@ void spursTasksetDispatch(SPUThread & spu) { } } -void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { - if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { - spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr); - } -} - -bool spursTasksetPollStatus(SPUThread & spu) { - u32 pollStatus; - - if (cellSpursModulePollStatus(spu, &pollStatus)) { - return true; - } - - spursTasksetProcessPollStatus(spu, pollStatus); - return false; -} - -void spursTasksetInit(SPUThread & spu, u32 pollStatus) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); - - kernelMgmt->moduleId[0] = 'T'; - kernelMgmt->moduleId[1] = 'K'; - - // Trace - START: Module='TKST' - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = 0x52; // Its not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START - memcpy(pkt.data.start.module, "TKST", 4); - pkt.data.start.level = 2; - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursTasksetProcessPollStatus(spu, pollStatus); -} - -void spursTasksetResumeTask(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - - // Restore task context - spu.GPR[0] = mgmt->savedContextLr; - spu.GPR[1] = mgmt->savedContextSp; - for (auto i = 0; i < 48; i++) { - spu.GPR[80 + i] = mgmt->savedContextR80ToR127[i]; - } - - spu.SetBranch(spu.GPR[0]._u32[3]); -} - -s32 spursTasketSaveTaskContext(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); - - spursDmaWaitForCompletion(spu, 0xFFFFFFFF); - - if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) { - return CELL_SPURS_TASK_ERROR_STAT; - } - - u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; - u32 lsBlocks = 0; - for (auto i = 0; i < 128; i++) { - if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) { - lsBlocks++; - } - } - - if (lsBlocks > allocLsBlocks) { - return CELL_SPURS_TASK_ERROR_STAT; - } - - // Make sure the stack is area is specified in the ls pattern - for (auto i = (mgmt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if ((taskInfo->ls_pattern.u64[i < 64 ? 
1 : 0] & (0x8000000000000000ull >> i)) == 0) { - return CELL_SPURS_TASK_ERROR_STAT; - } - } - - // Get the processor context - u128 r; - spu.FPSCR.Read(r); - mgmt->savedContextFpscr = r; - spu.ReadChannel(r, SPU_RdEventMask); - mgmt->savedSpuWriteEventMask = r._u32[3]; - spu.ReadChannel(r, MFC_RdTagMask); - mgmt->savedWriteTagGroupQueryMask = r._u32[3]; - - // Store the processor context - u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; - spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, mgmt->dmaTagId); - - // Save LS context - for (auto i = 6; i < 128; i++) { - bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; - if (shouldStore) { - // TODO: Combine DMA requests for consecutive blocks into a single request - spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); - } - } - - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - return CELL_OK; -} - -void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - - spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0); - spursDmaWaitForCompletion(spu, 1); - - spu.GPR[3]._u64[1] = mgmt->taskset.addr(); - spu.GPR[4]._u32[3] = taskId; - spu.GPR[5]._u32[3] = exitCode; - spu.GPR[6]._u64[1] = args; - spu.FastCall(0x10000); -} - +/// Process a syscall request s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); // If the 0x10 bit is set in syscallNum then its the 2nd version of the @@ -1501,14 +1519,14 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { u32 incident = 0; switch (syscallNum) { case CELL_SPURS_TASK_SYSCALL_EXIT: - if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out - if (mgmt->x2FD4 != 4) { + if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out + if (ctxt->x2FD4 != 4) { spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr); } - auto addr = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; - auto args = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8; - spursTasksetOnTaskExit(spu, addr, mgmt->taskId, mgmt->taskExitCode, args); + auto addr = ctxt->x2FD4 == 4 ? taskset->m.x78 : ctxt->x2FC0; + auto args = ctxt->x2FD4 == 4 ? 
0 : ctxt->x2FC8;
+ spursTasksetOnTaskExit(spu, addr, ctxt->taskId, ctxt->taskExitCode, args);
}
incident = CELL_SPURS_TRACE_TASK_EXIT;
@@ -1561,11 +1579,11 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
memset(&pkt, 0, sizeof(pkt));
pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
pkt.data.task.incident = incident;
- pkt.data.task.taskId = mgmt->taskId;
- cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+ pkt.data.task.taskId = ctxt->taskId;
+ cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
// Clear the GUID of the task
- memset(vm::get_ptr(spu.ls_offset + mgmt->guidAddr), 0, 0x10);
+ memset(vm::get_ptr(spu.ls_offset + ctxt->guidAddr), 0, 0x10);
if (spursTasksetPollStatus(spu)) {
spursTasksetExit(spu);
@@ -1577,49 +1595,62 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
return rc;
}
-bool spursTasksetEntry(SPUThread & spu) {
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700);
+/// Initialise the Taskset PM
+void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
- if (spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR) {
- // Called from kernel
- auto kernelMgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
- auto arg = spu.GPR[4]._u64[1];
- auto pollStatus = spu.GPR[5]._u32[3];
+ kernelCtxt->moduleId[0] = 'T';
+ kernelCtxt->moduleId[1] = 'K';
- // Initialise memory and save args
- memset(mgmt, 0, sizeof(*mgmt));
- mgmt->taskset.set(arg);
- memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16);
- mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3];
- mgmt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
- mgmt->spuNum = kernelMgmt->spuNum;
- mgmt->dmaTagId = kernelMgmt->dmaTagId;
- mgmt->taskId = 0xFFFFFFFF;
+ // Trace - START: Module='TKST'
+ CellSpursTracePacket pkt;
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
+ memcpy(pkt.data.start.module, "TKST", 4);
+ pkt.data.start.level = 2;
+ pkt.data.start.ls = 0xA00 >> 2;
+ cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
- // Register SPURS takset policy module HLE functions
- spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000); // TODO: use a symbolic constant
- spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
- spu.RegisterHleFunction(mgmt->syscallAddr, spursTasksetEntry);
+ spursTasksetProcessPollStatus(spu, pollStatus);
+}
- spursTasksetInit(spu, pollStatus);
- spursTasksetDispatch(spu);
- } else if (spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR) {
- // Save task context
- mgmt->savedContextLr = spu.GPR[0];
- mgmt->savedContextSp = spu.GPR[1];
- for (auto i = 0; i < 48; i++) {
- mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i];
- }
-
- spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
-
- // Resume the previously executing task if the syscall did not cause a context switch
- if (spu.m_is_branch == false) {
- spursTasksetResumeTask(spu);
- }
- } else {
- assert(0);
+/// Load an ELF
+s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) {
+ if (elfAddr == 0 || (elfAddr & 0x0F) != 0) {
+ return CELL_SPURS_TASK_ERROR_INVAL;
}
- return false;
+ vfsStreamMemory stream(elfAddr);
+ loader::handlers::elf32 loader;
+ auto rc = loader.init(stream);
+ if (rc != loader::handler::ok) {
+ return CELL_SPURS_TASK_ERROR_NOEXEC;
+ }
+
+ u32 _lowestLoadAddr =
CELL_SPURS_TASK_BOTTOM; + for (auto & phdr : loader.m_phdrs) { + if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) { + break; + } + + if (phdr.data_be.p_type == 1/*PT_LOAD*/) { + if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) { + if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP || + phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) { + return CELL_SPURS_TASK_ERROR_FAULT; + } + + _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr; + } + } + } + + loader.load_data(spu.ls_offset, skipWriteableSegments); + *entryPoint = loader.m_ehdr.data_be.e_entry; + if (*lowestLoadAddr) { + *lowestLoadAddr = _lowestLoadAddr; + } + + return CELL_OK; } From 67342781b7a91ac4d0737279c6e6d1ea565b3c7b Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 3 Feb 2015 02:13:32 +0530 Subject: [PATCH 26/29] SPURS: Fix some issues --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 78 +++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 56 ++++++++------- 2 files changed, 95 insertions(+), 39 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 18eed2c944..dad4f10484 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2588,7 +2588,7 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks taskset->m.size = size; vm::var wkl_attr; - _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(16) /*pm*/, 0x1E40 /*pm_size*/, + _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(SPURS_IMG_ADDR_TASKSET_PM), 0x1E40 /*pm_size*/, taskset.addr(), priority, 8 /*min_contention*/, max_contention); // TODO: Check return code @@ -2768,14 +2768,16 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: if (ls_pattern.addr() != 0) { u32 ls_blocks = 0; - for (u32 i = 0; i < 2; i++) + for (auto i = 0; i < 64; i++) { - for (u32 j = 0; j < 64; j++) + if (ls_pattern->u64[0] & ((u64)1 << i)) { - if (ls_pattern->u64[0] & ((u64)1 << j)) - { - ls_blocks++; - } + ls_blocks++; + } + + if (ls_pattern->u64[1] & ((u64)1 << i)) + { + ls_blocks++; } } @@ -2803,7 +2805,9 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: { if (!taskset->m.enabled.value()._bit[tmp_task_id]) { - taskset->m.enabled.value()._bit[tmp_task_id] = true; + auto enabled = taskset->m.enabled.value(); + enabled._bit[tmp_task_id] = true; + taskset->m.enabled = enabled; break; } } @@ -2818,23 +2822,73 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: for (u32 i = 0; i < 2; i++) { taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; - taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern != 0 ? 
ls_pattern->u64[i] : 0; + if (ls_pattern.addr()) + { + taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern->u64[i]; + } } *task_id = tmp_task_id; return CELL_OK; } -s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, +s64 spursTaskStart(vm::ptr taskset, u32 taskId) +{ + auto pendingReady = taskset->m.pending_ready.value(); + pendingReady._bit[taskId] = true; + taskset->m.pending_ready = pendingReady; + + cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); + auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr::make((u32)taskset->m.spurs.addr())); + if (rc != CELL_OK) + { + if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT) + { + rc = CELL_SPURS_TASK_ERROR_STAT; + } + else + { + assert(0); + } + } + + return rc; +} + +s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskId, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, vm::ptr argument) { cellSpurs->Warning("cellSpursCreateTask(taskset_addr=0x%x, taskID_addr=0x%x, elf_addr_addr=0x%x, context_addr_addr=0x%x, context_size=%d, lsPattern_addr=0x%x, argument_addr=0x%x)", - taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); + taskset.addr(), taskId.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc); #else - return spursCreateTask(taskset, taskID, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + vm::var tmpTaskId; + auto rc = spursCreateTask(taskset, tmpTaskId, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); + if (rc != CELL_OK) + { + return rc; + } + + rc = spursTaskStart(taskset, tmpTaskId); + if (rc != CELL_OK) + { + return rc; + } + + *taskId = tmpTaskId; + return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 451bfef848..aa1b25a081 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -116,7 +116,9 @@ bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { u128 rv; spu.ReadChannel(rv, MFC_RdAtomicStat); - return rv._u32[3] ? true : false; + auto success = rv._u32[3] ? true : false; + success = cmd == MFC_PUTLLC_CMD ? !success : success; + return success; } return true; @@ -442,11 +444,11 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { /// SPURS kernel dispatch workload void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { - SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); - auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - auto pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + auto pollStatus = (u32)widAndPollStatus; + auto wid = (u32)(widAndPollStatus >> 32); // DMA in the workload info for the selected workload auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? 
offsetof(CellSpurs, m.wklInfo1[wid]) : @@ -491,8 +493,8 @@ void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { /// SPURS kernel workload exit bool spursKernelWorkloadExit(SPUThread & spu) { - SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); - auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; // Select next workload to run spu.GPR[3].clear(); @@ -508,7 +510,8 @@ bool spursKernelWorkloadExit(SPUThread & spu) { /// SPURS kernel entry point bool spursKernelEntry(SPUThread & spu) { - SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + memset(ctxt, 0, sizeof(SpursKernelContext)); // Save arguments ctxt->spuNum = spu.GPR[3]._u32[3]; @@ -516,8 +519,6 @@ bool spursKernelEntry(SPUThread & spu) { auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - memset(ctxt, 0, sizeof(SpursKernelContext)); - // Initialise the SPURS context to its initial values ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; ctxt->wklCurrentUniqueId = 0x20; @@ -565,7 +566,8 @@ bool spursSysServiceEntry(SPUThread & spu) { // TODO: If we reach here it means the current workload was preempted to start the // system workload. Need to implement this. } - + + cellSpursModuleExit(spu); return false; } @@ -599,11 +601,11 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { if (spurs->m.flags1 & SF1_32_WORKLOADS) { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { u32 j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); @@ -616,8 +618,8 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { } } else { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 runnable = ctxt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i); + u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); @@ -676,7 +678,7 @@ void spursSysServiceMain(SPUThread & spu, u32 pollStatus) { do { spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); // Halt if already initialised if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) { @@ -1167,7 +1169,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * waiting._bit[ctxt->taskId] = true; break; case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: - if (signalled._bit[ctxt->taskId]) { + if (signalled._bit[ctxt->taskId] == false) { numNewlyReadyTasks--; running._bit[ctxt->taskId] = false; waiting._bit[ctxt->taskId] = true; @@ -1229,8 +1231,8 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0; if (selectedTaskId != CELL_SPURS_MAX_TASK) { taskset->m.last_scheduled_task = selectedTaskId; - running._bit[ctxt->taskId] = true; - waiting._bit[ctxt->taskId] = false; + running._bit[selectedTaskId] = true; + waiting._bit[selectedTaskId] = false; } break; case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: @@ -1343,7 +1345,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; u32 lsBlocks = 0; for (auto i = 0; i < 128; i++) { - if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) { + if (taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) { lsBlocks++; } } @@ -1354,7 +1356,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Make sure the stack is area is specified in the ls pattern for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if ((taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) == 0) { + if ((taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) == 0) { return CELL_SPURS_TASK_ERROR_STAT; } } @@ -1374,7 +1376,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Save LS context for (auto i = 6; i < 128; i++) { - bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; + bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i) ? 
true : false; if (shouldStore) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); @@ -1417,6 +1419,7 @@ void spursTasksetDispatch(SPUThread & spu) { if (isWaiting == 0) { // If we reach here it means that the task is being started and not being resumed + memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); ctxt->guidAddr = CELL_SPURS_TASK_TOP; u32 entryPoint; @@ -1428,7 +1431,7 @@ void spursTasksetDispatch(SPUThread & spu) { spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); - ctxt->savedContextLr.value()._u32[3] = entryPoint; + ctxt->savedContextLr = u128::from32r(entryPoint); ctxt->guidAddr = lowestLoadAddr; ctxt->tasksetMgmtAddr = 0x2700; ctxt->x2FC0 = 0; @@ -1512,12 +1515,11 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { // for DMA completion if ((syscallNum & 0x10) == 0) { spursDmaWaitForCompletion(spu, 0xFFFFFFFF); - syscallNum &= 0x0F; } s32 rc = 0; u32 incident = 0; - switch (syscallNum) { + switch (syscallNum & 0x0F) { case CELL_SPURS_TASK_SYSCALL_EXIT: if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out if (ctxt->x2FD4 != 4) { @@ -1587,9 +1589,9 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { if (spursTasksetPollStatus(spu)) { spursTasksetExit(spu); + } else { + spursTasksetDispatch(spu); } - - spursTasksetDispatch(spu); } return rc; From 0191955ab67d178f6b27526853a63ed800b049d9 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 3 Feb 2015 11:03:49 +0530 Subject: [PATCH 27/29] SPURS: Fix more issues --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 6 ++++-- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 21 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index dad4f10484..2d37755f48 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2923,8 +2923,10 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskId) return CELL_SPURS_TASK_ERROR_SRCH; } - auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? true : false; - ((u128)taskset->m.signalled)._bit[taskId] = true; + auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? 
true : false; + auto signalled = taskset->m.signalled.value(); + signalled._bit[taskId] = true; + taskset->m.signalled = signalled; if (shouldSignal) { cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index aa1b25a081..bbcc00e056 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -270,6 +270,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklCurrentContention[i] = contention[i]; + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; ctxt->wklLocContention[i] = 0; ctxt->wklLocPendingContention[i] = 0; } @@ -294,6 +295,12 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { ctxt->wklLocPendingContention[wklSelectedId] = 1; } + } else { + // Not called by kernel and no context switch is required + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + ctxt->wklLocPendingContention[i] = 0; + } } } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); @@ -414,6 +421,7 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; ctxt->wklLocContention[i] = 0; ctxt->wklLocPendingContention[i] = 0; } @@ -433,6 +441,12 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { } ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + } else { + // Not called by kernel and no context switch is required + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + ctxt->wklLocPendingContention[i] = 0; + } } } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); @@ -1461,8 +1475,8 @@ void spursTasksetDispatch(SPUThread & spu) { } // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well - if (taskInfo->ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || - (taskInfo->ls_pattern.u64[1] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + if (taskInfo->ls_pattern.u64[1] != 0xFFFFFFFFFFFFFFFFull || + (taskInfo->ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { // Load the ELF u32 entryPoint; if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { @@ -1473,8 +1487,9 @@ void spursTasksetDispatch(SPUThread & spu) { // Load saved context from main memory to LS u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId); for (auto i = 6; i < 128; i++) { - bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; + bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 
0 : 1] & (0x8000000000000000ull >> i) ? true : false; if (shouldLoad) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); From daaa5059e9da18efeabd2e94e71159786f735092 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 4 Feb 2015 20:59:34 +0530 Subject: [PATCH 28/29] SPURS: Fixed more issues --- Utilities/BEType.h | 20 ++++++++++--- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 32 +++++++-------------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 7 ++--- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 15 ++++------ 4 files changed, 35 insertions(+), 39 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 4965eac336..8cf5e9a89c 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -1,5 +1,7 @@ #pragma once +#define IS_LE_MACHINE + union _CRT_ALIGN(16) u128 { u64 _u64[2]; @@ -136,16 +138,28 @@ union _CRT_ALIGN(16) u128 } }; + // Index 0 returns the MSB and index 127 returns the LSB bit_element operator [] (u32 index) { assert(index < 128); - return bit_element(data[index / 64], 1ull << (index % 64)); + +#ifdef IS_LE_MACHINE + return bit_element(data[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F)); +#else + return bit_element(data[index >> 6], 0x8000000000000000ull >> (index & 0x3F)); +#endif } + // Index 0 returns the MSB and index 127 returns the LSB const bool operator [] (u32 index) const { assert(index < 128); - return (data[index / 64] & (1ull << (index % 64))) != 0; + +#ifdef IS_LE_MACHINE + return (data[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0; +#else + return (data[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0; +#endif } } _bit; @@ -509,8 +523,6 @@ struct be_storage_t typedef u128 type; }; -#define IS_LE_MACHINE - template class be_t { diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 2d37755f48..18eaca3722 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -34,9 +34,9 @@ s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptr> queue; - s32 res = cb_call, vm::ptr, vm::ptr, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc, + s32 res = cb_call, vm::ptr>, vm::ptr, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc, spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C)); - queue_id = queue; + queue_id = queue.value(); return res; #endif @@ -2768,14 +2768,9 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: if (ls_pattern.addr() != 0) { u32 ls_blocks = 0; - for (auto i = 0; i < 64; i++) + for (auto i = 0; i < 128; i++) { - if (ls_pattern->u64[0] & ((u64)1 << i)) - { - ls_blocks++; - } - - if (ls_pattern->u64[1] & ((u64)1 << i)) + if (ls_pattern->_u128.value()._bit[i]) { ls_blocks++; } @@ -2786,7 +2781,8 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: return CELL_SPURS_TASK_ERROR_INVAL; } - if (ls_pattern->u32[0] & 0xFC000000) + u128 _0 = u128::from32(0); + if ((ls_pattern->_u128.value() & u128::from32r(0xFC000000)) != _0) { // Prevent save/restore to SPURS management area return CELL_SPURS_TASK_ERROR_INVAL; @@ -2819,13 +2815,10 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr()); taskset->m.task_info[tmp_task_id].context_save_storage_and_alloc_ls_blocks = (context_addr.addr() | alloc_ls_blocks); - for (u32 
i = 0; i < 2; i++) + taskset->m.task_info[tmp_task_id].args = *arg; + if (ls_pattern.addr()) { - taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; - if (ls_pattern.addr()) - { - taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern->u64[i]; - } + taskset->m.task_info[tmp_task_id].ls_pattern = *ls_pattern; } *task_id = tmp_task_id; @@ -3162,12 +3155,7 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut for (s32 c = 0; c < 4; c++) { - attribute->lsPattern.u32[c] = 0; - } - - for (s32 i = 0; i < 2; i++) - { - attribute->lsPattern.u64[i] = 0; + attribute->lsPattern._u128 = u128::from64r(0); } attribute->name_addr = 0; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 3188d3238c..348c795653 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -413,6 +413,7 @@ struct CellSpurs be_t arg; // spu argument be_t size; atomic_t uniqueId; // The unique id is the same for all workloads with the same addr + u8 pad[3]; u8 priority[8]; }; @@ -622,14 +623,12 @@ struct CellSpursEventFlag union CellSpursTaskArgument { - be_t u32[4]; - be_t u64[2]; + be_t _u128; }; union CellSpursTaskLsPattern { - be_t u32[4]; - be_t u64[2]; + be_t _u128; }; struct CellSpursTaskset diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index bbcc00e056..9070a8ce8e 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -1120,7 +1120,7 @@ void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) { auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); spu.GPR[2].clear(); - spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]); + spu.GPR[3] = taskArgs._u128; spu.GPR[4]._u64[1] = taskset->m.args; spu.GPR[4]._u64[0] = taskset->m.spurs.addr(); for (auto i = 5; i < 128; i++) { @@ -1359,7 +1359,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; u32 lsBlocks = 0; for (auto i = 0; i < 128; i++) { - if (taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) { + if (taskInfo->ls_pattern._u128.value()._bit[i]) { lsBlocks++; } } @@ -1370,7 +1370,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Make sure the stack is area is specified in the ls pattern for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if ((taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) == 0) { + if (taskInfo->ls_pattern._u128.value()._bit[i] == false) { return CELL_SPURS_TASK_ERROR_STAT; } } @@ -1390,8 +1390,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Save LS context for (auto i = 6; i < 128; i++) { - bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i) ? 
true : false; - if (shouldStore) { + if (taskInfo->ls_pattern._u128.value()._bit[i]) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); } @@ -1475,8 +1474,7 @@ void spursTasksetDispatch(SPUThread & spu) { } // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well - if (taskInfo->ls_pattern.u64[1] != 0xFFFFFFFFFFFFFFFFull || - (taskInfo->ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + if (taskInfo->ls_pattern._u128.value() != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) { // Load the ELF u32 entryPoint; if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { @@ -1489,8 +1487,7 @@ void spursTasksetDispatch(SPUThread & spu) { u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId); for (auto i = 6; i < 128; i++) { - bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i) ? true : false; - if (shouldLoad) { + if (taskInfo->ls_pattern._u128.value()._bit[i]) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); } From d675c67f798acf696818782aa181b2b8abb1ad20 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 11 Feb 2015 15:45:43 +0530 Subject: [PATCH 29/29] SPURS: Disable the SPURS kernel --- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 9070a8ce8e..d1dc487eeb 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -524,6 +524,13 @@ bool spursKernelWorkloadExit(SPUThread & spu) { /// SPURS kernel entry point bool spursKernelEntry(SPUThread & spu) { + while (true) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + if (Emu.IsStopped()) { + return false; + } + } + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); memset(ctxt, 0, sizeof(SpursKernelContext));
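The BEType.h change in PATCH 28 redefines u128 bit indexing so that index 0 addresses the most significant bit and index 127 the least significant, which is what makes checks such as taskInfo->ls_pattern._u128.value()._bit[i] line up with the MSB-first LS block numbering used by the taskset code. A host-independent restatement of the rule that patch implements for IS_LE_MACHINE builds; testBitMsbFirst is an illustrative helper, not part of the patch:

    static bool testBitMsbFirst(const u64 (&data)[2], u32 index) {
        // On a little-endian host, data[1] holds bits 0..63 (the most significant
        // qword) and data[0] holds bits 64..127, exactly as in the new operator[]
        const u64 qword = data[1 - (index >> 6)];
        return (qword & (0x8000000000000000ull >> (index & 0x3F))) != 0;
    }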