From a254507be1fe7fb3cf0f1274e35eeb545f65618f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 10 Dec 2014 10:12:13 +0530 Subject: [PATCH 01/29] SPURS: Implement taskset attribute functions --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 124 ++++++++++++++++++++--- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 23 +++++ 2 files changed, 134 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 4248c35d74..0aff338d50 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2296,15 +2296,49 @@ s64 cellSpursJobChainGetSpursAddress() #endif } -s64 cellSpursCreateTasksetWithAttribute() +s64 spursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, + u32 max_contention, u32 enable_clear_ls, u32 size, u8 unknown /*TODO: Figure this out*/) +{ + if (!spurs || !taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CELL_SPURS_ALIGN || taskset.addr() % CELL_SPURS_TASKSET_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + memset((void *)taskset.addr(), 0, size); + + // TODO: Implement the rest of this + return CELL_OK; +} + +s64 cellSpursCreateTasksetWithAttribute(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc); -#else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; #endif + + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % 8) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (attr->revision != 1) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + return spursCreateTaskset(spurs, taskset, attr->args, vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, priority)), + attr->max_contention, attr->enable_clear_ls, attr->taskset_size, 0 /*TODO: Figure this out*/); } s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, u32 maxContention) @@ -2314,12 +2348,16 @@ s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr t #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x14CB8, libsre_rtoc); -#else +#endif + +#if 0 SPURSManagerTasksetAttribute *tattr = new SPURSManagerTasksetAttribute(args, priority, maxContention); taskset->taskset = new SPURSManagerTaskset(taskset.addr(), tattr); return CELL_OK; #endif + + return spursCreateTaskset(spurs, taskset, args, priority, maxContention, 0, CELL_SPURS_TASKSET_SIZE, 0); } s64 cellSpursJoinTaskset(vm::ptr taskset) @@ -2390,35 +2428,70 @@ s64 cellSpursCreateTaskWithAttribute() #endif } -s64 cellSpursTasksetAttributeSetName() +s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm::ptr name) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14210, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr || !name) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->name = name.addr(); return CELL_OK; #endif }
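// A minimal sketch of how these attribute functions are meant to chain together,
// inferred from the checks implemented above (illustrative only; sdkVersion, args
// and priority stand for caller-supplied values and are not defined in this patch):
//
//   _cellSpursTasksetAttributeInitialize(attr, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION,
//                                        sdkVersion, args, priority, 8 /*max_contention*/);
//   cellSpursTasksetAttributeSetName(attr, name);                           // optional
//   cellSpursTasksetAttributeSetTasksetSize(attr, CELL_SPURS_TASKSET_SIZE); // optional
//   cellSpursCreateTasksetWithAttribute(spurs, taskset, attr);

-s64 cellSpursTasksetAttributeSetTasksetSize() +s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, size_t size) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14254, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % 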
CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (size != CELL_SPURS_TASKSET_SIZE && size != CELL_SPURS_TASKSET2_SIZE) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + attr->taskset_size = size; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeEnableClearLS() +s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr attr, s32 enable) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142AC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->enable_clear_ls = enable ? 1 : 0; return CELL_OK; #endif } @@ -2430,6 +2503,7 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1474C, libsre_rtoc); #else + memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE); attribute->revision = revision; attribute->name_addr = NULL; attribute->argTaskset = 0; @@ -2694,13 +2768,37 @@ s64 cellSpursGetTasksetInfo() #endif } -s64 _cellSpursTasksetAttributeInitialize() +s64 _cellSpursTasksetAttributeInitialize(vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142FC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attribute) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attribute.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + for (u32 i = 0; i < 8; i++) + { + if (priority[i] > 0xF) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + } + + memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE_SIZE); + attribute->revision = revision; + attribute->sdk_version = sdk_version; + attribute->args = args; + memcpy(attribute->priority, priority.get_ptr(), 8); + attribute->taskset_size = CELL_SPURS_TASKSET_SIZE; + attribute->max_contention = max_contention; return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4e72b50826..0248311f70 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -130,6 +130,17 @@ enum TaskConstants CELL_SPURS_TASK_TOP = 0x3000, CELL_SPURS_TASK_BOTTOM = 0x40000, CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, + CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_ATTRIBUTE_SIZE = 256, + CELL_SPURS_TASKSET_SIZE = 6400, + CELL_SPURS_TASKSET_ALIGN = 128, + CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN = 8, + CELL_SPURS_TASKSET_ATTRIBUTE_SIZE = 512, + CELL_SPURS_TASKSET2_SIZE = 10496, + CELL_SPURS_TASKSET2_ALIGN = 128, + CELL_SPURS_TASKSET_ATTRIBUTE2_ALIGN = 8, + CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE = 512, }; class SPURSManager; @@ -531,6 +542,18 @@ struct CellSpursTaskset2 be_t skip[10496]; }; +struct CellSpursTasksetAttribute +{ + be_t revision; + be_t sdk_version; + be_t args; + u8 priority[8]; + be_t max_contention; + be_t name; + be_t taskset_size; + be_t enable_clear_ls; +}; + struct CellSpursTasksetAttribute2 { be_t revision; From 5dd15b3c47bb9acc47e2e351fec0aaeeae78afb4 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 13 Dec 2014 01:42:09 +0530 Subject: [PATCH 02/29] SPURS: Implement create taskset --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 142 
++++++++++++++++------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 106 ++++++++++++----- 2 files changed, 178 insertions(+), 70 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 0aff338d50..aebf52dbf8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2296,22 +2296,50 @@ s64 cellSpursJobChainGetSpursAddress() #endif } -s64 spursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, - u32 max_contention, u32 enable_clear_ls, u32 size, u8 unknown /*TODO: Figure this out*/) +s64 spursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, + u32 max_contention, vm::ptr name, u32 size, s32 enable_clear_ls) { if (!spurs || !taskset) { return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (spurs.addr() % CELL_SPURS_ALIGN || taskset.addr() % CELL_SPURS_TASKSET_ALIGN) + if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } memset((void *)taskset.addr(), 0, size); - // TODO: Implement the rest of this + taskset->m.spurs = spurs; + taskset->m.args = args; + taskset->m.enable_clear_ls = enable_clear_ls > 0 ? 1 : 0; + taskset->m.size = size; + + auto wkl_attr = vm::ptr::make((u32)Memory.Alloc(CellSpursWorkloadAttribute::size, CellSpursWorkloadAttribute::align)); + _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(16) /*pm*/, 0x1E40 /*pm_size*/, + taskset.addr(), priority, 8 /*min_contention*/, max_contention); + // TODO: Check return code + + auto cls = vm::ptr::make((u32)Memory.Alloc(1, 1)); + *((char *)cls.get_ptr()) = 0; + cellSpursWorkloadAttributeSetName(wkl_attr, cls, name); + // TODO: Check return code + + // TODO: cellSpursWorkloadAttributeSetShutdownCompletionEventHook(wkl_attr, hook, taskset); + // TODO: Check return code + + auto wid = vm::ptr::make((u32)Memory.Alloc(4, 4)); + cellSpursAddWorkloadWithAttribute(spurs, wid, vm::ptr::make(wkl_attr.addr())); + // TODO: Check return code + + taskset->m.x72 = 0x80; + taskset->m.wid.FromBE(*wid); + // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset); + // TODO: Check return code + + Memory.Free(wkl_attr.addr()); + Memory.Free(wid.addr()); return CELL_OK; } @@ -2327,21 +2355,28 @@ s64 cellSpursCreateTasksetWithAttribute(vm::ptr spurs, vm::ptrrevision != 1) + if (attr->m.revision != CELL_SPURS_TASKSET_ATTRIBUTE_REVISION) { return CELL_SPURS_TASK_ERROR_INVAL; } - return spursCreateTaskset(spurs, taskset, attr->args, vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, priority)), - attr->max_contention, attr->enable_clear_ls, attr->taskset_size, 0 /*TODO: Figure this out*/); + auto rc = spursCreateTaskset(spurs, taskset, attr->m.args, vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)), + attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), attr->m.taskset_size, attr->m.enable_clear_ls); + + if (attr->m.taskset_size >= CellSpursTaskset2::size) + { + // TODO: Implement this + } + + return rc; } -s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, u32 maxContention) +s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr taskset, u64 args, vm::ptr priority, u32 maxContention) { cellSpurs->Warning("cellSpursCreateTaskset(spurs_addr=0x%x, taskset_addr=0x%x, args=0x%llx, priority_addr=0x%x, maxContention=%d)", spurs.addr(), taskset.addr(), args, priority.addr(), maxContention); @@ -2357,7 +2392,7 @@ s64 
cellSpursCreateTaskset(vm::ptr spurs, vm::ptr t return CELL_OK; #endif - return spursCreateTaskset(spurs, taskset, args, priority, maxContention, 0, CELL_SPURS_TASKSET_SIZE, 0); + return spursCreateTaskset(spurs, taskset, args, priority, maxContention, vm::ptr::make(0), CellSpursTaskset::size, 0); } s64 cellSpursJoinTaskset(vm::ptr taskset) @@ -2439,17 +2474,17 @@ s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attr.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } - attr->name = name.addr(); + attr->m.name = name; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, size_t size) +s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, u32 size) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2460,17 +2495,17 @@ s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr a return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attr.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } - if (size != CELL_SPURS_TASKSET_SIZE && size != CELL_SPURS_TASKSET2_SIZE) + if (size != CellSpursTaskset::size && size != CellSpursTaskset2::size) { return CELL_SPURS_TASK_ERROR_INVAL; } - attr->taskset_size = size; + attr->m.taskset_size = size; return CELL_OK; #endif } @@ -2486,12 +2521,12 @@ s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr at return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attr.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attr.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } - attr->enable_clear_ls = enable ? 1 : 0; + attr->m.enable_clear_ls = enable ? 
1 : 0; return CELL_OK; #endif } @@ -2503,20 +2538,19 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1474C, libsre_rtoc); #else - memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE); - attribute->revision = revision; - attribute->name_addr = NULL; - attribute->argTaskset = 0; + memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute2::size); + attribute->m.revision = revision; + attribute->m.name.set(0); + attribute->m.args = 0; for (s32 i = 0; i < 8; i++) { - attribute->priority[i] = 1; + attribute->m.priority[i] = 1; } - attribute->maxContention = 8; - attribute->enableClearLs = 0; - attribute->CellSpursTaskNameBuffer_addr = 0; - + attribute->m.max_contention = 8; + attribute->m.enable_clear_ls = 0; + attribute->m.task_name_buffer.set(0); return CELL_OK; #endif } @@ -2647,13 +2681,43 @@ s64 cellSpursTaskGetContextSaveAreaSize() #endif } -s64 cellSpursCreateTaskset2() +s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + auto free_attr = false; + if (!attr) + { + attr.set(Memory.Alloc(CellSpursTasksetAttribute2::size, CellSpursTasksetAttribute2::align)); + free_attr = true; + _cellSpursTasksetAttribute2Initialize(attr, 0); + } + + auto rc = spursCreateTaskset(spurs, vm::ptr::make(taskset.addr()), attr->m.args, + vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)), + attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), CellSpursTaskset2::size, (u8)attr->m.enable_clear_ls); + if (rc != CELL_OK) + { + if (free_attr) + { + Memory.Free(attr.addr()); + } + return rc; + } + + if (attr->m.task_name_buffer.addr() % CellSpursTaskNameBuffer::align) + { + if (free_attr) + { + Memory.Free(attr.addr()); + } + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + // TODO: Implement rest of the function + + if (free_attr) + { + Memory.Free(attr.addr()); + } return CELL_OK; #endif } @@ -2735,7 +2799,7 @@ s64 cellSpursTasksetUnsetExceptionEventHandler() #endif } -s64 cellSpursLookUpTasksetAddress() +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2746,7 +2810,7 @@ s64 cellSpursLookUpTasksetAddress() #endif } -s64 cellSpursTasksetGetSpursAddress() +s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2779,7 +2843,7 @@ s64 _cellSpursTasksetAttributeInitialize(vm::ptr attr return CELL_SPURS_TASK_ERROR_NULL_POINTER; } - if (attribute.addr() % CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN) + if (attribute.addr() % CellSpursTasksetAttribute::align) { return CELL_SPURS_TASK_ERROR_ALIGN; } @@ -2792,13 +2856,13 @@ s64 _cellSpursTasksetAttributeInitialize(vm::ptr attr } } - memset(attribute.get_ptr(), 0, CELL_SPURS_TASKSET_ATTRIBUTE_SIZE); - attribute->revision = revision; - attribute->sdk_version = sdk_version; - attribute->args = args; - memcpy(attribute->priority, priority.get_ptr(), 8); - attribute->taskset_size = CELL_SPURS_TASKSET_SIZE; - attribute->max_contention = max_contention; + memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute::size); + attribute->m.revision = revision; + attribute->m.sdk_version = sdk_version; + attribute->m.args = args; + memcpy(attribute->m.priority, priority.get_ptr(), 8); + attribute->m.taskset_size = CellSpursTaskset::size; + attribute->m.max_contention = 
max_contention; return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 0248311f70..adb1a60799 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -131,16 +131,7 @@ enum TaskConstants CELL_SPURS_TASK_BOTTOM = 0x40000, CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASK_ATTRIBUTE_SIZE = 256, - CELL_SPURS_TASKSET_SIZE = 6400, - CELL_SPURS_TASKSET_ALIGN = 128, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASKSET_ATTRIBUTE_ALIGN = 8, - CELL_SPURS_TASKSET_ATTRIBUTE_SIZE = 512, - CELL_SPURS_TASKSET2_SIZE = 10496, - CELL_SPURS_TASKSET2_ALIGN = 128, - CELL_SPURS_TASKSET_ATTRIBUTE2_ALIGN = 8, - CELL_SPURS_TASKSET_ATTRIBUTE2_SIZE = 512, }; class SPURSManager; @@ -428,7 +419,31 @@ struct CellSpursEventFlag struct CellSpursTaskset { - SPURSManagerTaskset *taskset; + static const u32 align = 128; + static const u32 size = 6400; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + u8 unk1[0x64]; // 0x00 + vm::bptr spurs; // 0x64 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 x73; // 0x73 + be_t wid; // 0x74 + u8 unk3[0x1818]; // 0x78 + be_t size; // 0x1890 + } m; + + SPURSManagerTaskset *taskset; + }; }; struct CellSpursInfo @@ -524,6 +539,13 @@ struct CellSpursTracePacket //typedef void (*CellSpursTasksetExceptionEventHandler)(vm::ptr spurs, vm::ptr taskset, // u32 idTask, vm::ptr info, vm::ptr arg); +struct CellSpursTaskNameBuffer +{ + static const u32 align = 16; + + char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; +}; + struct CellSpursTasksetInfo { //CellSpursTaskInfo taskInfo[CELL_SPURS_MAX_TASK]; @@ -539,37 +561,59 @@ struct CellSpursTasksetInfo struct CellSpursTaskset2 { + static const u32 align = 128; + static const u32 size = 10496; + be_t skip[10496]; }; struct CellSpursTasksetAttribute { - be_t revision; - be_t sdk_version; - be_t args; - u8 priority[8]; - be_t max_contention; - be_t name; - be_t taskset_size; - be_t enable_clear_ls; + static const u32 align = 8; + static const u32 size = 512; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + be_t sdk_version; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + vm::bptr name; // 0x1C + be_t taskset_size; // 0x20 + be_t enable_clear_ls; // 0x24 + } m; + }; }; struct CellSpursTasksetAttribute2 { - be_t revision; - be_t name_addr; - be_t argTaskset; - u8 priority[8]; - be_t maxContention; - be_t enableClearLs; - be_t CellSpursTaskNameBuffer_addr; //??? *taskNameBuffer - //be_t __reserved__[]; -}; + static const u32 align = 8; + static const u32 size = 512; -// cellSpurs task structures. 
-struct CellSpursTaskNameBuffer -{ - char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + vm::bptr name; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + be_t enable_clear_ls; // 0x1C + vm::bptr task_name_buffer; // 0x20 + } m; + }; }; struct CellSpursTraceTaskData From 40f5f73658a2e4ad388884feacfdf9f963aed359 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 14 Dec 2014 00:22:31 +0530 Subject: [PATCH 03/29] SPURS: Implement some taskset functions --- rpcs3/Emu/Memory/vm_ptr.h | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 91 ++++++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 81 ++++++++++++++++++--- rpcs3/emucore.vcxproj | 5 ++ 4 files changed, 156 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index 838caf930c..6f0c65ed0d 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -253,7 +253,7 @@ namespace vm void* get_ptr() const { - return vm::get_ptr(m_addr); + return vm::get_ptr((u32)m_addr); } explicit operator void*() const @@ -313,7 +313,7 @@ namespace vm const void* get_ptr() const { - return vm::get_ptr(m_addr); + return vm::get_ptr((u32)m_addr); } explicit operator const void*() const diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index aebf52dbf8..831748949b 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2309,7 +2309,7 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks return CELL_SPURS_TASK_ERROR_ALIGN; } - memset((void *)taskset.addr(), 0, size); + memset(taskset.get_ptr(), 0, size); taskset->m.spurs = spurs; taskset->m.args = args; @@ -2406,13 +2406,28 @@ s64 cellSpursJoinTaskset(vm::ptr taskset) #endif } -s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr workloadId) +s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) { #ifdef PRX_DEBUG cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, workloadId_addr=0x%x)", taskset.addr(), workloadId.addr()); return GetCurrentPPUThread().FastCall2(libsre + 0x14EA0, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset || !wid) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + *wid = taskset->m.wid.ToBE(); return CELL_OK; #endif } @@ -2777,29 +2792,66 @@ s64 cellSpursCreateTask2WithBinInfo() #endif } -s64 cellSpursTasksetSetExceptionEventHandler() +s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, vm::ptr handler, vm::ptr arg) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13124, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset || !handler) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (taskset->m.exception_handler != 0) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + taskset->m.exception_handler = handler; + taskset->m.exception_handler_arg = arg; return CELL_OK; #endif } -s64 cellSpursTasksetUnsetExceptionEventHandler() +s64 
cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13194, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + taskset->m.exception_handler.set(0); + taskset->m.exception_handler_arg.set(0); return CELL_OK; #endif } -s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); @@ -2810,13 +2862,28 @@ s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, vm::ptr spurs) +s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs) { #ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14408, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset || !spurs) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + *spurs = (u32)taskset->m.spurs.addr().ToBE(); return CELL_OK; #endif } @@ -3092,6 +3159,8 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursEnableExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursSetGlobalExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursUnsetGlobalExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); // Event flag REG_FUNC(cellSpurs, _cellSpursEventFlagInitialize); @@ -3137,8 +3206,6 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursCreateTask2WithBinInfo); REG_FUNC(cellSpurs, cellSpursLookUpTasksetAddress); REG_FUNC(cellSpurs, cellSpursTasksetGetSpursAddress); - REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); - REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursGetTasksetInfo); REG_FUNC(cellSpurs, cellSpursTasksetSetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursTasksetUnsetExceptionEventHandler); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index adb1a60799..fc8871ecd6 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -427,19 +427,38 @@ struct CellSpursTaskset // Raw data u8 _u8[size]; + struct TaskInfo + { + be_t args[4]; + vm::bptr elf; + vm::bptr context_save_storage; + be_t ls_pattern[4]; + }; + // Real data struct { - u8 unk1[0x64]; // 0x00 - vm::bptr spurs; // 0x64 - be_t args; // 0x68 - u8 enable_clear_ls; // 0x70 - u8 x71; // 0x71 - u8 x72; // 0x72 - u8 x73; // 0x73 - be_t wid; // 0x74 - u8 unk3[0x1818]; // 0x78 - be_t size; // 0x1890 + be_t running_set[4]; // 0x00 + be_t ready_set[4]; // 0x10 + be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t enabled_set[4]; // 0x30 + be_t signal_received_set[4]; // 0x40 + be_t waiting_set[4]; // 0x50 + vm::bptr spurs; // 0x60 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 last_scheduled_task; // 0x73 + be_t wid; // 0x74 + u8 unk1[8]; // 0x78 + TaskInfo task_info[128]; // 0x80 + vm::bptr 
exception_handler; // 0x1880 + vm::bptr exception_handler_arg; // 0x1888 + be_t size; // 0x1890 + u32 unk2; // 0x1894 + u32 event_flag_id1; // 0x1898 + u32 event_flag_id2; // 0x189C } m; SPURSManagerTaskset *taskset; @@ -564,7 +583,47 @@ struct CellSpursTaskset2 static const u32 align = 128; static const u32 size = 10496; - be_t skip[10496]; + union + { + // Raw data + u8 _u8[size]; + + struct TaskInfo + { + be_t args[4]; + vm::bptr elf_address; + vm::bptr context_save_storage; + be_t ls_pattern[4]; + }; + + // Real data + struct + { + be_t running_set[4]; // 0x00 + be_t ready_set[4]; // 0x10 + be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t enabled_set[4]; // 0x30 + be_t signal_received_set[4]; // 0x40 + be_t waiting_set[4]; // 0x50 + vm::bptr spurs; // 0x60 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 last_scheduled_task; // 0x73 + be_t wid; // 0x74 + u8 unk1[8]; // 0x78 + TaskInfo task_info[128]; // 0x80 + vm::bptr exception_handler; // 0x1880 + vm::bptr exception_handler_arg; // 0x1888 + be_t size; // 0x1890 + u32 unk2; // 0x1894 + u32 event_flag_id1; // 0x1898 + u32 event_flag_id2; // 0x189C + u8 unk3[0x88]; // 0x1900 + u128 task_exit_code[128]; // 0x1988 + } m; + }; }; struct CellSpursTasksetAttribute diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index b0990ba2f6..8e2629cea3 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -662,6 +662,7 @@ _UNICODE;UNICODE;%(PreprocessorDefinitions) stdafx.h Async + true true @@ -676,6 +677,7 @@ _UNICODE;UNICODE;LLVM_AVAILABLE;%(PreprocessorDefinitions) stdafx.h Async + true true @@ -694,6 +696,7 @@ _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions) stdafx.h Async + true true @@ -741,6 +744,7 @@ Use stdafx.h Async + true true @@ -759,6 +763,7 @@ stdafx.h Async LLVM_AVAILABLE;%(PreprocessorDefinitions) + true true From 53e0979f2910879090dbae21345625bafea08b97 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 14 Dec 2014 03:19:06 +0530 Subject: [PATCH 04/29] SPURS: Implement cellSpursCreateTask --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 122 ++++++++++++++++++++++- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 62 +++++++----- 2 files changed, 153 insertions(+), 31 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 831748949b..2810152043 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2427,7 +2427,7 @@ s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) return CELL_SPURS_TASK_ERROR_INVAL; } - *wid = taskset->m.wid.ToBE(); + *wid = taskset->m.wid; return CELL_OK; #endif } @@ -2443,6 +2443,121 @@ s64 cellSpursShutdownTaskset(vm::ptr taskset) #endif } +u32 _cellSpursGetSdkVersion() +{ + static u32 sdk_version = -2; + + if (sdk_version == -2) + { + auto version = vm::ptr::make((u32)Memory.Alloc(sizeof(u32), sizeof(u32))); + sys_process_get_sdk_version(sys_process_getpid(), version); + sdk_version = *version; + Memory.Free(version.addr()); + } + + return sdk_version; +} + +s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm::ptr elf_addr, vm::ptr context_addr, u32 context_size, vm::ptr ls_pattern, vm::ptr arg) +{ + if (!taskset || !elf_addr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (elf_addr.addr() % 16) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + auto sdk_version = _cellSpursGetSdkVersion(); + if (sdk_version < 0x27FFFF) + { + if (context_addr.addr() % 16) + { + return 
CELL_SPURS_TASK_ERROR_ALIGN; + } + } + else + { + if (context_addr.addr() % 128) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + } + + u32 alloc_ls_blocks = 0; + if (context_addr.addr() != 0) + { + if (context_size < CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + alloc_ls_blocks = context_size > 0x3D400 ? 0x7A : ((context_size - 0x400) >> 11); + if (ls_pattern.addr() != 0) + { + u32 ls_blocks = 0; + for (u32 i = 0; i < 2; i++) + { + for (u32 j = 0; j < 64; j++) + { + if (ls_pattern->u64[i] & ((u64)1 << j)) + { + ls_blocks++; + } + } + } + + if (ls_blocks > alloc_ls_blocks) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (ls_pattern->u32[0] & 0xFC000000) + { + // Prevent save/restore to SPURS management area + return CELL_SPURS_TASK_ERROR_INVAL; + } + } + } + else + { + alloc_ls_blocks = 0; + } + + // TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000 + + // TODO: Make the following block execute atomically + u32 tmp_task_id; + for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++) + { + u32 l = tmp_task_id >> 5; + u32 b = tmp_task_id & 0x1F; + if ((taskset->m.enabled_set[l] & (0x80000000 >> b)) == 0) + { + taskset->m.enabled_set[l] |= 0x80000000 >> b; + break; + } + } + + if (tmp_task_id >= CELL_SPURS_MAX_TASK) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + + taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr()); + taskset->m.task_info[tmp_task_id].context_save_storage.set((context_addr.addr() & 0xFFFFFFF8) | alloc_ls_blocks); + for (u32 i = 0; i < 2; i++) + { + taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; + taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern != 0 ? ls_pattern->u64[i] : 0; + } + + *task_id = tmp_task_id; + return CELL_OK; +} + s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, vm::ptr argument) { @@ -2451,8 +2566,7 @@ s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; + return spursCreateTask(taskset, taskID, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index fc8871ecd6..4a8ba993af 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -132,6 +132,7 @@ enum TaskConstants CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, }; class SPURSManager; @@ -417,6 +418,18 @@ struct CellSpursEventFlag SPURSManagerEventFlag *eventFlag; }; +union CellSpursTaskArgument +{ + be_t u32[4]; + be_t u64[2]; +}; + +union CellSpursTaskLsPattern +{ + be_t u32[4]; + be_t u64[2]; +}; + struct CellSpursTaskset { static const u32 align = 128; static const u32 size = 6400;
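// A sketch of the LS-pattern arithmetic that spursCreateTask above relies on
// (inferred from the checks in this patch, not from official documentation):
// SPU local storage is 256KB and the pattern is 128 bits wide, so each bit
// appears to stand for one 2KB block of LS to be saved/restored for the task:
//
//   alloc_ls_blocks = (context_size - 0x400) >> 11;  // capacity in 2KB blocks
//   ls_blocks       = popcount(u64[0]) + popcount(u64[1]);  // what the double loop computes
//   // requirement: ls_blocks <= alloc_ls_blocks
//
// The 0xFC000000 test masks the six most significant bits of u32[0], i.e. the
// first 6 * 2KB = 0x3000 bytes of LS, which is consistent with the TODO above
// wanting all ELF load segments at addresses >= 0x3000 (the SPURS kernel area).

@@ -429,14 +442,16 @@ struct CellSpursTaskset struct TaskInfo { - 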
CellSpursTaskArgument args; + vm::bptr elf_addr; + vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; }; + static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); + // Real data - struct + struct _CellSpursTaskset { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 @@ -459,8 +474,11 @@ struct CellSpursTaskset u32 unk2; // 0x1894 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C + u8 unk3[0x60]; // 0x18A0 } m; + static_assert(sizeof(_CellSpursTaskset) == size, "Wrong _CellSpursTaskset size"); + SPURSManagerTaskset *taskset; }; }; @@ -590,14 +608,16 @@ struct CellSpursTaskset2 struct TaskInfo { - be_t args[4]; - vm::bptr elf_address; - vm::bptr context_save_storage; - be_t ls_pattern[4]; + CellSpursTaskArgument args; + vm::bptr elf_addr; + vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; }; + static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); + // Real data - struct + struct _CellSpursTaskset2 { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 @@ -620,9 +640,12 @@ struct CellSpursTaskset2 u32 unk2; // 0x1894 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C - u8 unk3[0x88]; // 0x1900 - u128 task_exit_code[128]; // 0x1988 + u8 unk3[0xE8]; // 0x18A0 + u64 task_exit_code[256]; // 0x1988 + u8 unk4[0x778]; // 0x2188 } m; + + static_assert(sizeof(_CellSpursTaskset2) == size, "Wrong _CellSpursTaskset2 size"); }; }; @@ -681,21 +704,6 @@ struct CellSpursTraceTaskData be_t task; }; -typedef be_t be_u32; -typedef be_t be_u64; - -struct CellSpursTaskArgument -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - -struct CellSpursTaskLsPattern -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - struct CellSpursTaskAttribute2 { be_t revision; From 68cc9b205920c3579d5d89897caa022f506d44bc Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 14 Dec 2014 12:07:46 +0530 Subject: [PATCH 05/29] SPURS: Enable logging of taskset calls --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 91 ++++++++++++++++-------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 2810152043..8163443e42 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2345,8 +2345,9 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks s64 cellSpursCreateTasksetWithAttribute(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { + cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc); #endif @@ -2397,8 +2398,9 @@ s64 cellSpursCreateTaskset(vm::ptr spurs, vm::ptr t s64 cellSpursJoinTaskset(vm::ptr taskset) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursJoinTaskset(taskset_addr=0x%x)", taskset.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x152F8, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2408,8 +2410,9 @@ s64 cellSpursJoinTaskset(vm::ptr taskset) s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) { + cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, wid=0x%x)", taskset.addr(), wid.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, workloadId_addr=0x%x)", taskset.addr(), 
workloadId.addr()); return GetCurrentPPUThread().FastCall2(libsre + 0x14EA0, libsre_rtoc); #else if (!taskset || !wid) @@ -2434,8 +2437,9 @@ s64 cellSpursGetTasksetId(vm::ptr taskset, vm::ptr wid) s64 cellSpursShutdownTaskset(vm::ptr taskset) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursShutdownTaskset(taskset_addr=0x%x)", taskset.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x14868, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2561,9 +2565,10 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, vm::ptr argument) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursCreateTask(taskset_addr=0x%x, taskID_addr=0x%x, elf_addr_addr=0x%x, context_addr_addr=0x%x, context_size=%d, lsPattern_addr=0x%x, argument_addr=0x%x)", taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc); #else return spursCreateTask(taskset, taskID, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); @@ -2583,8 +2588,9 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) s64 cellSpursCreateTaskWithAttribute() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12204, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2594,8 +2600,9 @@ s64 cellSpursCreateTaskWithAttribute() s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm::ptr name) { + cellSpurs->Warning("%s(attr=0x%x, name=0x%x)", __FUNCTION__, attr.addr(), name.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14210, libsre_rtoc); #else if (!attr || !name) @@ -2615,8 +2622,9 @@ s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, u32 size) { + cellSpurs->Warning("%s(attr=0x%x, size=0x%x)", __FUNCTION__, attr.addr(), size); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14254, libsre_rtoc); #else if (!attr) @@ -2641,8 +2649,9 @@ s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr a s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr attr, s32 enable) { + cellSpurs->Warning("%s(attr=0x%x, enable=%d)", __FUNCTION__, attr.addr(), enable); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142AC, libsre_rtoc); #else if (!attr) @@ -2686,8 +2695,9 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at s64 cellSpursTaskExitCodeGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1397C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2697,8 +2707,9 @@ s64 cellSpursTaskExitCodeGet() s64 cellSpursTaskExitCodeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1352C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2708,8 +2719,9 @@ s64 cellSpursTaskExitCodeInitialize() s64 cellSpursTaskExitCodeTryGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13974, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2719,8 
+2731,9 @@ s64 cellSpursTaskExitCodeTryGet() s64 cellSpursTaskGetLoadableSegmentPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13ED4, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2730,8 +2743,9 @@ s64 cellSpursTaskGetLoadableSegmentPattern() s64 cellSpursTaskGetReadOnlyAreaPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13CFC, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2741,8 +2755,9 @@ s64 cellSpursTaskGetReadOnlyAreaPattern() s64 cellSpursTaskGenerateLsPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13B78, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2752,8 +2767,9 @@ s64 cellSpursTaskGenerateLsPattern() s64 _cellSpursTaskAttributeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10C30, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2763,8 +2779,9 @@ s64 _cellSpursTaskAttributeInitialize() s64 cellSpursTaskAttributeSetExitCodeContainer() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10A98, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2801,8 +2818,9 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut s64 cellSpursTaskGetContextSaveAreaSize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1409C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2812,8 +2830,9 @@ s64 cellSpursTaskGetContextSaveAreaSize() s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr taskset, vm::ptr attr) { + cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc); #else auto free_attr = false; @@ -2853,8 +2872,9 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr s64 cellSpursCreateTask2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x11E54, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2864,8 +2884,9 @@ s64 cellSpursCreateTask2() s64 cellSpursJoinTask2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x11378, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2875,8 +2896,9 @@ s64 cellSpursJoinTask2() s64 cellSpursTryJoinTask2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x11748, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2886,8 +2908,9 @@ s64 cellSpursTryJoinTask2() s64 cellSpursDestroyTaskset2() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x14EE8, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2897,8 +2920,9 @@ s64 cellSpursDestroyTaskset2() s64 cellSpursCreateTask2WithBinInfo() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x120E0, libsre_rtoc); #else 
UNIMPLEMENTED_FUNC(cellSpurs); @@ -2908,8 +2932,9 @@ s64 cellSpursCreateTask2WithBinInfo() s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, vm::ptr handler, vm::ptr arg) { + cellSpurs->Warning("%s(taskset=0x%x, handler=0x%x, arg=0x%x)", __FUNCTION__, taskset.addr(), handler.addr(), arg.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13124, libsre_rtoc); #else if (!taskset || !handler) @@ -2940,8 +2965,9 @@ s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset) { + cellSpurs->Warning("%s(taskset=0x%x)", __FUNCTION__, taskset.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x13194, libsre_rtoc); #else if (!taskset) @@ -2967,8 +2993,9 @@ s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { + cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, id=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), id); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x133AC, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2978,8 +3005,9 @@ s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs) { + cellSpurs->Warning("%s(taskset=0x%x, spurs=0x%x)", __FUNCTION__, taskset.addr(), spurs.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14408, libsre_rtoc); #else if (!taskset || !spurs) @@ -3004,8 +3032,9 @@ s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm: s64 cellSpursGetTasksetInfo() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1445C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -3015,8 +3044,10 @@ s64 cellSpursGetTasksetInfo() s64 _cellSpursTasksetAttributeInitialize(vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention) { + cellSpurs->Warning("%s(attribute=0x%x, revision=%u, sdk_version=%u, args=0x%llx, priority=0x%x, max_contention=%u)", + __FUNCTION__, attribute.addr(), revision, sdk_version, args, priority.addr(), max_contention); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142FC, libsre_rtoc); #else if (!attribute) From 5a6016003542d7064ac716f7f7f7c71d0d1b59e7 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 16 Dec 2014 09:12:50 +0530 Subject: [PATCH 06/29] SPURS: Added comments on CellSpurs fields --- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 126 ++++++++++++------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4a8ba993af..849a597223 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -224,7 +224,7 @@ struct CellSpurs struct _sub_str1 { - u8 unk0[0x20]; + u8 unk0[0x20]; // 0x00 - SPU exception handler 0x08 - SPU exception handler args be_t sem; // 0x20 u8 unk1[0x8]; vm::bptr hook; // 0x30 @@ -234,14 +234,14 @@ struct CellSpurs static_assert(sizeof(_sub_str1) == 0x80, "Wrong _sub_str1 size"); - struct _sub_str2 + struct _sub_str2 // Event port multiplexer { - be_t unk0; - be_t unk1; - be_t unk2; - be_t unk3; 
+ be_t unk0; // 0x00 Outstanding requests + be_t unk1; // 0x04 + be_t unk2; // 0x08 + be_t unk3; // 0x0C be_t port; // 0x10 - u8 unk_[0x68]; + u8 unk_[0x68]; // 0x18 - The first u64 seems to be the start of a linked list. The linked list struct seems to be {u64 next; u64 data; u64 handler} }; static_assert(sizeof(_sub_str2) == 0x80, "Wrong _sub_str2 size"); @@ -275,43 +275,43 @@ struct CellSpurs struct { atomic_t wklReadyCount[0x20]; // 0x0 (index = wid) - u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) + u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Current contention + u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Pending + u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - Min contention + atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max contention
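// A sketch of how one of these packed 4-bit entries appears to be decoded,
// going by the "index = wid % 16, internal index = wid / 16" comments above
// (an illustrative helper, not part of the patch; it matches the nibble
// selection used by the rewritten kernel in patch 07):
//
//   u8 packed = arr[wid % 16];
//   u8 value  = (wid < 16) ? (packed & 0x0F) : (packed >> 4);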
CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - atomic_t x72; // 0x72 - u8 x73; // 0x73 - u8 flags1; // 0x74 - u8 x75; // 0x75 - u8 nSpus; // 0x76 + atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - Workload signal + atomic_t x72; // 0x72 - message + u8 x73; // 0x73 - idling + u8 flags1; // 0x74 - Bit0(MSB)=exit_if_no_work, Bit1=32_workloads + u8 x75; // 0x75 - trace control + u8 nSpus; // 0x76 - Number of SPUs atomic_t flagRecv; // 0x77 - atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) + atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) - Workload signal u8 x7A[6]; // 0x7A - atomic_t wklStat1[0x10]; // 0x80 - u8 wklD1[0x10]; // 0x90 - u8 wklE1[0x10]; // 0xA0 - atomic_t wklMskA; // 0xB0 - atomic_t wklMskB; // 0xB4 - u8 xB8[5]; // 0xB8 - atomic_t xBD; // 0xBD - u8 xBE[2]; // 0xBE - u8 xC0[8]; // 0xC0 - u8 xC8; // 0xC8 - u8 spuPort; // 0xC9 + atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_existent, preparing, runnable, shutting_down, removable, invalid} + u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) + u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) + atomic_t wklMskA; // 0xB0 - Available workloads (32*u1) + atomic_t wklMskB; // 0xB4 - Available module id + u8 xB8[5]; // 0xB8 - 0xBC - exit barrier + atomic_t xBD; // 0xBD - update workload + u8 xBE[2]; // 0xBE - 0xBF - message - terminate + u8 xC0[8]; // 0xC0 - System workload + u8 xC8; // 0xC8 - System service - on spu + u8 spuPort; // 0xC9 - SPU port for system service u8 xCA; // 0xCA u8 xCB; // 0xCB u8 xCC; // 0xCC u8 xCD; // 0xCD - u8 xCE; // 0xCE + u8 xCE; // 0xCE - message - update trace u8 xCF; // 0xCF - atomic_t wklStat2[0x10]; // 0xD0 - u8 wklD2[0x10]; // 0xE0 - u8 wklE2[0x10]; // 0xF0 + atomic_t wklStat2[0x10]; // 0xD0 - Workload state (16*u8) + u8 wklD2[0x10]; // 0xE0 - Workload status (16*u8) + u8 wklE2[0x10]; // 0xF0 - Workload event (16*u8) _sub_str1 wklF1[0x10]; // 0x100 - be_t unk22; // 0x900 - u8 unknown7[0x980 - 0x908]; + be_t unk22; // 0x900 - SPURS trace buffer + u8 unknown7[0x980 - 0x908]; // 0x908 - Per SPU trace info ??? (8*u32) 0x950 - SPURS trace mode (u32) be_t semPrv; // 0x980 be_t unk11; // 0x988 be_t unk12; // 0x98C be_t unk13; // 0x990 @@ -321,14 +321,14 @@ struct CellSpurs _sub_str3 wklSysG; // 0xD00 be_t ppu0; // 0xD20 be_t ppu1; // 0xD28 - be_t spuTG; // 0xD30 + be_t spuTG; // 0xD30 - SPU thread group be_t spus[8]; // 0xD34 u8 unknown3[0xD5C - 0xD54]; - be_t queue; // 0xD5C - be_t port; // 0xD60 - atomic_t xD64; // 0xD64 - atomic_t xD65; // 0xD65 - atomic_t xD66; // 0xD66 + be_t queue; // 0xD5C - Event queue + be_t port; // 0xD60 - Event port + atomic_t xD64; // 0xD64 - SPURS handler dirty + atomic_t xD65; // 0xD65 - SPURS handler waiting + atomic_t xD66; // 0xD66 - SPURS handler exiting atomic_t enableEH; // 0xD68 be_t exception; // 0xD6C sys_spu_image spuImg; // 0xD70 @@ -340,13 +340,13 @@ struct CellSpurs be_t unk5; // 0xD9C be_t revision; // 0xDA0 be_t sdkVersion; // 0xDA4 - atomic_t spups; // 0xDA8 + atomic_t spups; // 0xDA8 - SPU port bits sys_lwmutex_t mutex; // 0xDB0 sys_lwcond_t cond; // 0xDC8 u8 unknown9[0xE00 - 0xDD0]; _sub_str4 wklH1[0x10]; // 0xE00 _sub_str2 sub3; // 0xF00 - u8 unknown6[0x1000 - 0xF80]; + u8 unknown6[0x1000 - 0xF80]; // 0xF80 - Global SPU exception handler 0xF88 - Global SPU exception handler args _sub_str3 wklG2[0x10]; // 0x1000 _sub_str1 wklF2[0x10]; // 0x1200 _sub_str4 wklH2[0x10]; // 0x1A00 From 698f4fd4506dd0c8429d82026c82b0761c7b1706 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 21 Dec 2014 01:37:29 +0530 Subject: [PATCH 07/29] SPURS: Improve the readability of the SPURS2 kernel at the cost of some performance --- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/Memory/vm_ptr.h | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 332 ++++++++++++----------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 91 +++++-- 4 files changed, 242 insertions(+), 185 deletions(-)
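A note on the kernel rewrite below, condensed from the comments in the new code
(a summary sketch, not the literal algorithm; "selectWorkload" is a hypothetical
name for the scheduler callback installed by spursInit): the callback takes one
boolean argument, false when entered from the SPURS kernel and true when entered
from cellSpursModulePollStatus. Among the eligible workloads it picks the one
with the highest scheduling weight, with the system service workload taking
absolute priority:

    // selectWorkload(false) - kernel entry: pick a workload, commit the result
    //                         to the shared SPURS area
    // selectWorkload(true)  - poll: pending context switches are added to the
    //                         contention and nothing is written back

diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index aaffd7d00d..e7c28ed8ff 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -608,7 +608,7 @@ public: for (auto 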
&arg : values) { u32 arg_size = align(u32(arg.size() + 1), stack_align); - u32 arg_addr = Memory.MainMem.AllocAlign(arg_size, stack_align); + u32 arg_addr = (u32)Memory.MainMem.AllocAlign(arg_size, stack_align); std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index 6f0c65ed0d..0970ad7996 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -112,7 +112,7 @@ namespace vm __forceinline T* const operator -> () const { - return vm::get_ptr(m_addr); + return vm::get_ptr((u32)m_addr); } _ptr_base operator++ (int) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 8163443e42..483c3d56c6 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -37,7 +37,7 @@ s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptrm.wklSysG.pm.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4))); spurs->m.wklSysG.size = 0x2200; #else - spurs->m.wklSysG.pm.set(be_t::make(0x100)); // wrong 64-bit address + spurs->m.wklInfoSysSrv.pm.set(be_t::make(0x100)); // wrong 64-bit address #endif - spurs->m.wklSysG.data = 0; - spurs->m.wklSysG.copy.write_relaxed(0xff); + spurs->m.wklInfoSysSrv.data = 0; + spurs->m.wklInfoSysSrv.copy.write_relaxed(0xff); u32 sem; for (u32 i = 0; i < 0x10; i++) { @@ -146,7 +146,7 @@ s64 spursInit( assert(!"spu_image_import() failed"); } #else - spurs->m.spuImg.addr = Memory.Alloc(0x40000, 4096); + spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096); #endif s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL; @@ -208,12 +208,12 @@ s64 spursInit( u64 vRES = 0x20ull << 32; u128 vSET = {}; - if (spurs->m.x72.read_relaxed() & (1 << num)) + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << num)) { SPU.WriteLS8(0x1eb, 0); // var4 if (arg1 == 0 || var1 == 0x20) { - spurs->m.x72._and_not(1 << num); + spurs->m.sysSrvMessage._and_not(1 << num); } } else @@ -224,9 +224,9 @@ s64 spursInit( u128 savedD = SPU.ReadLS128(0x1B0); u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8))); u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); - u32 flagRecv = spurs->m.flagRecv.read_relaxed(); + u32 flagRecv = spurs->m.wklFlagReceiver.read_relaxed(); u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); - u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); + u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSignal1.read_relaxed()]); u128 vFMS1 = vFM | wklSet1; u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 0x8000 >> var1 : 0]); u32 var5 = SPU.ReadLS32(0x1ec); @@ -270,7 +270,7 @@ s64 spursInit( if (!arg1 || var1 == vNUM) { - spurs->m.wklSet1._and_not(be_t::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0))); + spurs->m.wklSignal1._and_not(be_t::make((u16)(vNUM < 16 ? 
0x8000 >> vNUM : 0))); if (vNUM == flagRecv && wklFlag == 0) { spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); @@ -305,126 +305,145 @@ s64 spursInit( { LV2_LOCK(0); // TODO: lock-free implementation if possible - const u32 arg1 = SPU.GPR[3]._u32[3]; - u32 var0 = SPU.ReadLS32(0x1d8); - u32 var1 = SPU.ReadLS32(0x1dc); - u128 wklA = vm::read128(spurs.addr() + 0x20); - u128 wklB = vm::read128(spurs.addr() + 0x30); - u128 savedA = SPU.ReadLS128(0x180); - u128 savedB = SPU.ReadLS128(0x190); - u128 vAA = u128::sub8(wklA, savedA); - u128 vBB = u128::sub8(wklB, savedB); - u128 vM1 = {}; if (var1 <= 31) vM1.u8r[var1 & 0xf] = (var1 <= 15) ? 0xf : 0xf0; - u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB)); + auto mgmt = vm::get_ptr(SPU.ls_offset); - u32 vNUM = 0x20; - u64 vRES = 0x20ull << 32; - u128 vSET = {}; + // The first and only argument to this function is a boolean that is set to false if the function + // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. + // If the first argument is true then the shared data is not updated with the result. + const auto isPoll = SPU.GPR[3]._u32[3]; - if (spurs->m.x72.read_relaxed() & (1 << num)) + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD2]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - SPU.WriteLS8(0x1eb, 0); // var4 - if (arg1 == 0 || var1 == 0x20) + contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; + + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { - spurs->m.x72._and_not(1 << num); + pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; + if (i != mgmt->wklCurrentId) + { + contention[i] += pendingContention[i]; + } + } + } + + u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + u32 pollStatus = 0; + + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. + if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) + { + // Not sure what this does. Possibly Mark the SPU as in use. 
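
The surrounding branch performs a test-and-clear of this SPU's bit in the 8-bit system-service message mask. As a standalone sketch (a plain u8 stands in for the shared atomic_t field, the poll-mode guard applied before the clear is omitted, and the helper name is hypothetical):

#include <cstdint>

// Probe and clear one SPU's bit in an 8-bit system-service message mask,
// mirroring the sysSrvMessage handling in the branch above.
bool takeSysSrvMessage(std::uint8_t& message, unsigned spuNum)
{
    const std::uint8_t bit = static_cast<std::uint8_t>(1u << spuNum);
    if ((message & bit) == 0)
    {
        return false; // no message pending for this SPU
    }
    message = static_cast<std::uint8_t>(message & ~bit); // acknowledge it
    return true;
}
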
+ mgmt->x1EB = 0; + if (!isPoll || mgmt->wklCurrentId == 0x20) + { + // Clear the message bit + mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); } } else { - u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0); - u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); - u128 savedC = SPU.ReadLS128(0x1A0); - u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); - u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); - u32 flagRecv = spurs->m.flagRecv.read_relaxed(); - u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); - u128 wklSet2 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet2.read_relaxed()]); - u128 vABL = vAABB & u128::from8p(0x0f); - u128 vABH = u128::fromV(_mm_srli_epi32((vAABB & u128::from8p(0xf0)).vi, 4)); - u32 var5 = SPU.ReadLS32(0x1ec); - u128 v5L = u128::fromV(g_imm_table.fsmb_table[var5 >> 16]); - u128 v5H = u128::fromV(g_imm_table.fsmb_table[(u16)var5]); - u128 vFML = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); - u128 vFMH = u128::fromV(g_imm_table.fsmb_table[(u16)((wklFlag == 0) && (flagRecv < 32) ? 0x80000000 >> flagRecv : 0)]); - u128 vCL = u128::fromV(_mm_slli_epi32((savedC & u128::from8p(0x0f)).vi, 4)); - u128 vCH = savedC & u128::from8p(0xf0); - u128 vABRL = u128::gtu8(wklReadyCount0, vABL); - u128 vABRH = u128::gtu8(wklReadyCount1, vABH); - u128 vCCL = v5L & u128::gtu8(vCL, {}) & u128::gtu8(wklMaxCnt & u128::from8p(0x0f), vABL) & (wklSet1 | vFML | vABRL); - u128 vCCH = v5H & u128::gtu8(vCH, {}) & u128::gtu8(u128::fromV(_mm_srli_epi32((wklMaxCnt & u128::from8p(0xf0)).vi, 4)), vABH) & (wklSet2 | vFMH | vABRH); - u128 v1H = {}; if (var1 <= 31 && var1 > 15) v1H.u8r[var1 & 0xf] = 4; - u128 v1L = {}; if (var1 <= 15) v1L.u8r[var1] = 4; - u128 vCH1 = (v1H | vCH & u128::from8p(0xFB)) & vCCH; - u128 vCL1 = (v1L | vCL & u128::from8p(0xFB)) & vCCL; - u128 vSTATL = vABRL & u128::from8p(1) | wklSet1 & u128::from8p(2) | vFML & u128::from8p(4); - u128 vSTATH = vABRH & u128::from8p(1) | wklSet2 & u128::from8p(2) | vFMH & u128::from8p(4); + // Caclulate the scheduling weight for each worjload + u8 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) + { + auto j = i & 0x0F; + u8 x1ECx1EE = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->x1EC & (0x8000 >> j) : mgmt->x1EE & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - s32 max = -1; - for (u32 i = 0; i < 0x10; i++) - { - const s32 value = vCL1.u8r[i]; - if (value > max && (vCCL.u8r[i] & 1)) + // For a worload to be considered for scheduling: + // 1. Its priority must be greater than 0 + // 2. The number of SPUs used by it must be less than the max contention for that workload + // 3. The bit in 0x1EC/0x1EE for the wokload must be set + // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. 
readyCount) + // Condition #4 may be overriden using a workload signal or using the workload flag + if (x1ECx1EE && priority > 0 && maxContention > contention[i]) { - vNUM = i; - max = value; - } - } - for (u32 i = 16; i < 0x20; i++) - { - const s32 value = vCH1.u8r[i]; - if (value > max && (vCCH.u8r[i] & 1)) - { - vNUM = i; - max = value; + if (wklFlag || wklSignal || mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i]) + { + // The scheduling weight of the workload is equal to the priority of the workload for the SPU. + // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. + u8 weight = priority << 4; + if (mgmt->wklCurrentId == i) + { + weight |= 0x04; + } + + // In case of a tie the lower numbered workload is chosen + if (weight > maxWeight) + { + wklSelectedId = i; + maxWeight = weight; + pollStatus = mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; + pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; + } + } } } - if (vNUM < 0x10) - { - vRES = ((u64)vNUM << 32) | vSTATL.u8r[vNUM]; - vSET.u8r[vNUM] = 0x01; - } - else if (vNUM < 0x20) - { - vRES = ((u64)vNUM << 32) | vSTATH.u8r[vNUM & 0xf]; - vSET.u8r[vNUM] = 0x10; - } + // Not sure what this does. Possibly mark the SPU as idle/in use. + mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; - SPU.WriteLS8(0x1eb, vNUM == 0x20); - - if (!arg1 || var1 == vNUM) + if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { - spurs->m.wklSet1._and_not(be_t::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0))); - spurs->m.wklSet2._and_not(be_t::make((u16)(0x80000000 >> vNUM))); - if (vNUM == flagRecv && wklFlag == 0) + // Clear workload signal for the selected workload + mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); + mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + + // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s + if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) { - spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); + mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); } } } - if (arg1 == 0) + if (!isPoll) { - vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA + // Called by kernel + // Increment the contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + contention[wklSelectedId]++; + } - SPU.WriteLS128(0x180, vSET); // update savedA - SPU.WriteLS32(0x1dc, vNUM); // update var1 + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) + { + mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); + mgmt->wklLocContention[i] = 0; + mgmt->wklLocPendingContention[i] = 0; + } + + mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 
0x10 : 0; + mgmt->wklCurrentId = wklSelectedId; + } + else if (wklSelectedId != mgmt->wklCurrentId) + { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) + { + mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); + mgmt->wklLocPendingContention[i] = 0; + } + + mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - if (arg1 == 1 && vNUM != var1) - { - vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB - - SPU.WriteLS128(0x190, vSET); // update savedB - } - else - { - vm::write128(spurs.addr() + 0x30, vBB); // update wklB - - SPU.WriteLS128(0x190, {}); // update savedB - } - - return vRES; + u64 result = (u64)wklSelectedId << 32; + result |= pollStatus; + return result; }; //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test //{ @@ -434,10 +453,13 @@ s64 spursInit( // return vRES; //}; - SPU.WriteLS128(0x1c0, u128::from32r(0, spurs.addr(), num, 0x1f)); + SpursKernelMgmtData * mgmt = vm::get_ptr(SPU.ls_offset); + mgmt->spurs = spurs; + mgmt->spuNum = num; + mgmt->dmaTagId = 0x1F; - u32 wid = 0x20; - u32 stat = 0; + u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + u32 pollStatus = 0; while (true) { if (Emu.IsStopped()) @@ -447,14 +469,14 @@ s64 spursInit( } // get current workload info: - auto& wkl = wid <= 15 ? spurs->m.wklG1[wid] : (wid <= 31 && isSecond ? spurs->m.wklG2[wid & 0xf] : spurs->m.wklSysG); + auto& wkl = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? spurs->m.wklInfo2[wid & 0xf] : spurs->m.wklInfoSysSrv); - if (SPU.ReadLS64(0x1d0) != wkl.pm.addr()) + if (mgmt->wklCurrentAddr != wkl.pm) { // load executable code: memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.pm.get_ptr(), wkl.size); - SPU.WriteLS64(0x1d0, wkl.pm.addr()); - SPU.WriteLS32(0x1d8, wkl.copy.read_relaxed()); + mgmt->wklCurrentAddr = wkl.pm; + mgmt->x1D8 = wkl.copy.read_relaxed(); } if (!isSecond) SPU.WriteLS16(0x1e8, 0); @@ -463,7 +485,7 @@ s64 spursInit( SPU.GPR[1]._u32[3] = 0x3FFB0; SPU.GPR[3]._u32[3] = 0x100; SPU.GPR[4]._u64[1] = wkl.data; - SPU.GPR[5]._u32[3] = stat; + SPU.GPR[5]._u32[3] = pollStatus; SPU.FastCall(0xa00); // check status: @@ -481,7 +503,7 @@ s64 spursInit( SPU.GPR[3].clear(); assert(SPU.m_code3_func); u64 res = SPU.m_code3_func(SPU); - stat = (u32)(res); + pollStatus = (u32)(res); wid = (u32)(res >> 32); } @@ -507,8 +529,8 @@ s64 spursInit( assert(!"lwcond_create() failed"); } - spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? SF1_IS_SECOND : 0); - spurs->m.flagRecv.write_relaxed(0xff); + spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? 
SF1_32_WORKLOADS : 0); + spurs->m.wklFlagReceiver.write_relaxed(0xff); spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); spurs->_u8[0xD64] = 0; spurs->_u8[0xD65] = 0; @@ -516,7 +538,7 @@ s64 spursInit( spurs->m.ppuPriority = ppuPriority; u32 queue; - if (s32 res = spursCreateLv2EventQueue(spurs, queue, vm::ptr::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv")) + if (s32 res = (s32)spursCreateLv2EventQueue(spurs, queue, vm::ptr::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv")) { assert(!"spursCreateLv2EventQueue() failed"); } @@ -575,14 +597,14 @@ s64 spursInit( for (u32 i = 0; i < 16; i++) { if (spurs->m.wklStat1[i].read_relaxed() == 2 && - spurs->m.wklG1[i].priority.ToBE() != 0 && - spurs->m.wklMaxCnt[i].read_relaxed() & 0xf + spurs->m.wklInfo1[i].priority.ToBE() != 0 && + spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) { if (spurs->m.wklReadyCount[i].read_relaxed() || - spurs->m.wklSet1.read_relaxed() & (0x8000u >> i) || + spurs->m.wklSignal1.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && - spurs->m.flagRecv.read_relaxed() == (u8)i + spurs->m.wklFlagReceiver.read_relaxed() == (u8)i )) { do_break = true; @@ -590,17 +612,17 @@ s64 spursInit( } } } - if (spurs->m.flags1 & SF1_IS_SECOND) for (u32 i = 0; i < 16; i++) + if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++) { if (spurs->m.wklStat2[i].read_relaxed() == 2 && - spurs->m.wklG2[i].priority.ToBE() != 0 && - spurs->m.wklMaxCnt[i].read_relaxed() & 0xf0 + spurs->m.wklInfo2[i].priority.ToBE() != 0 && + spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) { if (spurs->m.wklReadyCount[i + 0x10].read_relaxed() || - spurs->m.wklSet2.read_relaxed() & (0x8000u >> i) || + spurs->m.wklSignal2.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && - spurs->m.flagRecv.read_relaxed() == (u8)i + 0x10 + spurs->m.wklFlagReceiver.read_relaxed() == (u8)i + 0x10 )) { do_break = true; @@ -1339,7 +1361,7 @@ s32 spursAddWorkload( } u32 wnum; - const u32 wmax = spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u; // TODO: check if can be changed + const u32 wmax = spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed spurs->m.wklMskA.atomic_op([spurs, wmax, &wnum](be_t& value) { wnum = cntlz32(~(u32)value); // found empty position @@ -1358,15 +1380,15 @@ s32 spursAddWorkload( u32 index = wnum & 0xf; if (wnum <= 15) { - assert((spurs->m.wklA[wnum] & 0xf) == 0); - assert((spurs->m.wklB[wnum] & 0xf) == 0); + assert((spurs->m.wklCurrentContention[wnum] & 0xf) == 0); + assert((spurs->m.wklPendingContention[wnum] & 0xf) == 0); spurs->m.wklStat1[wnum].write_relaxed(1); spurs->m.wklD1[wnum] = 0; spurs->m.wklE1[wnum] = 0; - spurs->m.wklG1[wnum].pm = pm; - spurs->m.wklG1[wnum].data = data; - spurs->m.wklG1[wnum].size = size; - spurs->m.wklG1[wnum].priority = *(be_t*)priorityTable; + spurs->m.wklInfo1[wnum].pm = pm; + spurs->m.wklInfo1[wnum].data = data; + spurs->m.wklInfo1[wnum].size = size; + spurs->m.wklInfo1[wnum].priority = *(be_t*)priorityTable; spurs->m.wklH1[wnum].nameClass = nameClass; spurs->m.wklH1[wnum].nameInstance = nameInstance; memset(spurs->m.wklF1[wnum].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1377,23 +1399,23 @@ s32 spursAddWorkload( spurs->m.wklF1[wnum].hookArg = hookArg; spurs->m.wklE1[wnum] |= 2; } - if ((spurs->m.flags1 & SF1_IS_SECOND) == 0) + if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0) { spurs->m.wklReadyCount[wnum + 16].write_relaxed(0); - spurs->m.wklMinCnt[wnum] = minContention > 8 ? 
8 : minContention; + spurs->m.wklMinContention[wnum] = minContention > 8 ? 8 : minContention; } } else { - assert((spurs->m.wklA[index] & 0xf0) == 0); - assert((spurs->m.wklB[index] & 0xf0) == 0); + assert((spurs->m.wklCurrentContention[index] & 0xf0) == 0); + assert((spurs->m.wklPendingContention[index] & 0xf0) == 0); spurs->m.wklStat2[index].write_relaxed(1); spurs->m.wklD2[index] = 0; spurs->m.wklE2[index] = 0; - spurs->m.wklG2[index].pm = pm; - spurs->m.wklG2[index].data = data; - spurs->m.wklG2[index].size = size; - spurs->m.wklG2[index].priority = *(be_t*)priorityTable; + spurs->m.wklInfo2[index].pm = pm; + spurs->m.wklInfo2[index].data = data; + spurs->m.wklInfo2[index].size = size; + spurs->m.wklInfo2[index].priority = *(be_t*)priorityTable; spurs->m.wklH2[index].nameClass = nameClass; spurs->m.wklH2[index].nameInstance = nameInstance; memset(spurs->m.wklF2[index].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1409,27 +1431,27 @@ s32 spursAddWorkload( if (wnum <= 15) { - spurs->m.wklMaxCnt[wnum].atomic_op([maxContention](u8& v) + spurs->m.wklMaxContention[wnum].atomic_op([maxContention](u8& v) { v &= ~0xf; v |= (maxContention > 8 ? 8 : maxContention); }); - spurs->m.wklSet1._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag1 + spurs->m.wklSignal1._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag1 } else { - spurs->m.wklMaxCnt[index].atomic_op([maxContention](u8& v) + spurs->m.wklMaxContention[index].atomic_op([maxContention](u8& v) { v &= ~0xf0; v |= (maxContention > 8 ? 8 : maxContention) << 4; }); - spurs->m.wklSet2._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag2 + spurs->m.wklSignal2._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag2 } - spurs->m.flagRecv.compare_and_swap(wnum, 0xff); + spurs->m.wklFlagReceiver.compare_and_swap(wnum, 0xff); u32 res_wkl; - CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf]; + CellSpurs::WorkloadInfo& wkl = wnum <= 15 ? spurs->m.wklInfo1[wnum] : spurs->m.wklInfo2[wnum & 0xf]; spurs->m.wklMskB.atomic_op_sync([spurs, &wkl, wnum, &res_wkl](be_t& v) { const u32 mask = v.ToLE() & ~(0x80000000u >> wnum); @@ -1439,7 +1461,7 @@ s32 spursAddWorkload( { if (mask & m) { - CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf]; + CellSpurs::WorkloadInfo& current = i <= 15 ? spurs->m.wklInfo1[i] : spurs->m.wklInfo2[i & 0xf]; if (current.pm.addr() == wkl.pm.addr()) { // if a workload with identical policy module found @@ -1461,7 +1483,7 @@ s32 spursAddWorkload( spurs->wklStat(wnum).exchange(2); spurs->m.xBD.exchange(0xff); - spurs->m.x72.exchange(0xff); + spurs->m.sysSrvMessage.exchange(0xff); return CELL_OK; } @@ -1671,7 +1693,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set { return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; } - if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u)) + if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 
0x20u : 0x10u)) { return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } @@ -1687,14 +1709,14 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set { if (is_set) { - if (spurs->m.flagRecv.read_relaxed() != 0xff) + if (spurs->m.wklFlagReceiver.read_relaxed() != 0xff) { return CELL_SPURS_POLICY_MODULE_ERROR_BUSY; } } else { - if (spurs->m.flagRecv.read_relaxed() != wid) + if (spurs->m.wklFlagReceiver.read_relaxed() != wid) { return CELL_SPURS_POLICY_MODULE_ERROR_PERM; } @@ -1706,7 +1728,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set return res; } - spurs->m.flagRecv.atomic_op([wid, is_set](u8& FR) + spurs->m.wklFlagReceiver.atomic_op([wid, is_set](u8& FR) { if (is_set) { @@ -1783,7 +1805,7 @@ s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) { return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; } - if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u) || value > 0xff) + if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u) || value > 0xff) { return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 849a597223..61fcc42648 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -94,6 +94,7 @@ enum SPURSKernelInterfaces CELL_SPURS_MAX_SPU = 8, CELL_SPURS_MAX_WORKLOAD = 16, CELL_SPURS_MAX_WORKLOAD2 = 32, + CELL_SPURS_SYS_SERVICE_WORKLOAD_ID = 32, CELL_SPURS_MAX_PRIORITY = 16, CELL_SPURS_NAME_MAX_LENGTH = 15, CELL_SPURS_SIZE = 4096, @@ -162,8 +163,8 @@ enum SpursAttrFlags : u32 enum SpursFlags1 : u8 { SF1_NONE = 0x0, - - SF1_IS_SECOND = 0x40, + + SF1_32_WORKLOADS = 0x40, SF1_EXIT_IF_NO_WORK = 0x80, }; @@ -214,6 +215,12 @@ struct CellSpursWorkloadFlag typedef void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg); +enum CellSpursModulePollStatus { + CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, + CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, + CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 +}; + // Core CellSpurs structures struct CellSpurs { @@ -246,7 +253,7 @@ struct CellSpurs static_assert(sizeof(_sub_str2) == 0x80, "Wrong _sub_str2 size"); - struct _sub_str3 + struct WorkloadInfo { vm::bptr pm; // policy module be_t data; // spu argument @@ -255,7 +262,7 @@ struct CellSpurs be_t priority; }; - static_assert(sizeof(_sub_str3) == 0x20, "Wrong _sub_str3 size"); + static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); struct _sub_str4 { @@ -274,29 +281,33 @@ struct CellSpurs // real data struct { - atomic_t wklReadyCount[0x20]; // 0x0 (index = wid) - u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Current contention - u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Pending - u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - Min contention - atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max contention - CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - Workload signal - atomic_t x72; // 0x72 - message - u8 x73; // 0x73 - idling - u8 flags1; // 0x74 - Bit0(MSB)=exit_if_no_work, Bit1=32_workloads - u8 x75; // 0x75 - trace control - u8 nSpus; // 0x76 - Number of SPUs - atomic_t flagRecv; // 0x77 - atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) - Workload signal - u8 x7A[6]; // 0x7A + // The first 0x80 bytes of the CellSpurs structure is shared by all instances of the SPURS kernel in a 
SPURS instance and the PPU. + // The SPURS kernel copies this from main memory to the LS (address 0x100) then runs its scheduling algorithm using this as one + // of the inputs. After selecting a new workload, the SPURS kernel updates this and writes it back to main memory. + // The read-modify-write is performed atomically by the SPURS kernel. + atomic_t wklReadyCount[0x10]; // 0x00 (index = wid) - Number of SPUs requested by each workload + u8 wklCurrentContention[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs used by each workload + u8 wklPendingContention[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs that are pending to context switch to the workload + u8 wklMinContention[0x10]; // 0x40 (seems only for first 0..15 wids) - Min SPUs required for each workload + atomic_t wklMaxContention[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max SPUs that may be allocated to each workload + CellSpursWorkloadFlag wklFlag; // 0x60 + atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) + atomic_t sysSrvMessage; // 0x72 + u8 sysSrvIdling; // 0x73 + u8 flags1; // 0x74 Type is SpursFlags1 + u8 sysSrvTraceControl; // 0x75 + u8 nSpus; // 0x76 + atomic_t wklFlagReceiver; // 0x77 + atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) + u8 x7A[6]; // 0x7A atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_exitent, preparing, runnable, shutting_down, removable, invalid} u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) - atomic_t wklMskA; // 0xB0 - Available workloads (32*u1) - atomic_t wklMskB; // 0xB4 - Available module id - u8 xB8[5]; // 0xB8 - 0xBC - exit barrier - atomic_t xBD; // 0xBD - update workload - u8 xBE[2]; // 0xBE - 0xBF - message - terminate + atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) + atomic_t wklMskB; // 0xB4 - System service - Available module id + u8 xB8[5]; // 0xB8 - 0xBC - Syetem service exit barrier + atomic_t xBD; // 0xBD - System service message - update workload + u8 xBE[2]; // 0xBE - 0xBF - System service message - terminate u8 xC0[8]; // 0xC0 - System workload u8 xC8; // 0xC8 - System service - on spu u8 spuPort; // 0xC9 - SPU port for system service @@ -304,7 +315,7 @@ struct CellSpurs u8 xCB; // 0xCB u8 xCC; // 0xCC u8 xCD; // 0xCD - u8 xCE; // 0xCE - message - update trace + u8 xCE; // 0xCE - System service message - update trace u8 xCF; // 0xCF atomic_t wklStat2[0x10]; // 0xD0 - Workload state (16*u8) u8 wklD2[0x10]; // 0xE0 - Workload status (16*u8) @@ -317,8 +328,8 @@ struct CellSpurs be_t unk12; // 0x98C be_t unk13; // 0x990 u8 unknown4[0xB00 - 0x998]; - _sub_str3 wklG1[0x10]; // 0xB00 - _sub_str3 wklSysG; // 0xD00 + WorkloadInfo wklInfo1[0x10]; // 0xB00 + WorkloadInfo wklInfoSysSrv; // 0xD00 be_t ppu0; // 0xD20 be_t ppu1; // 0xD28 be_t spuTG; // 0xD30 - SPU thread group @@ -347,7 +358,7 @@ struct CellSpurs _sub_str4 wklH1[0x10]; // 0xE00 _sub_str2 sub3; // 0xF00 u8 unknown6[0x1000 - 0xF80]; // 0xF80 - Gloabl SPU exception handler 0xF88 - Gloabl SPU exception handlers args - _sub_str3 wklG2[0x10]; // 0x1000 + WorkloadInfo wklInfo2[0x10]; // 0x1000 _sub_str1 wklF2[0x10]; // 0x1200 _sub_str4 wklH2[0x10]; // 0x1A00 } m; @@ -741,5 +752,29 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; +// The SPURS kernel data store. This resides at 0x00 of the LS. 
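
The hex annotations on the members below double as a layout contract; a cheap way to keep such a layout honest is a set of offsetof checks against a plain-struct mock, along these lines (illustrative only - the real members use the emulator's be_t and vm::bptr wrappers, so this mock is not the commit's code):

#include <cstddef>

// Mock of the leading SpursKernelMgmtData members with plain types, sized to
// reproduce the documented local-storage offsets.
struct MgmtLayoutMock
{
    unsigned char unk0[0x100];                   // 0x00
    unsigned char tempArea[0x80];                // 0x100
    unsigned char wklLocContention[0x10];        // 0x180
    unsigned char wklLocPendingContention[0x10]; // 0x190
    unsigned char priority[0x10];                // 0x1A0
    unsigned char x1B0[0x10];                    // 0x1B0
    unsigned long long spurs;                    // 0x1C0
    unsigned int spuNum;                         // 0x1C8
    unsigned int dmaTagId;                       // 0x1CC
};

static_assert(offsetof(MgmtLayoutMock, spurs) == 0x1C0, "layout drift");
static_assert(offsetof(MgmtLayoutMock, spuNum) == 0x1C8, "layout drift");
static_assert(offsetof(MgmtLayoutMock, dmaTagId) == 0x1CC, "layout drift");
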
+struct SpursKernelMgmtData {
+	u8 unk0[0x100];                  // 0x00
+	u8 tempArea[0x80];               // 0x100
+	u8 wklLocContention[0x10];       // 0x180
+	u8 wklLocPendingContention[0x10]; // 0x190
+	u8 priority[0x10];               // 0x1A0
+	u8 x1B0[0x10];                   // 0x1B0
+	vm::bptr spurs;                  // 0x1C0
+	be_t spuNum;                     // 0x1C8
+	be_t dmaTagId;                   // 0x1CC
+	vm::bptr wklCurrentAddr;         // 0x1D0
+	be_t x1D8;                       // 0x1D8
+	u32 wklCurrentId;                // 0x1DC
+	be_t yieldToKernelAddr;          // 0x1E0
+	be_t selectWorkloadAddr;         // 0x1E4
+	u8 x1E8;                         // 0x1E8
+	u8 x1E9;                         // 0x1E9
+	u8 x1EA;                         // 0x1EA
+	u8 x1EB;                         // 0x1EB - This might be spuIdling
+	be_t x1EC;                       // 0x1EC - This might be wklEnable1
+	be_t x1EE;                       // 0x1EE - This might be wklEnable2
+};
+
 s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated);
 s64 spursWakeUp(vm::ptr spurs);

From 52b342464be7c067d62473730703345923f3148f Mon Sep 17 00:00:00 2001
From: S Gopal Rajagopal
Date: Mon, 22 Dec 2014 01:07:53 +0530
Subject: [PATCH 08/29] SPURS: Improve readability of SPURS1 kernel at the cost of some performance

---
 rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 311 ++++++++++++++---------
 rpcs3/Emu/SysCalls/Modules/cellSpurs.h   |  41 +--
 2 files changed, 210 insertions(+), 142 deletions(-)

diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index 483c3d56c6..557ff274ac 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -109,13 +109,13 @@ s64 spursInit(
 	// default or system workload:
 #ifdef PRX_DEBUG
-	spurs->m.wklSysG.pm.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4)));
-	spurs->m.wklSysG.size = 0x2200;
+	spurs->m.wklInfoSysSrv.addr.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4)));
+	spurs->m.wklInfoSysSrv.size = 0x2200;
 #else
-	spurs->m.wklInfoSysSrv.pm.set(be_t::make(0x100)); // wrong 64-bit address
+	spurs->m.wklInfoSysSrv.addr.set(be_t::make(0x100)); // wrong 64-bit address
 #endif
-	spurs->m.wklInfoSysSrv.data = 0;
-	spurs->m.wklInfoSysSrv.copy.write_relaxed(0xff);
+	spurs->m.wklInfoSysSrv.arg = 0;
+	spurs->m.wklInfoSysSrv.uniqueId.write_relaxed(0xff);
 	u32 sem;
 	for (u32 i = 0; i < 0x10; i++)
 	{
@@ -192,114 +192,164 @@ s64 spursInit(
 	{
 		LV2_LOCK(0); // TODO: lock-free implementation if possible

-		const u32 arg1 = SPU.GPR[3]._u32[3];
-		u32 var0 = SPU.ReadLS32(0x1d8);
-		u32 var1 = SPU.ReadLS32(0x1dc);
-		u128 wklA = vm::read128(spurs.addr() + 0x20);
-		u128 wklB = vm::read128(spurs.addr() + 0x30);
-		u128 savedA = SPU.ReadLS128(0x180);
-		u128 savedB = SPU.ReadLS128(0x190);
-		u128 vAA = u128::sub8(wklA, savedA);
-		u128 vBB = u128::sub8(wklB, savedB);
-		u128 vM1 = {}; if (var1 <= 15) vM1.u8r[var1] = 0xff;
-		u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB));
-
-		u32 vNUM = 0x20;
-		u64 vRES = 0x20ull << 32;
-		u128 vSET = {};
+		auto mgmt = vm::get_ptr(SPU.ls_offset);

-		if (spurs->m.sysSrvMessage.read_relaxed() & (1 << num))
+		// The first and only argument to this function is a boolean that is set to false if the function
+		// is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus.
+		// If the first argument is true then the shared data is not updated with the result.
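
cellSpursModulePollStatus is the poll-mode caller named in the comment above; both callers split the packed u64 this routine returns into a workload id (upper 32 bits) and a poll status (lower 32 bits), as the kernel loop does elsewhere in this patch with res >> 32 and (u32)res. A minimal sketch of that unpacking (struct and function names are hypothetical):

#include <cstdint>

// Split the packed select-workload result: id in the upper half, poll status
// (CELL_SPURS_MODULE_POLL_STATUS_* bits) in the lower half.
struct SelectResult
{
    std::uint32_t wklId;
    std::uint32_t pollStatus;
};

SelectResult unpackSelectResult(std::uint64_t res)
{
    return { static_cast<std::uint32_t>(res >> 32), static_cast<std::uint32_t>(res) };
}
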
+ const auto isPoll = SPU.GPR[3]._u32[3]; + + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - SPU.WriteLS8(0x1eb, 0); // var4 - if (arg1 == 0 || var1 == 0x20) - { - spurs->m.sysSrvMessage._and_not(1 << num); - } - } - else - { - u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0); - u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); - u128 savedC = SPU.ReadLS128(0x1A0); - u128 savedD = SPU.ReadLS128(0x1B0); - u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8))); - u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); - u32 flagRecv = spurs->m.wklFlagReceiver.read_relaxed(); - u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); - u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSignal1.read_relaxed()]); - u128 vFMS1 = vFM | wklSet1; - u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 0x8000 >> var1 : 0]); - u32 var5 = SPU.ReadLS32(0x1ec); - u128 wklMinCnt = vm::read128(spurs.addr() + 0x40); - u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); - u128 vCC = u128::andnot(vFMS1, u128::eq8(wklReadyCount0, {}) | u128::leu8(vRC, vAABB)) | - u128::leu8(wklMaxCnt, vAABB) | - u128::eq8(savedC, {}) | - u128::fromV(g_imm_table.fsmb_table[(~var5) >> 16]); - u128 vCCH1 = u128::andnot(vCC, - u128::from8p(0x80) & (vFMS1 | u128::gtu8(wklReadyCount0, vAABB)) | - u128::from8p(0x7f) & savedC); - u128 vCCL1 = u128::andnot(vCC, - u128::from8p(0x80) & vFMV1 | - u128::from8p(0x40) & u128::gtu8(vAABB, {}) & u128::gtu8(wklMinCnt, vAABB) | - u128::from8p(0x3c) & u128::fromV(_mm_slli_epi32(u128::sub8(u128::from8p(8), vAABB).vi, 2)) | - u128::from8p(0x02) & u128::eq8(savedD, u128::from8p((u8)var0)) | - u128::from8p(0x01)); - u128 vSTAT = - u128::from8p(0x01) & u128::gtu8(wklReadyCount0, vAABB) | - u128::from8p(0x02) & wklSet1 | - u128::from8p(0x04) & vFM; + contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; - for (s32 i = 0, max = -1; i < 0x10; i++) + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { - const s32 value = ((s32)vCCH1.u8r[i] << 8) | ((s32)vCCL1.u8r[i]); - if (value > max && (vCC.u8r[i] & 1) == 0) + pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + if (i != mgmt->wklCurrentId) { - vNUM = i; - max = value; - } - } - - if (vNUM < 0x10) - { - vRES = ((u64)vNUM << 32) | vSTAT.u8r[vNUM]; - vSET.u8r[vNUM] = 0x01; - } - - SPU.WriteLS8(0x1eb, vNUM == 0x20); - - if (!arg1 || var1 == vNUM) - { - spurs->m.wklSignal1._and_not(be_t::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0))); - if (vNUM == flagRecv && wklFlag == 0) - { - spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); + contention[i] += pendingContention[i]; } } } - if (arg1 == 0) + u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + u32 pollStatus = 0; + + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. 
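
In the else branch below (taken when no system-service message is pending for this SPU), each candidate workload's scheduling weight is packed into a u16. Factored out as a pure function, the packing looks like this (a sketch with invented parameter names; note that the patch shifts the priority left by 16, which would move it out of a 16-bit weight entirely, so the sketch assumes the intended shift of 8, placing priority just under the 0x8000 urgency bit as the numbered comment list implies):

#include <cstdint>

// Compose the SPURS1 scheduling weight from the criteria listed in the
// comments below, in decreasing order of significance.
std::uint16_t schedulingWeight(bool urgent,             // signal, flag, or readyCount > contention
                               std::uint8_t priority,   // per-SPU priority of the workload
                               bool isCurrent,          // workload already running on this SPU
                               bool belowMinContention, // 0 < contention < minContention
                               std::uint8_t contention, // SPUs currently used (0..8)
                               bool sameExecutable)     // uniqueId matches the loaded one
{
    std::uint16_t weight = urgent ? 0x8000 : 0;
    weight |= static_cast<std::uint16_t>((priority & 0x7F) << 8);
    weight |= isCurrent ? 0x80 : 0;
    weight |= belowMinContention ? 0x40 : 0;
    weight |= static_cast<std::uint16_t>(((8 - contention) & 0x0F) << 2);
    weight |= sameExecutable ? 0x02 : 0;
    weight |= 0x01;
    return weight;
}

Since the scan below replaces the best candidate only on a strictly greater weight, ties go to the lower-numbered workload, and the constant 0x01 bit simply guarantees any eligible candidate beats the initial weight of zero.
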
+ if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA - - SPU.WriteLS128(0x180, vSET); // update savedA - SPU.WriteLS32(0x1dc, vNUM); // update var1 - } - - if (arg1 == 1 && vNUM != var1) - { - vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB - - SPU.WriteLS128(0x190, vSET); // update savedB + // Not sure what this does. Possibly Mark the SPU as in use. + mgmt->x1EB = 0; + if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + // Clear the message bit + mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + } } else { - vm::write128(spurs.addr() + 0x30, vBB); // update wklB + // Caclulate the scheduling weight for each workload + u16 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) + { + u8 x1EC = mgmt->x1EC & (0x8000 >> i); + u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 requestCount = readyCount + idleSpuCount; - SPU.WriteLS128(0x190, {}); // update savedB + // For a workload to be considered for scheduling: + // 1. Its priority must not be 0 + // 2. The number of SPUs used by it must be less than the max contention for that workload + // 3. The bit in 0x1EC for the wokload must be set + // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) + // OR the workload must be signalled + // OR the workload flag is 0 and the workload is configured as the wokload flag receiver + if (x1EC && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) + { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) + { + // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: + // 1. Wokload signal set or workload flag or ready count > contention + // 2. Priority of the workload on the SPU + // 3. Is the workload the last selected workload + // 4. Minimum contention of the workload + // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) + // 6. Is the workload executable same as the currently loaded executable + // 7. The workload id (lesser the number, more the weight) + u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; + weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; + weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; + weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; + weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; + weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; + weight |= 0x01; + + // In case of a tie the lower numbered workload is chosen + if (weight > maxWeight) + { + wklSelectedId = i; + maxWeight = weight; + pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus |= wklSignal ? 
CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; + pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; + } + } + } + } + + // Not sure what this does. Possibly mark the SPU as idle/in use. + mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + + if (!isPoll || wklSelectedId == mgmt->wklCurrentId) + { + // Clear workload signal for the selected workload + mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); + mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + + // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s + if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) + { + mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); + } + } } - return vRES; + if (!isPoll) + { + // Called by kernel + // Increment the contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + contention[wklSelectedId]++; + } + + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) + { + mgmt->spurs->m.wklCurrentContention[i] = contention[i]; + mgmt->wklLocContention[i] = 0; + mgmt->wklLocPendingContention[i] = 0; + } + + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + mgmt->wklLocContention[wklSelectedId] = 1; + } + + mgmt->wklCurrentId = wklSelectedId; + } + else if (wklSelectedId != mgmt->wklCurrentId) + { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + pendingContention[wklSelectedId]++; + } + + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) + { + mgmt->spurs->m.wklPendingContention[i] = pendingContention[i]; + mgmt->wklLocPendingContention[i] = 0; + } + + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + mgmt->wklLocPendingContention[wklSelectedId] = 1; + } + } + + u64 result = (u64)wklSelectedId << 32; + result |= pollStatus; + return result; }; else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel { @@ -324,7 +374,7 @@ s64 spursInit( // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; if (i != mgmt->wklCurrentId) { @@ -342,7 +392,7 @@ s64 spursInit( { // Not sure what this does. Possibly Mark the SPU as in use. mgmt->x1EB = 0; - if (!isPoll || mgmt->wklCurrentId == 0x20) + if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); @@ -350,7 +400,7 @@ s64 spursInit( } else { - // Caclulate the scheduling weight for each worjload + // Caclulate the scheduling weight for each workload u8 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { @@ -360,31 +410,33 @@ s64 spursInit( u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? 
mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); - // For a worload to be considered for scheduling: + // For a workload to be considered for scheduling: // 1. Its priority must be greater than 0 // 2. The number of SPUs used by it must be less than the max contention for that workload // 3. The bit in 0x1EC/0x1EE for the wokload must be set // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // Condition #4 may be overriden using a workload signal or using the workload flag + // OR the workload must be signalled + // OR the workload flag is 0 and the workload is configured as the wokload receiver if (x1ECx1EE && priority > 0 && maxContention > contention[i]) { - if (wklFlag || wklSignal || mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i]) + if (wklFlag || wklSignal || readyCount > contention[i]) { // The scheduling weight of the workload is equal to the priority of the workload for the SPU. // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. + // In case of a tie the lower numbered workload is chosen. u8 weight = priority << 4; if (mgmt->wklCurrentId == i) { weight |= 0x04; } - // In case of a tie the lower numbered workload is chosen if (weight > maxWeight) { wklSelectedId = i; maxWeight = weight; - pollStatus = mgmt->spurs->m.wklReadyCount[i].read_relaxed() > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; } @@ -432,6 +484,11 @@ s64 spursInit( { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) + { + pendingContention[wklSelectedId]++; + } + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); @@ -471,12 +528,12 @@ s64 spursInit( // get current workload info: auto& wkl = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? 
spurs->m.wklInfo2[wid & 0xf] : spurs->m.wklInfoSysSrv); - if (mgmt->wklCurrentAddr != wkl.pm) + if (mgmt->wklCurrentAddr != wkl.addr) { // load executable code: - memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.pm.get_ptr(), wkl.size); - mgmt->wklCurrentAddr = wkl.pm; - mgmt->x1D8 = wkl.copy.read_relaxed(); + memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.addr.get_ptr(), wkl.size); + mgmt->wklCurrentAddr = wkl.addr; + mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); } if (!isSecond) SPU.WriteLS16(0x1e8, 0); @@ -484,7 +541,7 @@ s64 spursInit( // run workload: SPU.GPR[1]._u32[3] = 0x3FFB0; SPU.GPR[3]._u32[3] = 0x100; - SPU.GPR[4]._u64[1] = wkl.data; + SPU.GPR[4]._u64[1] = wkl.arg; SPU.GPR[5]._u32[3] = pollStatus; SPU.FastCall(0xa00); @@ -601,7 +658,7 @@ s64 spursInit( spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) { - if (spurs->m.wklReadyCount[i].read_relaxed() || + if (spurs->m.wklReadyCount1[i].read_relaxed() || spurs->m.wklSignal1.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && spurs->m.wklFlagReceiver.read_relaxed() == (u8)i @@ -619,7 +676,7 @@ s64 spursInit( spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) { - if (spurs->m.wklReadyCount[i + 0x10].read_relaxed() || + if (spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() || spurs->m.wklSignal2.read_relaxed() & (0x8000u >> i) || (spurs->m.wklFlag.flag.read_relaxed() == 0 && spurs->m.wklFlagReceiver.read_relaxed() == (u8)i + 0x10 @@ -1385,8 +1442,8 @@ s32 spursAddWorkload( spurs->m.wklStat1[wnum].write_relaxed(1); spurs->m.wklD1[wnum] = 0; spurs->m.wklE1[wnum] = 0; - spurs->m.wklInfo1[wnum].pm = pm; - spurs->m.wklInfo1[wnum].data = data; + spurs->m.wklInfo1[wnum].addr = pm; + spurs->m.wklInfo1[wnum].arg = data; spurs->m.wklInfo1[wnum].size = size; spurs->m.wklInfo1[wnum].priority = *(be_t*)priorityTable; spurs->m.wklH1[wnum].nameClass = nameClass; @@ -1401,9 +1458,10 @@ s32 spursAddWorkload( } if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0) { - spurs->m.wklReadyCount[wnum + 16].write_relaxed(0); + spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); spurs->m.wklMinContention[wnum] = minContention > 8 ? 8 : minContention; } + spurs->m.wklReadyCount1[wnum].write_relaxed(0); } else { @@ -1412,8 +1470,8 @@ s32 spursAddWorkload( spurs->m.wklStat2[index].write_relaxed(1); spurs->m.wklD2[index] = 0; spurs->m.wklE2[index] = 0; - spurs->m.wklInfo2[index].pm = pm; - spurs->m.wklInfo2[index].data = data; + spurs->m.wklInfo2[index].addr = pm; + spurs->m.wklInfo2[index].arg = data; spurs->m.wklInfo2[index].size = size; spurs->m.wklInfo2[index].priority = *(be_t*)priorityTable; spurs->m.wklH2[index].nameClass = nameClass; @@ -1426,8 +1484,8 @@ s32 spursAddWorkload( spurs->m.wklF2[index].hookArg = hookArg; spurs->m.wklE2[index] |= 2; } + spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); } - spurs->m.wklReadyCount[wnum].write_relaxed(0); if (wnum <= 15) { @@ -1462,21 +1520,21 @@ s32 spursAddWorkload( if (mask & m) { CellSpurs::WorkloadInfo& current = i <= 15 ? 
spurs->m.wklInfo1[i] : spurs->m.wklInfo2[i & 0xf]; - if (current.pm.addr() == wkl.pm.addr()) + if (current.addr.addr() == wkl.addr.addr()) { // if a workload with identical policy module found - res_wkl = current.copy.read_relaxed(); + res_wkl = current.uniqueId.read_relaxed(); break; } else { - k |= 0x80000000 >> current.copy.read_relaxed(); + k |= 0x80000000 >> current.uniqueId.read_relaxed(); res_wkl = cntlz32(~k); } } } - wkl.copy.exchange((u8)res_wkl); + wkl.uniqueId.exchange((u8)res_wkl); v = mask | (0x80000000u >> wnum); }); assert(res_wkl <= 31); @@ -1818,7 +1876,14 @@ s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } - spurs->m.wklReadyCount[wid].exchange((u8)value); + if (wid < CELL_SPURS_MAX_WORKLOAD) + { + spurs->m.wklReadyCount1[wid].exchange((u8)value); + } + else + { + spurs->m.wklIdleSpuCountOrReadyCount2[wid].exchange((u8)value); + } return CELL_OK; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 61fcc42648..379679207f 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -255,10 +255,10 @@ struct CellSpurs struct WorkloadInfo { - vm::bptr pm; // policy module - be_t data; // spu argument + vm::bptr addr; // Address of the executable + be_t arg; // spu argument be_t size; - atomic_t copy; + atomic_t uniqueId; // The unique id is the same for all workloads with the same addr be_t priority; }; @@ -285,21 +285,22 @@ struct CellSpurs // The SPURS kernel copies this from main memory to the LS (address 0x100) then runs its scheduling algorithm using this as one // of the inputs. After selecting a new workload, the SPURS kernel updates this and writes it back to main memory. // The read-modify-write is performed atomically by the SPURS kernel. - atomic_t wklReadyCount[0x10]; // 0x00 (index = wid) - Number of SPUs requested by each workload - u8 wklCurrentContention[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs used by each workload - u8 wklPendingContention[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Number of SPUs that are pending to context switch to the workload - u8 wklMinContention[0x10]; // 0x40 (seems only for first 0..15 wids) - Min SPUs required for each workload - atomic_t wklMaxContention[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - Max SPUs that may be allocated to each workload - CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) - atomic_t sysSrvMessage; // 0x72 - u8 sysSrvIdling; // 0x73 - u8 flags1; // 0x74 Type is SpursFlags1 - u8 sysSrvTraceControl; // 0x75 - u8 nSpus; // 0x76 - atomic_t wklFlagReceiver; // 0x77 - atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) - u8 x7A[6]; // 0x7A + atomic_t wklReadyCount1[0x10]; // 0x00 Number of SPUs requested by each workload (0..15 wids). + atomic_t wklIdleSpuCountOrReadyCount2[0x10]; // 0x10 SPURS1: Number of idle SPUs requested by each workload (0..15 wids). SPURS2: Number of SPUs requested by each workload (16..31 wids). + u8 wklCurrentContention[0x10]; // 0x20 Number of SPUs used by each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + u8 wklPendingContention[0x10]; // 0x30 Number of SPUs that are pending to context switch to the workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. 
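
The packed 4-bit encoding just described for 32-workload (SPURS2) mode - byte index wid % 16, low nibble for wids 0..15, high nibble for wids 16..31 - can be captured in a pair of helpers like these (an illustration with invented names, not code from the commit):

#include <cstdint>

// Read one workload's 4-bit contention value from a 16-byte packed array.
std::uint8_t readPackedContention(const std::uint8_t packed[16], unsigned wid)
{
    const std::uint8_t b = packed[wid % 16];
    return wid < 16 ? (b & 0x0F) : (b >> 4);
}

// Write one workload's 4-bit contention value into a 16-byte packed array.
void writePackedContention(std::uint8_t packed[16], unsigned wid, std::uint8_t value)
{
    std::uint8_t& b = packed[wid % 16];
    b = wid < 16 ? static_cast<std::uint8_t>((b & 0xF0) | (value & 0x0F))
                 : static_cast<std::uint8_t>((b & 0x0F) | ((value & 0x0F) << 4));
}
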
+ u8 wklMinContention[0x10]; // 0x40 Min SPUs required for each workload. SPURS1: index = wid. SPURS2: Unused. + atomic_t wklMaxContention[0x10]; // 0x50 Max SPUs that may be allocated to each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + CellSpursWorkloadFlag wklFlag; // 0x60 + atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) + atomic_t sysSrvMessage; // 0x72 + u8 sysSrvIdling; // 0x73 + u8 flags1; // 0x74 Type is SpursFlags1 + u8 sysSrvTraceControl; // 0x75 + u8 nSpus; // 0x76 + atomic_t wklFlagReceiver; // 0x77 + atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) + u8 x7A[6]; // 0x7A atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_exitent, preparing, runnable, shutting_down, removable, invalid} u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) @@ -764,7 +765,7 @@ struct SpursKernelMgmtData { be_t spuNum; // 0x1C8 be_t dmaTagId; // 0x1CC vm::bptr wklCurrentAddr; // 0x1D0 - be_t x1D8; // 0x1D8 + be_t wklCurrentUniqueId; // 0x1D8 u32 wklCurrentId; // 0x1DC be_t yieldToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 @@ -774,6 +775,8 @@ struct SpursKernelMgmtData { u8 x1EB; // 0x1EB - This might be spuIdling be_t x1EC; // 0x1EC - This might be wklEnable1 be_t x1EE; // 0x1EE - This might be wklEnable2 + u8 x1F0[0x220 - 0x1F0]; // 0x1F0 + u8 wklUniqueId[0x10]; // 0x220 }; s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); From 66641160422e704616065c717f815c60cb647499 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 1 Jan 2015 01:51:22 +0530 Subject: [PATCH 09/29] SPURS: System service workload - initial commit --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 251 ++++++++++++++++++++--- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 189 ++++++++++------- rpcs3/cellSpursSpu.cpp | 220 ++++++++++++++++++++ rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 + 5 files changed, 561 insertions(+), 103 deletions(-) create mode 100644 rpcs3/cellSpursSpu.cpp diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 557ff274ac..bdf0d97101 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -101,7 +101,7 @@ s64 spursInit( } spurs->m.xCC = 0; spurs->m.xCD = 0; - spurs->m.xCE = 0; + spurs->m.sysSrvMsgUpdateTrace = 0; for (u32 i = 0; i < 8; i++) { spurs->m.xC0[i] = -1; @@ -225,8 +225,7 @@ s64 spursInit( // the system service message bit for this SPU is set. if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->x1EB = 0; + mgmt->spuIdling = 0; if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit @@ -239,7 +238,7 @@ s64 spursInit( u16 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 x1EC = mgmt->x1EC & (0x8000 >> i); + u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); @@ -249,11 +248,11 @@ s64 spursInit( // For a workload to be considered for scheduling: // 1. Its priority must not be 0 // 2. 
The number of SPUs used by it must be less than the max contention for that workload - // 3. The bit in 0x1EC for the wokload must be set + // 3. The workload should be in runnable state // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (x1EC && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) + if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { @@ -287,7 +286,7 @@ s64 spursInit( } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { @@ -391,7 +390,7 @@ s64 spursInit( if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->x1EB = 0; + mgmt->spuIdling = 0; if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit @@ -405,7 +404,7 @@ s64 spursInit( for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { auto j = i & 0x0F; - u8 x1ECx1EE = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->x1EC & (0x8000 >> j) : mgmt->x1EE & (0x8000 >> j); + u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); @@ -415,11 +414,11 @@ s64 spursInit( // For a workload to be considered for scheduling: // 1. Its priority must be greater than 0 // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The bit in 0x1EC/0x1EE for the wokload must be set + // 3. The workload should be in runnable state // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload receiver - if (x1ECx1EE && priority > 0 && maxContention > contention[i]) + if (runnable && priority > 0 && maxContention > contention[i]) { if (wklFlag || wklSignal || readyCount > contention[i]) { @@ -445,7 +444,7 @@ s64 spursInit( } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->x1EB = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { @@ -653,7 +652,7 @@ s64 spursInit( bool do_break = false; for (u32 i = 0; i < 16; i++) { - if (spurs->m.wklStat1[i].read_relaxed() == 2 && + if (spurs->m.wklState1[i].read_relaxed() == 2 && spurs->m.wklInfo1[i].priority.ToBE() != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) @@ -671,7 +670,7 @@ s64 spursInit( } if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++) { - if (spurs->m.wklStat2[i].read_relaxed() == 2 && + if (spurs->m.wklState2[i].read_relaxed() == 2 && spurs->m.wklInfo2[i].priority.ToBE() != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) @@ -756,7 +755,7 @@ s64 spursInit( } } - spurs->m.unk22 = 0; + spurs->m.traceBuffer = 0; // can also use cellLibprof if available (omitted) // some unknown subroutine @@ -1439,13 +1438,16 @@ s32 spursAddWorkload( { assert((spurs->m.wklCurrentContention[wnum] & 0xf) == 0); assert((spurs->m.wklPendingContention[wnum] & 0xf) == 0); - spurs->m.wklStat1[wnum].write_relaxed(1); - spurs->m.wklD1[wnum] = 0; - spurs->m.wklE1[wnum] = 0; + spurs->m.wklState1[wnum].write_relaxed(1); + spurs->m.wklStatus1[wnum] = 0; + spurs->m.wklEvent1[wnum] = 0; spurs->m.wklInfo1[wnum].addr = pm; spurs->m.wklInfo1[wnum].arg = data; spurs->m.wklInfo1[wnum].size = size; - spurs->m.wklInfo1[wnum].priority = *(be_t*)priorityTable; + for (u32 i = 0; i < 8; i++) + { + spurs->m.wklInfo1[wnum].priority[i] = priorityTable[i]; + } spurs->m.wklH1[wnum].nameClass = nameClass; spurs->m.wklH1[wnum].nameInstance = nameInstance; memset(spurs->m.wklF1[wnum].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1454,7 +1456,7 @@ s32 spursAddWorkload( { spurs->m.wklF1[wnum].hook = hook; spurs->m.wklF1[wnum].hookArg = hookArg; - spurs->m.wklE1[wnum] |= 2; + spurs->m.wklEvent1[wnum] |= 2; } if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0) { @@ -1467,13 +1469,16 @@ s32 spursAddWorkload( { assert((spurs->m.wklCurrentContention[index] & 0xf0) == 0); assert((spurs->m.wklPendingContention[index] & 0xf0) == 0); - spurs->m.wklStat2[index].write_relaxed(1); - spurs->m.wklD2[index] = 0; - spurs->m.wklE2[index] = 0; + spurs->m.wklState2[index].write_relaxed(1); + spurs->m.wklStatus2[index] = 0; + spurs->m.wklEvent2[index] = 0; spurs->m.wklInfo2[index].addr = pm; spurs->m.wklInfo2[index].arg = data; spurs->m.wklInfo2[index].size = size; - spurs->m.wklInfo2[index].priority = *(be_t*)priorityTable; + for (u32 i = 0; i < 8; i++) + { + spurs->m.wklInfo2[index].priority[i] = priorityTable[i]; + } spurs->m.wklH2[index].nameClass = nameClass; spurs->m.wklH2[index].nameInstance = nameInstance; memset(spurs->m.wklF2[index].unk0, 0, 0x20); // clear struct preserving semaphore id @@ -1482,7 +1487,7 @@ s32 spursAddWorkload( { spurs->m.wklF2[index].hook = hook; spurs->m.wklF2[index].hookArg = hookArg; - spurs->m.wklE2[index] |= 2; + spurs->m.wklEvent2[index] |= 2; } spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); } @@ -1539,8 +1544,8 @@ s32 spursAddWorkload( }); assert(res_wkl <= 31); - spurs->wklStat(wnum).exchange(2); - spurs->m.xBD.exchange(0xff); + spurs->wklState(wnum).exchange(2); + spurs->m.sysSrvMsgUpdateWorkload.exchange(0xff); spurs->m.sysSrvMessage.exchange(0xff); return CELL_OK; } @@ -1871,7 +1876,7 @@ s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } - if (spurs->m.exception.ToBE() || spurs->wklStat(wid).read_relaxed() != 2) + if (spurs->m.exception.ToBE() || spurs->wklState(wid).read_relaxed() != 2) { return 
CELL_SPURS_POLICY_MODULE_ERROR_STAT; } @@ -3364,6 +3369,190 @@ s64 cellSpursSemaphoreGetTasksetAddress() #endif } +bool spursIsLibProfLoaded() +{ + return false; +} + +void spursTraceStatusUpdate(vm::ptr spurs) +{ + LV2_LOCK(0); + + if (spurs->m.xCC != 0) + { + spurs->m.xCD = 1; + spurs->m.sysSrvMsgUpdateTrace = (1 << spurs->m.nSpus) - 1; + spurs->m.sysSrvMessage.write_relaxed(0xFF); + sys_semaphore_wait(spurs->m.semPrv, 0); + } +} + +s64 spursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode, u32 updateStatus) +{ + if (!spurs || !buffer) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align || buffer.addr() % CellSpursTraceInfo::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (size < CellSpursTraceInfo::size || mode & ~(CELL_SPURS_TRACE_MODE_FLAG_MASK)) + { + return CELL_SPURS_CORE_ERROR_INVAL; + } + + if (spurs->m.traceBuffer != 0) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.traceDataSize = size - CellSpursTraceInfo::size; + for (u32 i = 0; i < 8; i++) + { + buffer->spu_thread[i] = spurs->m.spus[i]; + buffer->count[i] = 0; + } + + buffer->spu_thread_grp = spurs->m.spuTG; + buffer->nspu = spurs->m.nSpus; + spurs->m.traceBuffer = buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 1 : 0); + spurs->m.traceMode = mode; + + u32 spuTraceDataCount = (spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus; + for (u32 i = 0, j = 8; i < 6; i++) + { + spurs->m.x908[i] = j; + j += spuTraceDataCount; + } + + spurs->m.sysSrvTraceControl = 0; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode) +{ + if (spursIsLibProfLoaded()) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + return spursTraceInitialize(spurs, buffer, size, mode, 1); +} + +s64 spursTraceStart(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 1; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStart(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStart(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 spursTraceStop(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 2; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStop(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStop(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 cellSpursTraceFinalize(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return 
CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 0; + spurs->m.traceMode = 0; + spurs->m.traceBuffer = 0; + spursTraceStatusUpdate(spurs); + return CELL_OK; +} + void cellSpurs_init(Module *pxThis) { cellSpurs = pxThis; @@ -3521,5 +3710,11 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, _cellSpursSemaphoreInitialize); REG_FUNC(cellSpurs, cellSpursSemaphoreGetTasksetAddress); + // Trace + REG_FUNC(cellSpurs, cellSpursTraceInitialize); + REG_FUNC(cellSpurs, cellSpursTraceStart); + REG_FUNC(cellSpurs, cellSpursTraceStop); + REG_FUNC(cellSpurs, cellSpursTraceFinalize); + // TODO: some trace funcs } \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 379679207f..1307de72e9 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -114,34 +114,6 @@ enum RangeofEventQueuePortNumbers CELL_SPURS_DYNAMIC_PORT_RANGE_BOTTOM = 63, }; -enum SPURSTraceTypes -{ - CELL_SPURS_TRACE_TAG_LOAD = 0x2a, - CELL_SPURS_TRACE_TAG_MAP = 0x2b, - CELL_SPURS_TRACE_TAG_START = 0x2c, - CELL_SPURS_TRACE_TAG_STOP = 0x2d, - CELL_SPURS_TRACE_TAG_USER = 0x2e, - CELL_SPURS_TRACE_TAG_GUID = 0x2f, -}; - -// SPURS task defines. -enum TaskConstants -{ - CELL_SPURS_MAX_TASK = 128, - CELL_SPURS_TASK_TOP = 0x3000, - CELL_SPURS_TASK_BOTTOM = 0x40000, - CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, - CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, - CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, -}; - -class SPURSManager; -class SPURSManagerEventFlag; -class SPURSManagerTaskset; - -struct CellSpurs; - enum SpursAttrFlags : u32 { SAF_NONE = 0x0, @@ -168,6 +140,68 @@ enum SpursFlags1 : u8 SF1_EXIT_IF_NO_WORK = 0x80, }; +enum SpursWorkloadConstants +{ + // Workload states + SPURS_WKL_STATE_NON_EXISTENT = 0, + SPURS_WKL_STATE_PREPARING = 1, + SPURS_WKL_STATE_RUNNABLE = 2, + SPURS_WKL_STATE_SHUTTING_DOWN = 3, + SPURS_WKL_STATE_REMOVABLE = 4, + SPURS_WKL_STATE_INVALID = 5, +}; + +enum CellSpursModulePollStatus +{ + CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, + CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, + CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 +}; + +enum SpursTraceConstants +{ + // Trace tag types + CELL_SPURS_TRACE_TAG_KERNEL = 0x20, + CELL_SPURS_TRACE_TAG_SERVICE = 0x21, + CELL_SPURS_TRACE_TAG_TASK = 0x22, + CELL_SPURS_TRACE_TAG_JOB = 0x23, + CELL_SPURS_TRACE_TAG_OVIS = 0x24, + CELL_SPURS_TRACE_TAG_LOAD = 0x2a, + CELL_SPURS_TRACE_TAG_MAP = 0x2b, + CELL_SPURS_TRACE_TAG_START = 0x2c, + CELL_SPURS_TRACE_TAG_STOP = 0x2d, + CELL_SPURS_TRACE_TAG_USER = 0x2e, + CELL_SPURS_TRACE_TAG_GUID = 0x2f, + + // Service incident + CELL_SPURS_TRACE_SERVICE_INIT = 0x01, + CELL_SPURS_TRACE_SERVICE_WAIT = 0x02, + CELL_SPURS_TRACE_SERVICE_EXIT = 0x03, + + // Trace mode flags + CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER = 0x1, + CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP = 0x2, + CELL_SPURS_TRACE_MODE_FLAG_MASK = 0x3, +}; + +// SPURS task constants +enum SpursTaskConstants +{ + CELL_SPURS_MAX_TASK = 128, + CELL_SPURS_TASK_TOP = 0x3000, + CELL_SPURS_TASK_BOTTOM = 0x40000, + CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, + CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, +}; + +class SPURSManager; +class SPURSManagerEventFlag; +class SPURSManagerTaskset; + +struct CellSpurs; + struct CellSpursAttribute { static const uint align = 8; @@ -215,12 +249,6 @@ struct CellSpursWorkloadFlag typedef 
void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg); -enum CellSpursModulePollStatus { - CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, - CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, - CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 -}; - // Core CellSpurs structures struct CellSpurs { @@ -259,7 +287,7 @@ struct CellSpurs be_t arg; // spu argument be_t size; atomic_t uniqueId; // The unique id is the same for all workloads with the same addr - be_t priority; + be_t priority[8]; }; static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); @@ -294,36 +322,41 @@ struct CellSpurs CellSpursWorkloadFlag wklFlag; // 0x60 atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids) atomic_t sysSrvMessage; // 0x72 - u8 sysSrvIdling; // 0x73 + u8 spuIdling; // 0x73 u8 flags1; // 0x74 Type is SpursFlags1 u8 sysSrvTraceControl; // 0x75 u8 nSpus; // 0x76 atomic_t wklFlagReceiver; // 0x77 atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) u8 x7A[6]; // 0x7A - atomic_t wklStat1[0x10]; // 0x80 - Workload state (16*u8) - State enum {non_exitent, preparing, runnable, shutting_down, removable, invalid} - u8 wklD1[0x10]; // 0x90 - Workload status (16*u8) - u8 wklE1[0x10]; // 0xA0 - Workload event (16*u8) + atomic_t wklState1[0x10]; // 0x80 SPURS_WKL_STATE_* + u8 wklStatus1[0x10]; // 0x90 + u8 wklEvent1[0x10]; // 0xA0 atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) atomic_t wklMskB; // 0xB4 - System service - Available module id u8 xB8[5]; // 0xB8 - 0xBC - Syetem service exit barrier - atomic_t xBD; // 0xBD - System service message - update workload - u8 xBE[2]; // 0xBE - 0xBF - System service message - terminate + atomic_t sysSrvMsgUpdateWorkload; // 0xBD + u8 xBE[2]; // 0xBE + u8 sysSrvMsgTerminate; // 0xBF u8 xC0[8]; // 0xC0 - System workload - u8 xC8; // 0xC8 - System service - on spu + u8 sysSrvOnSpu; // 0xC8 u8 spuPort; // 0xC9 - SPU port for system service u8 xCA; // 0xCA u8 xCB; // 0xCB u8 xCC; // 0xCC u8 xCD; // 0xCD - u8 xCE; // 0xCE - System service message - update trace + u8 sysSrvMsgUpdateTrace; // 0xCE u8 xCF; // 0xCF - atomic_t wklStat2[0x10]; // 0xD0 - Workload state (16*u8) - u8 wklD2[0x10]; // 0xE0 - Workload status (16*u8) - u8 wklE2[0x10]; // 0xF0 - Workload event (16*u8) + atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_* + u8 wklStatus2[0x10]; // 0xE0 + u8 wklEvent2[0x10]; // 0xF0 _sub_str1 wklF1[0x10]; // 0x100 - be_t unk22; // 0x900 - SPURS trace buffer - u8 unknown7[0x980 - 0x908]; // 0x908 - Per SPU trace info ??? 
(8*u32) 0x950 - SPURS trace mode (u32) + be_t traceBuffer; // 0x900 + be_t x908[6]; // 0x908 - Indices to traceData (a guess) + u8 unknown7[0x948 - 0x920]; // 0x920 + be_t traceDataSize; // 0x948 + be_t traceMode; // 0x950 + u8 unknown8[0x980 - 0x954]; // 0x954 be_t semPrv; // 0x980 be_t unk11; // 0x988 be_t unk12; // 0x98C @@ -371,15 +404,15 @@ struct CellSpurs } c; }; - __forceinline atomic_t& wklStat(const u32 wid) + __forceinline atomic_t& wklState(const u32 wid) { if (wid & 0x10) { - return m.wklStat2[wid & 0xf]; + return m.wklState2[wid & 0xf]; } else { - return m.wklStat1[wid & 0xf]; + return m.wklState1[wid & 0xf]; } } @@ -526,25 +559,21 @@ struct CellSpursExceptionInfo struct CellSpursTraceInfo { - be_t spu_thread[8]; - be_t count[8]; - be_t spu_thread_grp; - be_t nspu; - //u8 padding[]; -}; + static const u32 size = 0x80; + static const u32 align = 16; -struct CellTraceHeader -{ - u8 tag; - u8 length; - u8 cpu; - u8 thread; - be_t time; + be_t spu_thread[8]; // 0x00 + be_t count[8]; // 0x20 + be_t spu_thread_grp; // 0x40 + be_t nspu; // 0x44 + //u8 padding[]; }; struct CellSpursTracePacket { - struct header_struct + static const u32 size = 16; + + struct { u8 tag; u8 length; @@ -553,23 +582,29 @@ struct CellSpursTracePacket be_t time; } header; - struct data_struct + union { - struct load_struct + struct + { + be_t incident; + be_t reserved; + } service; + + struct { be_t ea; be_t ls; be_t size; } load; - struct map_struct + struct { be_t offset; be_t ls; be_t size; } map; - struct start_struct + struct { s8 module[4]; be_t level; @@ -578,6 +613,7 @@ struct CellSpursTracePacket be_t user; be_t guid; + be_t stop; } data; }; @@ -771,12 +807,15 @@ struct SpursKernelMgmtData { be_t selectWorkloadAddr; // 0x1E4 u8 x1E8; // 0x1E8 u8 x1E9; // 0x1E9 - u8 x1EA; // 0x1EA - u8 x1EB; // 0x1EB - This might be spuIdling - be_t x1EC; // 0x1EC - This might be wklEnable1 - be_t x1EE; // 0x1EE - This might be wklEnable2 - u8 x1F0[0x220 - 0x1F0]; // 0x1F0 - u8 wklUniqueId[0x10]; // 0x220 + u8 sysSrvInitialised; // 0x1EA + u8 spuIdling; // 0x1EB + be_t wklRunnable1; // 0x1EC + be_t wklRunnable2; // 0x1EE + u8 x1F0[0x210 - 0x1F0]; // 0x1F0 + be_t traceBuffer; // 0x210 + be_t traceMsgCount; // 0x218 + be_t traceMaxCount; // 0x21C + u8 wklUniqueId[0x10]; // 0x220 }; s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); diff --git a/rpcs3/cellSpursSpu.cpp b/rpcs3/cellSpursSpu.cpp new file mode 100644 index 0000000000..6aad7f7430 --- /dev/null +++ b/rpcs3/cellSpursSpu.cpp @@ -0,0 +1,220 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/Modules/cellSpurs.h" + +/// Output trace information +void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { + // TODO: Implement this +} + +/// Check for execution right requests +unsigned cellSpursModulePollStatus(CellSpursModulePollStatus * status) { + // TODO: Implement this + return 0; +} + +void spursSysServiceCleanup(SPUThread & spu, SpursKernelMgmtData * mgmt) { + // TODO: Implement this +} + +void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { + // TODO: Implement this +} + +void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownMask) { + u32 wklNotifyMask = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownMask & (0x80000000 >> i)) { + mgmt->spurs->m.wklEvent1[i] |= 0x01; + if 
(mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { + wklNotifyMask |= 0x80000000 >> i; + } + } + } + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownMask & (0x8000 >> i)) { + mgmt->spurs->m.wklEvent2[i] |= 0x01; + if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { + wklNotifyMask |= 0x8000 >> i; + } + } + } + + if (wklNotifyMask) { + // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); + } +} + +/// Update workload information in the LS from main memory +void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { + u32 wklShutdownMask = 0; + mgmt->wklRunnable1 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; + mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); + + auto wklStatus = mgmt->spurs->m.wklStatus1[i]; + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable1 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); + } + + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus1[i] & (1 << mgmt->spuNum)) == 0)) { + mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownMask |= 0x80000000 >> i; + } + } + } + + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + mgmt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { + mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; + } + + auto wklStatus = mgmt->spurs->m.wklStatus2[i]; + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable2 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + } + + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus2[i] & (1 << mgmt->spuNum)) == 0)) { + mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownMask |= 0x8000 >> i; + } + } + } + } + + if (wklShutdownMask) { + spursSysServiceUpdateEvent(spu, mgmt, wklShutdownMask); + } +} + +/// Process any messages +void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { + if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + spursSysServiceUpdateWorkload(spu, mgmt); + } + + if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); + } + + if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + // TODO: Rest of the terminate processing + } +} + +void spursSysServiceExitIfRequired() { + // TODO: Implement this +} + +/// Main function for the system service workload +void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { + auto mgmt = vm::get_ptr(spu.ls_offset); + + if (mgmt->spurs.addr() % CellSpurs::align) { + // TODO: Halt + } + + // Initialise the system service if this is the first time its being 
started on this SPU + if (mgmt->sysSrvInitialised != 0) { + mgmt->sysSrvInitialised = 1; + + if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { + // TODO: Halt + } + + mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; + mgmt->traceBuffer = 0; + mgmt->traceMsgCount = -1; + + spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); + spursSysServiceCleanup(spu, mgmt); + + // Trace - SERVICE: INIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + } + + // Trace - START: Module='SYS ' + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_START; + memcpy(pkt.data.start.module, "SYS ", 4); + pkt.data.start.level = 1; // Policy module + pkt.data.start.ls = 0xA00 >> 2; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + while (true) { + spursSysServiceProcessMessages(spu, mgmt); + + if (cellSpursModulePollStatus(nullptr)) { + // Trace - SERVICE: EXIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + // Trace - STOP: GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + break; + } + + if (mgmt->spuIdling == 0) { + continue; + } + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursSysServiceExitIfRequired(); + } +} + +/// Entry point of the system service workload +void spursSysServiceWorkloadEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; + + spu.GPR[1]._u32[3] = 0x3FFD0; + *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; + memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); + + LV2_LOCK(0); + + if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceWorkloadMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system service workload. Need to implement this. 
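+        // A sketch of what this path needs to do, based on the cleanup routine a later
+        // patch in this series adds (spursSysServiceCleanupAfterPreemption): restore the
+        // preempted workload's contention and ready counts before running the system
+        // service workload.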
+ } + + // TODO: Ensure that this function always returns to the SPURS kernel + return; +} diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 8e2629cea3..3599cd2cd3 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -54,6 +54,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 9335c19690..a1e6732cc9 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -653,6 +653,9 @@ Emu\CPU\ARMv7\Modules + + Emu\SysCalls\Modules + From 4a83d43a8f317c0a5f75ddced3b78f20db84d7e8 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Fri, 2 Jan 2015 01:33:36 +0530 Subject: [PATCH 10/29] SPURS: System service workload --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 10 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 12 +- rpcs3/cellSpursSpu.cpp | 162 ++++++++++++++++++++--- 3 files changed, 159 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index bdf0d97101..4cac04ffad 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -104,7 +104,7 @@ s64 spursInit( spurs->m.sysSrvMsgUpdateTrace = 0; for (u32 i = 0; i < 8; i++) { - spurs->m.xC0[i] = -1; + spurs->m.sysSrvWorkload[i] = -1; } // default or system workload: @@ -755,7 +755,7 @@ s64 spursInit( } } - spurs->m.traceBuffer = 0; + spurs->m.traceBuffer.set(0); // can also use cellLibprof if available (omitted) // some unknown subroutine @@ -3418,13 +3418,13 @@ s64 spursTraceInitialize(vm::ptr spurs, vm::ptr b buffer->spu_thread_grp = spurs->m.spuTG; buffer->nspu = spurs->m.nSpus; - spurs->m.traceBuffer = buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 1 : 0); + spurs->m.traceBuffer.set(buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 
1 : 0));
 	spurs->m.traceMode = mode;
 
 	u32 spuTraceDataCount = (spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus;
 	for (u32 i = 0, j = 8; i < 6; i++)
 	{
-		spurs->m.x908[i] = j;
+		spurs->m.traceStartIndex[i] = j;
 		j += spuTraceDataCount;
 	}
 
@@ -3548,7 +3548,7 @@ s64 cellSpursTraceFinalize(vm::ptr spurs)
 
 	spurs->m.sysSrvTraceControl = 0;
 	spurs->m.traceMode = 0;
-	spurs->m.traceBuffer = 0;
+	spurs->m.traceBuffer.set(0);
 	spursTraceStatusUpdate(spurs);
 	return CELL_OK;
 }
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
index 1307de72e9..3d817e4a4d 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
@@ -249,6 +249,8 @@ struct CellSpursWorkloadFlag
 typedef void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg);
 
+struct CellSpursTraceInfo;
+
 // Core CellSpurs structures
 struct CellSpurs
 {
@@ -336,9 +338,9 @@ struct CellSpurs
 		atomic_t wklMskB; // 0xB4 - System service - Available module id
 		u8 xB8[5]; // 0xB8 - 0xBC - System service exit barrier
 		atomic_t sysSrvMsgUpdateWorkload; // 0xBD
-		u8 xBE[2]; // 0xBE
+		u8 xBE; // 0xBE
 		u8 sysSrvMsgTerminate; // 0xBF
-		u8 xC0[8]; // 0xC0 - System workload
+		u8 sysSrvWorkload[8]; // 0xC0
 		u8 sysSrvOnSpu; // 0xC8
 		u8 spuPort; // 0xC9 - SPU port for system service
 		u8 xCA; // 0xCA
@@ -351,8 +353,8 @@ struct CellSpurs
 		u8 wklStatus2[0x10]; // 0xE0
 		u8 wklEvent2[0x10]; // 0xF0
 		_sub_str1 wklF1[0x10]; // 0x100
-		be_t traceBuffer; // 0x900
-		be_t x908[6]; // 0x908 - Indices to traceData (a guess)
+		vm::bptr traceBuffer; // 0x900
+		be_t traceStartIndex[6]; // 0x908
 		u8 unknown7[0x948 - 0x920]; // 0x920
 		be_t traceDataSize; // 0x948
 		be_t traceMode; // 0x950
@@ -810,7 +812,7 @@ struct SpursKernelMgmtData {
 	u8 sysSrvInitialised; // 0x1EA
 	u8 spuIdling; // 0x1EB
 	be_t wklRunnable1; // 0x1EC
-	be_t wklRunnable2; // 0x1EE
+	be_t wklRunnable2; // 0x1EE
 	u8 x1F0[0x210 - 0x1F0]; // 0x1F0
 	be_t traceBuffer; // 0x210
 	be_t traceMsgCount; // 0x218
 	be_t traceMaxCount; // 0x21C
diff --git a/rpcs3/cellSpursSpu.cpp b/rpcs3/cellSpursSpu.cpp
index 6aad7f7430..33c75e8a44 100644
--- a/rpcs3/cellSpursSpu.cpp
+++ b/rpcs3/cellSpursSpu.cpp
@@ -15,14 +15,80 @@ unsigned cellSpursModulePollStatus(CellSpursModulePollStatus * status) {
     return 0;
 }
 
-void spursSysServiceCleanup(SPUThread & spu, SpursKernelMgmtData * mgmt) {
-    // TODO: Implement this
+/// Restore scheduling parameters to the right values after a workload has been preempted by the system service workload
+void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) {
+    if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) {
+        auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum];
+        mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF;
+
+        spursSysServiceUpdateWorkload(spu, mgmt);
+        if (wklId >= CELL_SPURS_MAX_WORKLOAD) {
+            mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10;
+            mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1);
+        } else {
+            mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
+            mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
+        }
+
+        auto wklIdSaved = mgmt->wklCurrentId;
+        mgmt->wklCurrentId = wklId;
+
+        // Trace - STOP: GUID
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+        pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here
+        cellSpursModulePutTrace(&pkt, 
mgmt->dmaTagId);
+
+        mgmt->wklCurrentId = wklIdSaved;
+    }
 }
 
+/// Update the trace count for this SPU in CellSpurs
+void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) {
+    if (mgmt->traceBuffer) {
+        mgmt->spurs->m.traceBuffer->count[mgmt->spuNum] = mgmt->traceMsgCount;
+    }
+}
+
+/// Update trace control in SPU from CellSpurs
 void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) {
-    // TODO: Implement this
+    auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace;
+    mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum);
+    mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum);
+    mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum;
+
+    bool notify = false;
+    if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) {
+        mgmt->spurs->m.xCD = 0;
+        notify = true;
+    }
+
+    if (arg4 && mgmt->spurs->m.xCD != 0) {
+        mgmt->spurs->m.xCD = 0;
+        notify = true;
+    }
+
+    if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) {
+        if (mgmt->traceMsgCount != 0xFF || mgmt->traceBuffer == 0 || mgmt->spurs->m.traceBuffer.addr() == 0) {
+            spursSysServiceUpdateTraceCount(spu, mgmt);
+        } else {
+            mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum];
+        }
+
+        mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4);
+        mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0];
+        if (mgmt->traceBuffer == 0) {
+            mgmt->traceMsgCount = 0;
+        }
+    }
+
+    if (notify) {
+        // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0);
+    }
 }
 
+/// Update events in CellSpurs
 void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownMask) {
     u32 wklNotifyMask = 0;
     for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
@@ -48,7 +114,7 @@ void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32
     }
 }
 
-/// Update workload information in the LS from main memory
+/// Update workload information in the SPU LS from CellSpurs
 void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) {
     u32 wklShutdownMask = 0;
     mgmt->wklRunnable1 = 0;
@@ -65,7 +131,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt)
         }
 
         if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
-            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus1[i] & (1 << mgmt->spuNum)) == 0)) {
+            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) {
                 mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
                 wklShutdownMask |= 0x80000000 >> i;
             }
@@ -88,7 +154,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt)
         }
 
         if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
-            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && ((mgmt->spurs->m.wklStatus2[i] & (1 << mgmt->spuNum)) == 0)) {
+            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) {
                 mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
                 wklShutdownMask |= 0x8000 >> i;
             }
@@ -103,23 +169,82 @@
 /// Process any messages
 void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) {
+    // Process update workload message
     if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << 
mgmt->spuNum)) { mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); spursSysServiceUpdateWorkload(spu, mgmt); } + // Process update trace message if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); } + // Process terminate request if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); // TODO: Rest of the terminate processing } } -void spursSysServiceExitIfRequired() { - // TODO: Implement this +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { + while (true) { + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (mgmt->spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } + } + + bool shouldExit = nIdlingSpus != mgmt->spurs->m.nSpus ? false : mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; + bool foundSchedulableWorkload = false; + if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + foundSchedulableWorkload = true; + } else { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if ((mgmt->wklRunnable1 & (0x8000 >> i)) && + (mgmt->priority[i] & 0x0F) != 0 && + (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0x0F) > (mgmt->spurs->m.wklCurrentContention[i] & 0x0F)) { + foundSchedulableWorkload = true; + break; + } + } + + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS && foundSchedulableWorkload == false) { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if ((mgmt->wklRunnable2 & (0x8000 >> i)) && + (mgmt->priority[i] & 0xF0) != 0 && + (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0xF0) > (mgmt->spurs->m.wklCurrentContention[i] & 0xF0)) { + foundSchedulableWorkload = true; + break; + } + } + } + } + + if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && shouldExit == false && foundSchedulableWorkload == false) { + // TODO: Wait for events + } + + if (shouldExit || foundSchedulableWorkload == false) { + mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum; + } else { + mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); + } + + if (shouldExit == false && foundSchedulableWorkload == false) { + continue; + } + + if (shouldExit == false) { + return; + } + + break; + } + + // TODO: exit spu thread group } /// Main function for the system service workload @@ -127,15 +252,15 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { auto mgmt = vm::get_ptr(spu.ls_offset); if (mgmt->spurs.addr() % CellSpurs::align) { - // TODO: Halt + assert(0); } // Initialise the system service if this is the first time its being started on this SPU - if (mgmt->sysSrvInitialised != 0) { + if (mgmt->sysSrvInitialised == 0) { mgmt->sysSrvInitialised = 1; if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - // TODO: Halt + assert(0); } mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; @@ -143,7 +268,7 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { mgmt->traceMsgCount = -1; spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); - spursSysServiceCleanup(spu, mgmt); + spursSysServiceCleanupAfterPreemption(spu, mgmt); // Trace - SERVICE: INIT CellSpursTracePacket pkt; @@ -163,8 +288,10 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); while (true) { + // Process messages for the system service workload spursSysServiceProcessMessages(spu, mgmt); +poll: if (cellSpursModulePollStatus(nullptr)) { // Trace - SERVICE: 
EXIT CellSpursTracePacket pkt; @@ -181,10 +308,16 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { break; } + // If we reach here it means that either there are more system service messages to be processed + // or there are no workloads that can be scheduled. + + // If the SPU is not idling then process the remaining system service messages if (mgmt->spuIdling == 0) { continue; } + // If we reach here it means that the SPU is idling + // Trace - SERVICE: WAIT CellSpursTracePacket pkt; memset(&pkt, 0, sizeof(pkt)); @@ -192,7 +325,8 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - spursSysServiceExitIfRequired(); + spursSysServiceWaitOrExit(spu, mgmt); + goto poll; } } @@ -206,8 +340,6 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) { *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); - LV2_LOCK(0); - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { spursSysServiceWorkloadMain(spu, pollStatus); } else { From c1df79b7139f21faa0de83bceaa602f3fed46ed9 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 3 Jan 2015 15:59:22 +0530 Subject: [PATCH 11/29] SPURS: Integrate SPURS kernel and system service workload. Also, fixed some bugs. --- rpcs3/Emu/Cell/SPUThread.cpp | 2 +- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 403 +--------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 193 +++-- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 786 ++++++++++++++++++++ rpcs3/cellSpursSpu.cpp | 352 --------- rpcs3/emucore.vcxproj | 2 +- rpcs3/emucore.vcxproj.filters | 2 +- 8 files changed, 897 insertions(+), 845 deletions(-) create mode 100644 rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp delete mode 100644 rpcs3/cellSpursSpu.cpp diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 719cdc8cab..bc6d1d681f 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1029,7 +1029,7 @@ void SPUThread::StopAndSignal(u32 code) case 0x003: { - GPR[3]._u64[1] = m_code3_func(*this); + m_code3_func(*this); break; } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index e7c28ed8ff..4bebaf1baf 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -511,7 +511,7 @@ public: void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } std::function m_custom_task; - std::function m_code3_func; + std::function m_code3_func; public: SPUThread(CPUThreadType type = CPU_THREAD_SPU); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 4cac04ffad..343ca9ecbb 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -21,6 +21,8 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif +void spursKernelMain(SPUThread & spu); + s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptr port, s32 size, u64 name_u64) { #ifdef PRX_DEBUG_XXX @@ -112,7 +114,7 @@ s64 spursInit( spurs->m.wklInfoSysSrv.addr.set(be_t::make(vm::read32(libsre_rtoc - 0x7EA4))); spurs->m.wklInfoSysSrv.size = 0x2200; #else - spurs->m.wklInfoSysSrv.addr.set(be_t::make(0x100)); // wrong 64-bit address + spurs->m.wklInfoSysSrv.addr.set(be_t::make(SPURS_IMG_ADDR_SYS_SRV_WORKLOAD)); #endif spurs->m.wklInfoSysSrv.arg = 0; spurs->m.wklInfoSysSrv.uniqueId.write_relaxed(0xff); @@ -170,399 +172,16 @@ s64 
spursInit( name += "CellSpursKernel0"; for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++) { - spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num, isSecond](SPUThread& SPU) + spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num](SPUThread& SPU) { -#ifdef PRX_DEBUG_XXX SPU.GPR[3]._u32[3] = num; SPU.GPR[4]._u64[1] = spurs.addr(); + +#ifdef PRX_DEBUG_XXX return SPU.FastCall(SPU.PC); #endif - // code replacement: - { - const u32 addr = /*SPU.ReadLS32(0x1e0) +*/ 8; //SPU.ReadLS32(0x1e4); - SPU.WriteLS32(addr + 0, 3); // hack for cellSpursModulePollStatus - SPU.WriteLS32(addr + 4, 0x35000000); // bi $0 - SPU.WriteLS32(0x1e4, addr); - - SPU.WriteLS32(SPU.ReadLS32(0x1e0), 2); // hack for cellSpursModuleExit - } - - if (!isSecond) SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // first kernel - { - LV2_LOCK(0); // TODO: lock-free implementation if possible - - auto mgmt = vm::get_ptr(SPU.ls_offset); - - // The first and only argument to this function is a boolean that is set to false if the function - // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. - // If the first argument is true then the shared data is not updated with the result. - const auto isPoll = SPU.GPR[3]._u32[3]; - - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; - - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) - { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) - { - contention[i] += pendingContention[i]; - } - } - } - - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) - { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } - else - { - // Caclulate the scheduling weight for each workload - u16 maxWeight = 0; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); - u8 requestCount = readyCount + idleSpuCount; - - // For a workload to be considered for scheduling: - // 1. 
Its priority must not be 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) - { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) - { - // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: - // 1. Wokload signal set or workload flag or ready count > contention - // 2. Priority of the workload on the SPU - // 3. Is the workload the last selected workload - // 4. Minimum contention of the workload - // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) - // 6. Is the workload executable same as the currently loaded executable - // 7. The workload id (lesser the number, more the weight) - u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; - weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; - weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; - weight |= 0x01; - - // In case of a tie the lower numbered workload is chosen - if (weight > maxWeight) - { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } - } - } - } - - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) - { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); - - // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s - if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) - { - mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); - } - } - } - - if (!isPoll) - { - // Called by kernel - // Increment the contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - contention[wklSelectedId]++; - } - - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - mgmt->spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } - - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - mgmt->wklLocContention[wklSelectedId] = 1; - } - - mgmt->wklCurrentId = wklSelectedId; - } - else if (wklSelectedId != mgmt->wklCurrentId) - { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - pendingContention[wklSelectedId]++; - } - - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) - { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; - } - - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - mgmt->wklLocPendingContention[wklSelectedId] = 1; - } - } - - u64 result = (u64)wklSelectedId << 32; - result |= pollStatus; - return result; - }; - else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel - { - LV2_LOCK(0); // TODO: lock-free implementation if possible - - auto mgmt = vm::get_ptr(SPU.ls_offset); - - // The first and only argument to this function is a boolean that is set to false if the function - // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. - // If the first argument is true then the shared data is not updated with the result. - const auto isPoll = SPU.GPR[3]._u32[3]; - - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD2]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) - { - contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; - contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; - - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) - { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; - pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) - { - contention[i] += pendingContention[i]; - } - } - } - - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. 
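The message-bit handshake described in the comment above is a plain test-and-clear on a per-SPU bitmask. A minimal standalone sketch of the pattern, assuming one bit per SPU (bit n owned by SPU n); the helper name is illustrative, not part of the SPURS API:

#include <cstdint>

// Returns true if a system service message was pending for this SPU,
// acknowledging it by clearing the SPU's bit in the same step.
bool testAndClearSysSrvMessage(uint8_t& sysSrvMessage, unsigned spuNum)
{
    const uint8_t mask = static_cast<uint8_t>(1u << spuNum);
    if ((sysSrvMessage & mask) == 0)
    {
        return false; // nothing pending for this SPU
    }
    sysSrvMessage &= ~mask; // acknowledge
    return true;
}

The emulator performs the same read-modify-write through read_relaxed()/write_relaxed() because the mask is shared between the PPU and every SPU.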
- if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) - { - // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } - else - { - // Caclulate the scheduling weight for each workload - u8 maxWeight = 0; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) - { - auto j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); - - // For a workload to be considered for scheduling: - // 1. Its priority must be greater than 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload receiver - if (runnable && priority > 0 && maxContention > contention[i]) - { - if (wklFlag || wklSignal || readyCount > contention[i]) - { - // The scheduling weight of the workload is equal to the priority of the workload for the SPU. - // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. - // In case of a tie the lower numbered workload is chosen. - u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) - { - weight |= 0x04; - } - - if (weight > maxWeight) - { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } - } - } - } - - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) - { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); - - // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s - if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) - { - mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); - } - } - } - - if (!isPoll) - { - // Called by kernel - // Increment the contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - contention[wklSelectedId]++; - } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) - { - mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } - else if (wklSelectedId != mgmt->wklCurrentId) - { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) - { - pendingContention[wklSelectedId]++; - } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) - { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - } - - u64 result = (u64)wklSelectedId << 32; - result |= pollStatus; - return result; - }; - //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test - //{ - // LV2_LOCK(0); - // SPU.FastCall(0x290); - // u64 vRES = SPU.GPR[3]._u64[1]; - // return vRES; - //}; - - SpursKernelMgmtData * mgmt = vm::get_ptr(SPU.ls_offset); - mgmt->spurs = spurs; - mgmt->spuNum = num; - mgmt->dmaTagId = 0x1F; - - u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - while (true) - { - if (Emu.IsStopped()) - { - cellSpurs->Warning("Spurs Kernel aborted"); - return; - } - - // get current workload info: - auto& wkl = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? 
spurs->m.wklInfo2[wid & 0xf] : spurs->m.wklInfoSysSrv); - - if (mgmt->wklCurrentAddr != wkl.addr) - { - // load executable code: - memcpy(vm::get_ptr(SPU.ls_offset + 0xa00), wkl.addr.get_ptr(), wkl.size); - mgmt->wklCurrentAddr = wkl.addr; - mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); - } - - if (!isSecond) SPU.WriteLS16(0x1e8, 0); - - // run workload: - SPU.GPR[1]._u32[3] = 0x3FFB0; - SPU.GPR[3]._u32[3] = 0x100; - SPU.GPR[4]._u64[1] = wkl.arg; - SPU.GPR[5]._u32[3] = pollStatus; - SPU.FastCall(0xa00); - - // check status: - auto status = SPU.SPU.Status.GetValue(); - if (status == SPU_STATUS_STOPPED_BY_STOP) - { - return; - } - else - { - assert(status == SPU_STATUS_RUNNING); - } - - // get workload id: - SPU.GPR[3].clear(); - assert(SPU.m_code3_func); - u64 res = SPU.m_code3_func(SPU); - pollStatus = (u32)(res); - wid = (u32)(res >> 32); - } - + spursKernelMain(SPU); })->GetId(); } @@ -653,7 +272,7 @@ s64 spursInit( for (u32 i = 0; i < 16; i++) { if (spurs->m.wklState1[i].read_relaxed() == 2 && - spurs->m.wklInfo1[i].priority.ToBE() != 0 && + *((u64 *)spurs->m.wklInfo1[i].priority) != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf ) { @@ -671,7 +290,7 @@ s64 spursInit( if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++) { if (spurs->m.wklState2[i].read_relaxed() == 2 && - spurs->m.wklInfo2[i].priority.ToBE() != 0 && + *((u64 *)spurs->m.wklInfo2[i].priority) != 0 && spurs->m.wklMaxContention[i].read_relaxed() & 0xf0 ) { @@ -3383,7 +3002,7 @@ void spursTraceStatusUpdate(vm::ptr spurs) spurs->m.xCD = 1; spurs->m.sysSrvMsgUpdateTrace = (1 << spurs->m.nSpus) - 1; spurs->m.sysSrvMessage.write_relaxed(0xFF); - sys_semaphore_wait(spurs->m.semPrv, 0); + sys_semaphore_wait((u32)spurs->m.semPrv, 0); } } @@ -3421,7 +3040,7 @@ s64 spursTraceInitialize(vm::ptr spurs, vm::ptr b spurs->m.traceBuffer.set(buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 
1 : 0)); spurs->m.traceMode = mode; - u32 spuTraceDataCount = (spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus; + u32 spuTraceDataCount = (u32)((spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus); for (u32 i = 0, j = 8; i < 6; i++) { spurs->m.traceStartIndex[i] = j; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 3d817e4a4d..771406b3af 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -140,7 +140,7 @@ enum SpursFlags1 : u8 SF1_EXIT_IF_NO_WORK = 0x80, }; -enum SpursWorkloadConstants +enum SpursWorkloadConstants : u64 { // Workload states SPURS_WKL_STATE_NON_EXISTENT = 0, @@ -149,6 +149,12 @@ enum SpursWorkloadConstants SPURS_WKL_STATE_SHUTTING_DOWN = 3, SPURS_WKL_STATE_REMOVABLE = 4, SPURS_WKL_STATE_INVALID = 5, + + // GUID + SPURS_GUID_SYS_WKL = 0x1BB841BF38F89D33ull, + + // Image addresses + SPURS_IMG_ADDR_SYS_SRV_WORKLOAD = 0x100, }; enum CellSpursModulePollStatus @@ -199,7 +205,6 @@ enum SpursTaskConstants class SPURSManager; class SPURSManagerEventFlag; class SPURSManagerTaskset; - struct CellSpurs; struct CellSpursAttribute @@ -249,7 +254,65 @@ struct CellSpursWorkloadFlag typedef void(*CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg); -struct CellSpursTraceInfo; +struct CellSpursTraceInfo +{ + static const u32 size = 0x80; + static const u32 align = 16; + + be_t spu_thread[8]; // 0x00 + be_t count[8]; // 0x20 + be_t spu_thread_grp; // 0x40 + be_t nspu; // 0x44 + //u8 padding[]; +}; + +struct CellSpursTracePacket +{ + static const u32 size = 16; + + struct + { + u8 tag; + u8 length; + u8 spu; + u8 workload; + be_t time; + } header; + + union + { + struct + { + be_t incident; + be_t reserved; + } service; + + struct + { + be_t ea; + be_t ls; + be_t size; + } load; + + struct + { + be_t offset; + be_t ls; + be_t size; + } map; + + struct + { + s8 module[4]; + be_t level; + be_t ls; + } start; + + be_t user; + be_t guid; + be_t stop; + } data; +}; // Core CellSpurs structures struct CellSpurs @@ -289,7 +352,7 @@ struct CellSpurs be_t arg; // spu argument be_t size; atomic_t uniqueId; // The unique id is the same for all workloads with the same addr - be_t priority[8]; + u8 priority[8]; }; static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); @@ -311,10 +374,6 @@ struct CellSpurs // real data struct { - // The first 0x80 bytes of the CellSpurs structure is shared by all instances of the SPURS kernel in a SPURS instance and the PPU. - // The SPURS kernel copies this from main memory to the LS (address 0x100) then runs its scheduling algorithm using this as one - // of the inputs. After selecting a new workload, the SPURS kernel updates this and writes it back to main memory. - // The read-modify-write is performed atomically by the SPURS kernel. atomic_t wklReadyCount1[0x10]; // 0x00 Number of SPUs requested by each workload (0..15 wids). atomic_t wklIdleSpuCountOrReadyCount2[0x10]; // 0x10 SPURS1: Number of idle SPUs requested by each workload (0..15 wids). SPURS2: Number of SPUs requested by each workload (16..31 wids). u8 wklCurrentContention[0x10]; // 0x20 Number of SPUs used by each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. 
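The packed 4-bit contention layout described in the comment above can be made concrete with a short sketch. It assumes exactly the encoding the comment states (low nibble for wids 0..15, high nibble for wids 16..31); the helper names are illustrative:

#include <cstdint>

// Read the contention counter for a workload id out of the packed array.
uint8_t contentionFor(const uint8_t packed[16], unsigned wid)
{
    const uint8_t b = packed[wid % 16];
    return wid < 16 ? (b & 0x0F) : (b >> 4);
}

// Adjust it: adding 0x01 touches the low nibble and 0x10 the high nibble,
// which is why the cleanup code in this series subtracts 0x01 or 0x10
// depending on the workload id.
void addContention(uint8_t packed[16], unsigned wid)
{
    packed[wid % 16] += (wid < 16) ? 0x01 : 0x10;
}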
@@ -332,33 +391,34 @@ struct CellSpurs atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids) u8 x7A[6]; // 0x7A atomic_t wklState1[0x10]; // 0x80 SPURS_WKL_STATE_* - u8 wklStatus1[0x10]; // 0x90 - u8 wklEvent1[0x10]; // 0xA0 - atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) - atomic_t wklMskB; // 0xB4 - System service - Available module id - u8 xB8[5]; // 0xB8 - 0xBC - Syetem service exit barrier - atomic_t sysSrvMsgUpdateWorkload; // 0xBD - u8 xBE; // 0xBE - u8 sysSrvMsgTerminate; // 0xBF - u8 sysSrvWorkload[8]; // 0xC0 - u8 sysSrvOnSpu; // 0xC8 - u8 spuPort; // 0xC9 - SPU port for system service - u8 xCA; // 0xCA - u8 xCB; // 0xCB - u8 xCC; // 0xCC - u8 xCD; // 0xCD - u8 sysSrvMsgUpdateTrace; // 0xCE - u8 xCF; // 0xCF - atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_* - u8 wklStatus2[0x10]; // 0xE0 - u8 wklEvent2[0x10]; // 0xF0 - _sub_str1 wklF1[0x10]; // 0x100 - vm::bptr traceBuffer; // 0x900 - be_t traceStartIndex[6]; // 0x908 - u8 unknown7[0x948 - 0x920]; // 0x920 - be_t traceDataSize; // 0x948 - be_t traceMode; // 0x950 - u8 unknown8[0x980 - 0x954]; // 0x954 + u8 wklStatus1[0x10]; // 0x90 + u8 wklEvent1[0x10]; // 0xA0 + atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1) + atomic_t wklMskB; // 0xB4 - System service - Available module id + u32 xB8; // 0xB8 + u8 sysSrvExitBarrier; // 0xBC + atomic_t sysSrvMsgUpdateWorkload; // 0xBD + u8 xBE; // 0xBE + u8 sysSrvMsgTerminate; // 0xBF + u8 sysSrvWorkload[8]; // 0xC0 + u8 sysSrvOnSpu; // 0xC8 + u8 spuPort; // 0xC9 + u8 xCA; // 0xCA + u8 xCB; // 0xCB + u8 xCC; // 0xCC + u8 xCD; // 0xCD + u8 sysSrvMsgUpdateTrace; // 0xCE + u8 xCF; // 0xCF + atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_* + u8 wklStatus2[0x10]; // 0xE0 + u8 wklEvent2[0x10]; // 0xF0 + _sub_str1 wklF1[0x10]; // 0x100 + vm::bptr traceBuffer; // 0x900 + be_t traceStartIndex[6]; // 0x908 + u8 unknown7[0x948 - 0x920]; // 0x920 + be_t traceDataSize; // 0x948 + be_t traceMode; // 0x950 + u8 unknown8[0x980 - 0x954]; // 0x954 be_t semPrv; // 0x980 be_t unk11; // 0x988 be_t unk12; // 0x98C @@ -559,66 +619,6 @@ struct CellSpursExceptionInfo be_t option; }; -struct CellSpursTraceInfo -{ - static const u32 size = 0x80; - static const u32 align = 16; - - be_t spu_thread[8]; // 0x00 - be_t count[8]; // 0x20 - be_t spu_thread_grp; // 0x40 - be_t nspu; // 0x44 - //u8 padding[]; -}; - -struct CellSpursTracePacket -{ - static const u32 size = 16; - - struct - { - u8 tag; - u8 length; - u8 spu; - u8 workload; - be_t time; - } header; - - union - { - struct - { - be_t incident; - be_t reserved; - } service; - - struct - { - be_t ea; - be_t ls; - be_t size; - } load; - - struct - { - be_t offset; - be_t ls; - be_t size; - } map; - - struct - { - s8 module[4]; - be_t level; - be_t ls; - } start; - - be_t user; - be_t guid; - be_t stop; - } data; -}; - // Exception handlers. //typedef void (*CellSpursGlobalExceptionEventHandler)(vm::ptr spurs, vm::ptr info, // u32 id, vm::ptr arg); @@ -793,7 +793,6 @@ struct CellSpursTaskBinInfo // The SPURS kernel data store. This resides at 0x00 of the LS. 
struct SpursKernelMgmtData { - u8 unk0[0x100]; // 0x00 u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 u8 wklLocPendingContention[0x10]; // 0x190 @@ -804,7 +803,7 @@ struct SpursKernelMgmtData { be_t dmaTagId; // 0x1CC vm::bptr wklCurrentAddr; // 0x1D0 be_t wklCurrentUniqueId; // 0x1D8 - u32 wklCurrentId; // 0x1DC + be_t wklCurrentId; // 0x1DC be_t yieldToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 u8 x1E8; // 0x1E8 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp new file mode 100644 index 0000000000..643aed981c --- /dev/null +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -0,0 +1,786 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/Modules.h" +#include "Emu/SysCalls/Modules/cellSpurs.h" + +// +// SPURS utility functions +// +void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); + +// +// SPURS Kernel functions +// +void spursKernelSelectWorkload(SPUThread & spu); +void spursKernelSelectWorkload2(SPUThread & spu); + +// +// SPURS system service workload functions +// +void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4); +void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet); +void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); +void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceWorkloadEntry(SPUThread & spu); + +extern Module *cellSpurs; + +////////////////////////////////////////////////////////////////////////////// +// SPURS utility functions +////////////////////////////////////////////////////////////////////////////// + +/// Output trace information +void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { + // TODO: Implement this +} + +/// Check for execution right requests +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + + spu.GPR[3]._u32[3] = 1; + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + spursKernelSelectWorkload2(spu); + } else { + spursKernelSelectWorkload(spu); + } + + auto result = spu.GPR[3]._u64[1]; + if (status) { + *status = (u32)result; + } + + u32 wklId = result >> 32; + return wklId == mgmt->wklCurrentId ? 0 : 1; +} + +////////////////////////////////////////////////////////////////////////////// +// SPURS kernel functions +////////////////////////////////////////////////////////////////////////////// + +/// Select a workload to run +void spursKernelSelectWorkload(SPUThread & spu) { + LV2_LOCK(0); // TODO: lock-free implementation if possible + + auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + + // The first and only argument to this function is a boolean that is set to false if the function + // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. + // If the first argument is true then the shared data is not updated with the result. 
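+    // The result is returned in GPR[3]: the upper 32 bits hold the id of the selected
+    // workload and the lower 32 bits hold the CELL_SPURS_MODULE_POLL_STATUS_* bits.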
+    const auto isPoll = spu.GPR[3]._u32[3];
+
+    // Calculate the contention (number of SPUs used) for each workload
+    u8 contention[CELL_SPURS_MAX_WORKLOAD];
+    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
+    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+        contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];
+
+        // If this is a poll request then the number of SPUs pending to context switch is also added to the contention,
+        // presumably to prevent unnecessary jumps to the kernel
+        if (isPoll) {
+            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
+            if (i != mgmt->wklCurrentId) {
+                contention[i] += pendingContention[i];
+            }
+        }
+    }
+
+    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    u32 pollStatus = 0;
+
+    // The system service workload has the highest priority. Select the system service workload if
+    // the system service message bit for this SPU is set.
+    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+        mgmt->spuIdling = 0;
+        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            // Clear the message bit
+            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+        }
+    } else {
+        // Calculate the scheduling weight for each workload
+        u16 maxWeight = 0;
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+            u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+            u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
+            u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+            u8 requestCount = readyCount + idleSpuCount;
+
+            // For a workload to be considered for scheduling:
+            // 1. Its priority must not be 0
+            // 2. The number of SPUs used by it must be less than the max contention for that workload
+            // 3. The workload should be in runnable state
+            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+            //    OR the workload must be signalled
+            //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+            if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+                if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+                    // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+                    // 1. Workload signal set or workload flag or ready count > contention
+                    // 2. Priority of the workload on the SPU
+                    // 3. Is the workload the last selected workload
+                    // 4. Minimum contention of the workload
+                    // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
+                    // 6. Is the workload executable same as the currently loaded executable
+                    // 7. The workload id (lesser the number, more the weight)
+                    u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+                    weight |= (u16)(mgmt->priority[i] & 0x7F) << 8;
+                    weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
+                    weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+                    weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+                    weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentUniqueId ? 0x02 : 0x00;
+                    weight |= 0x01;
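+
+                    // (Resulting bit layout, following the list above:
+                    //    bit 15    - signal/flag set or ready count > contention
+                    //    bits 8-14 - priority of the workload on this SPU
+                    //    bit 7     - workload was the last selected workload
+                    //    bit 6     - contention is below the workload's minimum contention
+                    //    bits 2-5  - number of unused SPUs
+                    //    bit 1     - workload executable is the one currently loaded
+                    //    bit 0     - always set)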
+
+                    // In case of a tie the lower numbered workload is chosen
+                    if (weight > maxWeight) {
+                        wklSelectedId = i;
+                        maxWeight = weight;
+                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+                    }
+                }
+            }
+        }
+
+        // Not sure what this does. Possibly mark the SPU as idle/in use.
+        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+            // Clear workload signal for the selected workload
+            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+            // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
+                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+            }
+        }
+    }
+
+    if (!isPoll) {
+        // Called by kernel
+        // Increment the contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            contention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            mgmt->spurs->m.wklCurrentContention[i] = contention[i];
+            mgmt->wklLocContention[i] = 0;
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            mgmt->wklLocContention[wklSelectedId] = 1;
+        }
+
+        mgmt->wklCurrentId = wklSelectedId;
+    } else if (wklSelectedId != mgmt->wklCurrentId) {
+        // Not called by kernel but a context switch is required
+        // Increment the pending contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            pendingContention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i];
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            mgmt->wklLocPendingContention[wklSelectedId] = 1;
+        }
+    }
+
+    u64 result = (u64)wklSelectedId << 32;
+    result |= pollStatus;
+    spu.GPR[3]._u64[1] = result;
+}
+
+/// Select a workload to run
+void spursKernelSelectWorkload2(SPUThread & spu) {
+    LV2_LOCK(0); // TODO: lock-free implementation if possible
+
+    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
+
+    // The first and only argument to this function is a boolean that is set to false if the function
+    // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus.
+    // If the first argument is true then the shared data is not updated with the result.
+    const auto isPoll = spu.GPR[3]._u32[3];
+
+    // Calculate the contention (number of SPUs used) for each workload
+    u8 contention[CELL_SPURS_MAX_WORKLOAD2];
+    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
+    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+        contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
+        contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4;
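+        // (Unpacks the 4-bit encoding sketched earlier: wids 0..15 use the low nibble
+        // of wklCurrentContention[wid % 16], wids 16..31 the high nibble.)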
+
+        // If this is a poll request then the number of SPUs pending to context switch is also added to the contention,
+        // presumably to prevent unnecessary jumps to the kernel
+        if (isPoll) {
+            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F];
+            pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4;
+            if (i != mgmt->wklCurrentId) {
+                contention[i] += pendingContention[i];
+            }
+        }
+    }
+
+    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    u32 pollStatus = 0;
+
+    // The system service workload has the highest priority. Select the system service workload if
+    // the system service message bit for this SPU is set.
+    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+        // Not sure what this does. Possibly mark the SPU as in use.
+        mgmt->spuIdling = 0;
+        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            // Clear the message bit
+            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+        }
+    } else {
+        // Calculate the scheduling weight for each workload
+        u8 maxWeight = 0;
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+            auto j = i & 0x0F;
+            u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
+            u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
+            u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+            u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+            u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+            // For a workload to be considered for scheduling:
+            // 1. Its priority must be greater than 0
+            // 2. The number of SPUs used by it must be less than the max contention for that workload
+            // 3. The workload should be in runnable state
+            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+            //    OR the workload must be signalled
+            //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+            if (runnable && priority > 0 && maxContention > contention[i]) {
+                if (wklFlag || wklSignal || readyCount > contention[i]) {
+                    // The scheduling weight of the workload is equal to the priority of the workload for the SPU.
+                    // The current workload is given a slightly higher weight presumably to reduce the number of context switches.
+                    // In case of a tie the lower numbered workload is chosen.
+                    u8 weight = priority << 4;
+                    if (mgmt->wklCurrentId == i) {
+                        weight |= 0x04;
+                    }
+
+                    if (weight > maxWeight) {
+                        wklSelectedId = i;
+                        maxWeight = weight;
+                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+                    }
+                }
+            }
+        }
+
+        // Not sure what this does. Possibly mark the SPU as idle/in use.
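+        // (CellSpurs::spuIdling mirrors this flag per SPU; spursSysServiceWaitOrExit
+        // counts those bits to decide whether every SPU is idle and, together with
+        // SF1_EXIT_IF_NO_WORK, whether the SPU thread group should exit.)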
+        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+            // Clear workload signal for the selected workload
+            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+            // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
+                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+            }
+        }
+    }
+
+    if (!isPoll) {
+        // Called by kernel
+        // Increment the contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            contention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+            mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+            mgmt->wklLocContention[i] = 0;
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+        mgmt->wklCurrentId = wklSelectedId;
+    } else if (wklSelectedId != mgmt->wklCurrentId) {
+        // Not called by kernel but a context switch is required
+        // Increment the pending contention for the selected workload
+        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+            pendingContention[wklSelectedId]++;
+        }
+
+        for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
+            mgmt->wklLocPendingContention[i] = 0;
+        }
+
+        mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+    }
+
+    u64 result = (u64)wklSelectedId << 32;
+    result |= pollStatus;
+    spu.GPR[3]._u64[1] = result;
+}
+
+/// Entry point of the SPURS kernel
+void spursKernelMain(SPUThread & spu) {
+    SpursKernelMgmtData * mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
+    mgmt->spuNum = spu.GPR[3]._u32[3];
+    mgmt->dmaTagId = 0x1F;
+    mgmt->spurs.set(spu.GPR[4]._u64[1]);
+    mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    mgmt->wklCurrentUniqueId = 0x20;
+
+    bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+    mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808;
+    mgmt->selectWorkloadAddr = 0x290;
+    spu.WriteLS32(mgmt->yieldToKernelAddr, 2); // hack for cellSpursModuleExit
+    spu.WriteLS32(mgmt->selectWorkloadAddr, 3); // hack for cellSpursModulePollStatus
+    spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0
+    spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload;
+
+    u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    u32 pollStatus = 0;
+    while (true) {
+        if (Emu.IsStopped()) {
+            cellSpurs->Warning("Spurs Kernel aborted");
+            return;
+        }
+
+        // Get current workload info
+        auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ?
mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv); + + if (mgmt->wklCurrentAddr != wkl.addr) { + if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) { + // Load executable code + memcpy(vm::get_ptr(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size); + } + mgmt->wklCurrentAddr = wkl.addr; + mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); + } + + if (!isSecond) { + mgmt->x1E8 = 0; + mgmt->x1E9 = 0; + } + + // Run workload + spu.GPR[1]._u32[3] = 0x3FFB0; + spu.GPR[3]._u32[3] = 0x100; + spu.GPR[4]._u64[1] = wkl.arg; + spu.GPR[5]._u32[3] = pollStatus; + switch (mgmt->wklCurrentAddr.addr()) { + case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: + spursSysServiceWorkloadEntry(spu); + break; + default: + spu.FastCall(0xA00); + break; + } + + // Check status + auto status = spu.SPU.Status.GetValue(); + if (status == SPU_STATUS_STOPPED_BY_STOP) { + return; + } else { + assert(status == SPU_STATUS_RUNNING); + } + + // Select next workload to run + spu.GPR[3].clear(); + if (isSecond) { + spursKernelSelectWorkload2(spu); + } else { + spursKernelSelectWorkload(spu); + } + u64 res = spu.GPR[3]._u64[1]; + pollStatus = (u32)(res); + wid = (u32)(res >> 32); + } +} + +////////////////////////////////////////////////////////////////////////////// +// SPURS system workload functions +////////////////////////////////////////////////////////////////////////////// + +/// Restore scheduling parameters after a workload has been preempted by the system service workload +void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { + if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) { + auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum]; + mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; + + spursSysServiceUpdateWorkload(spu, mgmt); + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); + } else { + mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; + mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + } + + // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace + // uses the current worload id to determine the workload to which the trace belongs + auto wklIdSaved = mgmt->wklCurrentId; + mgmt->wklCurrentId = wklId; + + // Trace - STOP: GUID + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + mgmt->wklCurrentId = wklIdSaved; + } +} + +/// Update the trace count for this SPU in CellSpurs +void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) { + if (mgmt->traceBuffer) { + auto traceInfo = vm::ptr::make((u32)(mgmt->traceBuffer - (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4))); + traceInfo->count[mgmt->spuNum] = mgmt->traceMsgCount; + } +} + +/// Update trace control in SPU from CellSpurs +void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { + auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace; + mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); + mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum); + mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum; + + bool notify = false; + if 
(((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) { + mgmt->spurs->m.xCD = 0; + notify = true; + } + + if (arg4 && mgmt->spurs->m.xCD != 0) { + mgmt->spurs->m.xCD = 0; + notify = true; + } + + // Get trace parameters from CellSpurs and store them in the LS + if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { + if (mgmt->traceMsgCount != 0xFF || mgmt->spurs->m.traceBuffer.addr() == 0) { + spursSysServiceUpdateTraceCount(spu, mgmt); + } else { + mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum]; + } + + mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4); + mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0]; + if (mgmt->traceBuffer == 0) { + mgmt->traceMsgCount = 0; + } + } + + if (notify) { + // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0); + } +} + +/// Update events in CellSpurs +void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { + // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed + // workloads that have a shutdown completion hook registered + u32 wklNotifyBitSet = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownBitSet & (0x80000000u >> i)) { + mgmt->spurs->m.wklEvent1[i] |= 0x01; + if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { + wklNotifyBitSet |= 0x80000000u >> i; + } + } + + if (wklShutdownBitSet & (0x8000 >> i)) { + mgmt->spurs->m.wklEvent2[i] |= 0x01; + if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { + wklNotifyBitSet |= 0x8000 >> i; + } + } + } + + if (wklNotifyBitSet) { + // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); + } +} + +/// Update workload information in the SPU from CellSpurs +void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { + u32 wklShutdownBitSet = 0; + mgmt->wklRunnable1 = 0; + mgmt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + // Copy the priority of the workload for this SPU and its unique id to the LS + mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; + mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); + + // Update workload status and runnable flag based on the workload state + auto wklStatus = mgmt->spurs->m.wklStatus1[i]; + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable1 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) { + mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + // Copy the priority of the workload for this SPU to the LS + if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { + mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; + } + + // Update workload status and runnable flag based on the workload state + wklStatus = mgmt->spurs->m.wklStatus2[i]; + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable2 |= 0x8000 >> i; + } else { + mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) { + mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } + } + } + } + + if (wklShutdownBitSet) { + spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); + } +} + +/// Process any messages +void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { + // Process update workload message + if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + spursSysServiceUpdateWorkload(spu, mgmt); + } + + // Process update trace message + if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); + } + + // Process terminate request + if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { + mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + // TODO: Rest of the terminate processing + } +} + +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { + while (true) { + // Find the number of SPUs that are idling in this SPURS instance + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (mgmt->spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } + } + + bool allSpusIdle = nIdlingSpus == mgmt->spurs->m.nSpus ? true: false; + bool exitIfNoWork = mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false;
+
+        // Check if any workloads can be scheduled
+        bool foundReadyWorkload = false;
+        if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+            foundReadyWorkload = true;
+        } else {
+            if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+                    u32 j = i & 0x0F;
+                    u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
+                    u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
+                    u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+                    u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklCurrentContention[j] & 0x0F : mgmt->spurs->m.wklCurrentContention[j] >> 4;
+                    u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+                    u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+                    if (runnable && priority > 0 && maxContention > contention) {
+                        if (wklFlag || wklSignal || readyCount > contention) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            } else {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                    u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+                    u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+                    u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
+                    u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+                    u8 requestCount = readyCount + idleSpuCount;
+
+                    if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > mgmt->spurs->m.wklCurrentContention[i]) {
+                        if (wklFlag || wklSignal || (readyCount != 0 && requestCount > mgmt->spurs->m.wklCurrentContention[i])) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events.
+        if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && (allSpusIdle == false || exitIfNoWork == false) && foundReadyWorkload == false) {
+            // The system service blocks by making a reservation and waiting on the reservation lost event. This is unfortunately
+            // not yet completely implemented in rpcs3. So we busy wait here.
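+            // (The real kernel takes a lock-line reservation on the shared CellSpurs
+            // area and then reads the SPU event status channel, which blocks until the
+            // reservation is lost, i.e. until the PPU or another SPU writes to that
+            // area. The commented-out ReadChannel call below is a placeholder for it.)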
+            //u128 r;
+            //spu.ReadChannel(r, 0);
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+
+        if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) {
+            mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum;
+        } else {
+            mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum);
+        }
+
+        if (allSpusIdle == false || exitIfNoWork == false) {
+            if (foundReadyWorkload == true) {
+                return;
+            }
+        } else {
+            // TODO: exit spu thread group
+        }
+    }
+}
+
+/// Main function for the system service workload
+void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) {
+    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
+
+    if (mgmt->spurs.addr() % CellSpurs::align) {
+        assert(0);
+    }
+
+    // Initialise the system service if this is the first time it's being started on this SPU
+    if (mgmt->sysSrvInitialised == 0) {
+        mgmt->sysSrvInitialised = 1;
+
+        if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) {
+            assert(0);
+        }
+
+        mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum;
+        mgmt->traceBuffer = 0;
+        mgmt->traceMsgCount = -1;
+
+        spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0);
+        spursSysServiceCleanupAfterPreemption(spu, mgmt);
+
+        // Trace - SERVICE: INIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
+        cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+    }
+
+    // Trace - START: Module='SYS '
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
+    memcpy(pkt.data.start.module, "SYS ", 4);
+    pkt.data.start.level = 1; // Policy module
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+
+    while (true) {
+        // Process messages for the system service workload
+        spursSysServiceProcessMessages(spu, mgmt);
+
+poll:
+        if (cellSpursModulePollStatus(spu, nullptr)) {
+            // Trace - SERVICE: EXIT
+            CellSpursTracePacket pkt;
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
+            cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+
+            // Trace - STOP: GUID
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+            pkt.data.stop = SPURS_GUID_SYS_WKL;
+            cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+            break;
+        }
+
+        // If we reach here it means that either there are more system service messages to be processed
+        // or there are no workloads that can be scheduled.
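+        // (mgmt->spuIdling is the LS-local flag set by the workload selection functions
+        // when the system service was chosen because nothing else was schedulable.)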
+ + // If the SPU is not idling then process the remaining system service messages + if (mgmt->spuIdling == 0) { + continue; + } + + // If we reach here it means that the SPU is idling + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursSysServiceWaitOrExit(spu, mgmt); + goto poll; + } +} + +/// Entry point of the system service workload +void spursSysServiceWorkloadEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; + + spu.GPR[1]._u32[3] = 0x3FFD0; + *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; + memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); + + if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceWorkloadMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system service workload. Need to implement this. + } + + // TODO: Ensure that this function always returns to the SPURS kernel + return; +} diff --git a/rpcs3/cellSpursSpu.cpp b/rpcs3/cellSpursSpu.cpp deleted file mode 100644 index 33c75e8a44..0000000000 --- a/rpcs3/cellSpursSpu.cpp +++ /dev/null @@ -1,352 +0,0 @@ -#include "stdafx.h" -#include "Emu/Memory/Memory.h" -#include "Emu/System.h" -#include "Emu/Cell/SPUThread.h" -#include "Emu/SysCalls/Modules/cellSpurs.h" - -/// Output trace information -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { - // TODO: Implement this -} - -/// Check for execution right requests -unsigned cellSpursModulePollStatus(CellSpursModulePollStatus * status) { - // TODO: Implement this - return 0; -} - -/// Restore scheduling paraneters to the right values after a workload has been preempted by the system service workload -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != -1) { - auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum]; - mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = -1; - - spursSysServiceUpdateWorkload(spu, mgmt); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); - } - - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; - - // Trace - STOP: GUID - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - mgmt->wklCurrentId = wklIdSaved; - } -} - -/// Updatre the trace count for this SPU in CellSpurs -void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->traceBuffer) { - mgmt->spurs->m.traceBuffer->count[mgmt->spuNum] = mgmt->traceMsgCount; - } -} - -/// Update trace control in SPU from CellSpurs -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - auto sysSrvMsgUpdateTrace = 
mgmt->spurs->m.sysSrvMsgUpdateTrace; - mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum; - - bool notify = false; - if ((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) { - mgmt->spurs->m.xCD = 0; - notify = true; - } - - if (arg4 && mgmt->spurs->m.xCD != 0) { - mgmt->spurs->m.xCD = 0; - notify = true; - } - - if ((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum) != 0) || (arg3 != 0)) { - if (mgmt->traceMsgCount != 0xFF || mgmt->traceBuffer == 0 || mgmt->spurs->m.traceBuffer.addr() == 0) { - spursSysServiceUpdateTraceCount(spu, mgmt); - } else { - mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum]; - } - - mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0]; - if (mgmt->traceBuffer == 0) { - mgmt->traceMsgCount = 0; - } - } - - if (notify) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0); - } -} - -/// Update events in CellSpurs -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownMask) { - u32 wklNotifyMask = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (wklShutdownMask & (0x80000000 >> i)) { - mgmt->spurs->m.wklEvent1[i] |= 0x01; - if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { - wklNotifyMask |= 0x80000000 >> i; - } - } - } - - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (wklShutdownMask & (0x8000 >> i)) { - mgmt->spurs->m.wklEvent2[i] |= 0x01; - if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { - wklNotifyMask |= 0x8000 >> i; - } - } - } - - if (wklNotifyMask) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); - } -} - -/// Update workload information in the SPU LS from CellSpurs -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { - u32 wklShutdownMask = 0; - mgmt->wklRunnable1 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); - - auto wklStatus = mgmt->spurs->m.wklStatus1[i]; - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } - - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) { - mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownMask |= 0x80000000 >> i; - } - } - } - - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { - mgmt->wklRunnable2 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; - } - - auto wklStatus = mgmt->spurs->m.wklStatus2[i]; - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; - } else { - mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); - } - - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) { - mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownMask |= 0x8000 >> i; - } - } - } - } - - if (wklShutdownMask) { - spursSysServiceUpdateEvent(spu, mgmt, wklShutdownMask); - } -} - -/// Process any messages -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - // Process update workload message - if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); - spursSysServiceUpdateWorkload(spu, mgmt); - } - - // Process update trace message - if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { - spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); - } - - // Process terminate request - if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); - // TODO: Rest of the terminate processing - } -} - -/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { - while (true) { - u32 nIdlingSpus = 0; - for (u32 i = 0; i < 8; i++) { - if (mgmt->spurs->m.spuIdling & (1 << i)) { - nIdlingSpus++; - } - } - - bool shouldExit = nIdlingSpus != mgmt->spurs->m.nSpus ? false : mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; - bool foundSchedulableWorkload = false; - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - foundSchedulableWorkload = true; - } else { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if ((mgmt->wklRunnable1 & (0x8000 >> i)) && - (mgmt->priority[i] & 0x0F) != 0 && - (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0x0F) > (mgmt->spurs->m.wklCurrentContention[i] & 0x0F)) { - foundSchedulableWorkload = true; - break; - } - } - - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS && foundSchedulableWorkload == false) { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if ((mgmt->wklRunnable2 & (0x8000 >> i)) && - (mgmt->priority[i] & 0xF0) != 0 && - (mgmt->spurs->m.wklMaxContention[i].read_relaxed() & 0xF0) > (mgmt->spurs->m.wklCurrentContention[i] & 0xF0)) { - foundSchedulableWorkload = true; - break; - } - } - } - } - - if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && shouldExit == false && foundSchedulableWorkload == false) { - // TODO: Wait for events - } - - if (shouldExit || foundSchedulableWorkload == false) { - mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum; - } else { - mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); - } - - if (shouldExit == false && foundSchedulableWorkload == false) { - continue; - } - - if (shouldExit == false) { - return; - } - - break; - } - - // TODO: exit spu thread group -} - -/// Main function for the system service workload -void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { - auto mgmt = vm::get_ptr(spu.ls_offset); - - if (mgmt->spurs.addr() % CellSpurs::align) { - assert(0); - } - - // Initialise the system service if this is the first time its being started on this SPU - if (mgmt->sysSrvInitialised == 0) { - mgmt->sysSrvInitialised = 1; - - if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - assert(0); - } - - mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; - mgmt->traceBuffer = 0; - mgmt->traceMsgCount = -1; - - spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); - spursSysServiceCleanupAfterPreemption(spu, mgmt); - - // Trace - SERVICE: INIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - } - - // Trace - START: Module='SYS ' - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_START; - memcpy(pkt.data.start.module, "SYS ", 4); - pkt.data.start.level = 1; // Policy module - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - while (true) { - // Process messages for the system service workload - spursSysServiceProcessMessages(spu, mgmt); - -poll: - if (cellSpursModulePollStatus(nullptr)) { - // Trace - SERVICE: EXIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - // Trace - STOP: GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = 0; // TODO: Put GUID of the sys service workload here - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - break; - } - - // If we reach here it means that either there are more system service messages to be processed - // or there are no workloads that can be scheduled. 
- - // If the SPU is not idling then process the remaining system service messages - if (mgmt->spuIdling == 0) { - continue; - } - - // If we reach here it means that the SPU is idling - - // Trace - SERVICE: WAIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursSysServiceWaitOrExit(spu, mgmt); - goto poll; - } -} - -/// Entry point of the system service workload -void spursSysServiceWorkloadEntry(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); - auto arg = spu.GPR[4]._u64[1]; - auto pollStatus = spu.GPR[5]._u32[3]; - - spu.GPR[1]._u32[3] = 0x3FFD0; - *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; - memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); - - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - spursSysServiceWorkloadMain(spu, pollStatus); - } else { - // TODO: If we reach here it means the current workload was preempted to start the - // system service workload. Need to implement this. - } - - // TODO: Ensure that this function always returns to the SPURS kernel - return; -} diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 3599cd2cd3..dbac3af5be 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -54,7 +54,7 @@ - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index a1e6732cc9..3166265186 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -653,7 +653,7 @@ Emu\CPU\ARMv7\Modules - + Emu\SysCalls\Modules From b01c17d1a0048e71fb232c462e0c20430255720f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 5 Jan 2015 21:54:03 +0530 Subject: [PATCH 12/29] SPURS: Add locks in the system service workload --- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 2d696666aa..06ff2553be 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -585,6 +585,8 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) /// Process any messages void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { + LV2_LOCK(0); + // Process update workload message if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); @@ -606,6 +608,8 @@ void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) /// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { while (true) { + Emu.GetCoreMutex().lock(); + // Find the number of SPUs that are idling in this SPURS instance u32 nIdlingSpus = 0; for (u32 i = 0; i < 8; i++) { @@ -665,7 +669,10 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { // not yet completely implemented in rpcs3. So we busy wait here. 
//u128 r; //spu.ReadChannel(r, 0); + + Emu.GetCoreMutex().unlock(); std::this_thread::sleep_for(std::chrono::milliseconds(1)); + Emu.GetCoreMutex().lock(); } if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) { @@ -674,6 +681,8 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); } + Emu.GetCoreMutex().unlock(); + if (allSpusIdle == false || exitIfNoWork == false) { if (foundReadyWorkload == true) { return; @@ -696,6 +705,7 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { if (mgmt->sysSrvInitialised == 0) { mgmt->sysSrvInitialised = 1; + LV2_LOCK(0); if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { assert(0); } From 17f34f512777cc56b923beb18316722d47227dea Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 21 Jan 2015 00:47:20 +0530 Subject: [PATCH 13/29] SPURS: Implement cellSpursSendSignal, cellSpursSendWorkloadSignal and some cellSpursEventFlag functions --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 370 +++++++++++++++++++++-- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 59 +++- 2 files changed, 399 insertions(+), 30 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 10c2348d4b..a9a50e2bbe 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1460,13 +1460,63 @@ s64 cellSpursGetWorkloadFlag(vm::ptr spurs, vm::ptr spurs, u32 workloadId) { + cellSpurs->Warning("%s(spurs=0x%x, workloadId=0x%x)", __FUNCTION__, spurs.addr(), workloadId); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0xA658, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!spurs) + { + return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; + } + + if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0)) + { + return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; + } + + if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0) + { + return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; + } + + if (spurs->m.exception) + { + return CELL_SPURS_POLICY_MODULE_ERROR_STAT; + } + + u8 state; + if (workloadId >= CELL_SPURS_MAX_WORKLOAD) + { + state = spurs->m.wklState2[workloadId & 0x0F].read_relaxed(); + } + else + { + state = spurs->m.wklState1[workloadId].read_relaxed(); + } + + if (state != SPURS_WKL_STATE_RUNNABLE) + { + return CELL_SPURS_POLICY_MODULE_ERROR_STAT; + } + + // TODO: Make the below block execute atomically + if (workloadId >= CELL_SPURS_MAX_WORKLOAD) + { + spurs->m.wklSignal2 |= be_t::make(0x8000 >> (workloadId & 0x0F)); + } + else + { + spurs->m.wklSignal1 |= be_t::make(0x8000 >> workloadId); + } + return CELL_OK; #endif } @@ -1611,64 +1661,250 @@ s64 cellSpursUnsetExceptionEventHandler() s64 _cellSpursEventFlagInitialize(vm::ptr spurs, vm::ptr taskset, vm::ptr eventFlag, u32 flagClearMode, u32 flagDirection) { -#ifdef PRX_DEBUG cellSpurs->Warning("_cellSpursEventFlagInitialize(spurs_addr=0x%x, taskset_addr=0x%x, eventFlag_addr=0x%x, flagClearMode=%d, flagDirection=%d)", spurs.addr(), taskset.addr(), eventFlag.addr(), flagClearMode, flagDirection); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1564C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (taskset.addr() == 0 || spurs.addr() == 0) 
+ { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align || eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskset.addr() && taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (flagDirection > CELL_SPURS_EVENT_FLAG_LAST || flagClearMode > CELL_SPURS_EVENT_FLAG_CLEAR_LAST) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + memset(eventFlag.get_ptr(), 0, CellSpursEventFlag::size); + eventFlag->m.direction = flagDirection; + eventFlag->m.clearMode = flagClearMode; + eventFlag->m.spuPort = -1; + + if (taskset.addr()) + { + eventFlag->m.addr = taskset.addr(); + } + else + { + eventFlag->m.isIwl = 1; + eventFlag->m.addr = spurs.addr(); + } + return CELL_OK; #endif } s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagAttachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x157B8, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!eventFlag) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + if (eventFlag->m.spuPort != -1) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + vm::ptr spurs; + if (eventFlag->m.isIwl == 1) + { + spurs.set((u32)eventFlag->m.addr); + } + else + { + auto taskset = vm::ptr::make((u32)eventFlag->m.addr); + spurs.set((u32)taskset->m.spurs.addr()); + } + + u32 eventQueueId; + auto _port = vm::ptr::make((u32)Memory.Alloc(1, 1)); + auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, _port, 1, *((u64 *)"_spuEvF")); + auto port = *_port; + Memory.Free(_port.addr()); + if (rc != CELL_OK) + { + return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF)); + } + + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + auto _eventPortId = vm::ptr::make((u32)Memory.Alloc(4, 4)); + rc = sys_event_port_create(_eventPortId, SYS_EVENT_PORT_LOCAL, 0); + auto eventPortId = *_eventPortId; + Memory.Free(_eventPortId.addr()); + if (rc == CELL_OK) + { + rc = sys_event_port_connect_local(eventPortId, eventQueueId); + if (rc == CELL_OK) + { + eventFlag->m.eventPortId = eventPortId; + goto success; + } + + sys_event_port_destroy(eventPortId); + } + + // TODO: Implement the following + // if (spursDetachLv2EventQueue(spurs, port, 1) == CELL_OK) + // { + // sys_event_queue_destroy(eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE); + // } + return (rc & 0x0FFF0000) == 0x00410000 ? 
rc : (0x80410900 | (rc & 0xFF)); + } + +success: + eventFlag->m.eventQueueId = eventQueueId; + eventFlag->m.spuPort = port; return CELL_OK; #endif } s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagDetachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15998, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!eventFlag) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + if (eventFlag->m.spuPort == -1) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (eventFlag->m.x04 || eventFlag->m.x06 & 0xFF00) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + auto port = eventFlag->m.spuPort; + eventFlag->m.spuPort = -1; // TODO: Make this execute atomically + + vm::ptr spurs; + if (eventFlag->m.isIwl == 1) + { + spurs.set((u32)eventFlag->m.addr); + } + else + { + auto taskset = vm::ptr::make((u32)eventFlag->m.addr); + spurs.set((u32)taskset->m.spurs.addr()); + } + + if(eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + sys_event_port_disconnect(eventFlag->m.eventPortId); + sys_event_port_destroy(eventFlag->m.eventPortId); + } + + s64 rc = CELL_OK; + // TODO: Implement the following + // auto rc = spursDetachLv2EventQueue(spurs, port, 1); + // if (rc == CELL_OK) + // { + // rc = sys_event_queue_destroy(eventFlag->m.eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE); + // } + + if (rc != CELL_OK) + { + return (rc & 0x0FFF0000) == 0x00410000 ? 
rc : (0x80410900 | (rc & 0xFF)); + } + return CELL_OK; #endif } -s64 cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode) +s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode, u32 block) { -#ifdef PRX_DEBUG - cellSpurs->Warning("cellSpursEventFlagWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=%d)", eventFlag.addr(), mask.addr(), mode); - return GetCurrentPPUThread().FastCall2(libsre + 0x15E68, libsre_rtoc); -#else UNIMPLEMENTED_FUNC(cellSpurs); return CELL_OK; +} + +s64 cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode) +{ + cellSpurs->Warning("cellSpursEventFlagWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=%d)", eventFlag.addr(), mask.addr(), mode); + +#ifdef PRX_DEBUG + return GetCurrentPPUThread().FastCall2(libsre + 0x15E68, libsre_rtoc); +#else + return _cellSpursEventFlagWait(eventFlag, mask, mode, 1/*block*/); #endif } s64 cellSpursEventFlagClear(vm::ptr eventFlag, u16 bits) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagClear(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15E9C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + eventFlag->m.bits &= ~bits; // TODO: Make this execute atomically return CELL_OK; #endif } s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagSet(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15F04, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -1678,44 +1914,77 @@ s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) s64 cellSpursEventFlagTryWait(vm::ptr eventFlag, vm::ptr mask, u32 mode) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagTryWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=0x%x)", eventFlag.addr(), mask.addr(), mode); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15E70, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; + return _cellSpursEventFlagWait(eventFlag, mask, mode, 0/*block*/); #endif } s64 cellSpursEventFlagGetDirection(vm::ptr eventFlag, vm::ptr direction) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagGetDirection(eventFlag_addr=0x%x, direction_addr=0x%x)", eventFlag.addr(), direction.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x162C4, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || direction.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + *direction = eventFlag->m.direction; return CELL_OK; #endif } s64 cellSpursEventFlagGetClearMode(vm::ptr eventFlag, vm::ptr clear_mode) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagGetClearMode(eventFlag_addr=0x%x, clear_mode_addr=0x%x)", eventFlag.addr(), clear_mode.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x16310, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || clear_mode.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + *clear_mode = eventFlag->m.clearMode; return 
CELL_OK; #endif } s64 cellSpursEventFlagGetTasksetAddress(vm::ptr eventFlag, vm::ptr taskset) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursEventFlagGetTasksetAddress(eventFlag_addr=0x%x, taskset_addr=0x%x)", eventFlag.addr(), taskset.addr()); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1635C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || taskset.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + taskset.set(eventFlag->m.isIwl ? 0 : eventFlag->m.addr); return CELL_OK; #endif } @@ -2302,7 +2571,54 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) cellSpurs->Warning("_cellSpursSendSignal(taskset_addr=0x%x, taskID=%d)", taskset.addr(), taskID); return GetCurrentPPUThread().FastCall2(libsre + 0x124CC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskID >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + auto word = taskID >> 5; + auto mask = 0x80000000u >> (taskID & 0x1F); + auto disabled = taskset->m.enabled_set[word] & mask ? false : true; + auto running = taskset->m.running_set[word]; + auto ready = taskset->m.ready_set[word]; + auto ready2 = taskset->m.ready2_set[word]; + auto waiting = taskset->m.waiting_set[word]; + auto signalled = taskset->m.signal_received_set[word]; + auto enabled = taskset->m.enabled_set[word]; + auto invalid = (ready & ready2) || (running & waiting) || ((running | ready | ready2 | waiting | signalled) & ~enabled) || disabled; + + if (invalid) + { + return CELL_SPURS_TASK_ERROR_SRCH; + } + + auto shouldSignal = waiting & ~signalled & mask ? 
true : false; + taskset->m.signal_received_set[word] |= mask; // TODO: Make this execute atomically + if (shouldSignal) + { + cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); + auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr::make((u32)taskset->m.spurs.addr())); + if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (rc != CELL_OK) + { + assert(0); + } + } + return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index dab014f898..32798e6006 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -199,6 +199,28 @@ enum SpursTaskConstants CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, }; +enum CellSpursEventFlagWaitMode +{ + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1 +}; + +enum CellSpursEventFlagClearMode +{ + CELL_SPURS_EVENT_FLAG_CLEAR_AUTO = 0, + CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL = 1, + CELL_SPURS_EVENT_FLAG_CLEAR_LAST = CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL, +}; + +enum CellSpursEventFlagDirection +{ + CELL_SPURS_EVENT_FLAG_SPU2SPU, + CELL_SPURS_EVENT_FLAG_SPU2PPU, + CELL_SPURS_EVENT_FLAG_PPU2SPU, + CELL_SPURS_EVENT_FLAG_ANY2ANY, + CELL_SPURS_EVENT_FLAG_LAST = CELL_SPURS_EVENT_FLAG_ANY2ANY, +}; + class SPURSManager; class SPURSManagerEventFlag; class SPURSManagerTaskset; @@ -519,7 +541,38 @@ struct CellSpursWorkloadAttribute struct CellSpursEventFlag { - SPURSManagerEventFlag *eventFlag; + static const u32 align = 128; + static const u32 size = 128; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct _CellSpursEventFlag + { + be_t bits; // 0x00 + be_t x02; // 0x02 + be_t x04; // 0x04 + be_t x06; // 0x06 + be_t x08; // 0x08 + be_t x0A; // 0x0A + u8 spuPort; // 0x0C + u8 isIwl; // 0x0D + u8 direction; // 0x0E + u8 clearMode; // 0x0F + u16 x10[16]; // 0x10 + u8 x30[0x70 - 0x30]; // 0x30 + be_t addr; // 0x70 + be_t eventPortId; // 0x78 + be_t eventQueueId; // 0x7C + } m; + + static_assert(sizeof(_CellSpursEventFlag) == size, "Wrong _CellSpursEventFlag size"); + + SPURSManagerEventFlag *eventFlag; + }; }; union CellSpursTaskArgument @@ -559,7 +612,7 @@ struct CellSpursTaskset { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 - be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t ready2_set[4]; // 0x20 - TODO: Find out what this is be_t enabled_set[4]; // 0x30 be_t signal_received_set[4]; // 0x40 be_t waiting_set[4]; // 0x50 @@ -668,7 +721,7 @@ struct CellSpursTaskset2 { be_t running_set[4]; // 0x00 be_t ready_set[4]; // 0x10 - be_t unk_set[4]; // 0x20 - TODO: Find out what this is + be_t ready2_set[4]; // 0x20 - TODO: Find out what this is be_t enabled_set[4]; // 0x30 be_t signal_received_set[4]; // 0x40 be_t waiting_set[4]; // 0x50 From 8717bdffa9e58b56ae31476cb003aab9033e0f79 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Fri, 23 Jan 2015 03:01:46 +0530 Subject: [PATCH 14/29] SPURS: Implement cellSpursEventFlagWait and cellSpursEventFlagSet. 
Also modify implementation to make use of vm::var --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 293 +++++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 24 +- 2 files changed, 259 insertions(+), 58 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 25bf599222..1c15206a46 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -27,6 +27,8 @@ extern u32 libsre_rtoc; #endif void spursKernelMain(SPUThread & spu); +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); +s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptr port, s32 size, u64 name_u64) { @@ -1302,7 +1304,7 @@ s64 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr spurs, vm::ptr wid, vm::ptr attr) +s64 cellSpursAddWorkloadWithAttribute(vm::ptr spurs, const vm::ptr wid, vm::ptr attr) { cellSpurs->Warning("%s(spurs_addr=0x%x, wid_addr=0x%x, attr_addr=0x%x)", __FUNCTION__, spurs.addr(), wid.addr(), attr.addr()); #ifdef PRX_DEBUG_XXX @@ -1507,7 +1509,6 @@ s64 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 workloadId) return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } - // TODO: Make the below block execute atomically if (workloadId >= CELL_SPURS_MAX_WORKLOAD) { spurs->m.wklSignal2 |= be_t::make(0x8000 >> (workloadId & 0x0F)); @@ -1750,10 +1751,8 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) } u32 eventQueueId; - auto _port = vm::ptr::make((u32)Memory.Alloc(1, 1)); - auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, _port, 1, *((u64 *)"_spuEvF")); - auto port = *_port; - Memory.Free(_port.addr()); + vm::var port; + auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, port, 1, *((u64 *)"_spuEvF")); if (rc != CELL_OK) { return (rc & 0x0FFF0000) == 0x00410000 ? 
rc : (0x80410900 | (rc & 0xFF)); @@ -1761,20 +1760,18 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { - auto _eventPortId = vm::ptr::make((u32)Memory.Alloc(4, 4)); - rc = sys_event_port_create(_eventPortId, SYS_EVENT_PORT_LOCAL, 0); - auto eventPortId = *_eventPortId; - Memory.Free(_eventPortId.addr()); + vm::var> eventPortId; + rc = sys_event_port_create(vm::ptr::make(eventPortId.addr()), SYS_EVENT_PORT_LOCAL, 0); if (rc == CELL_OK) { - rc = sys_event_port_connect_local(eventPortId, eventQueueId); + rc = sys_event_port_connect_local(eventPortId.value(), eventQueueId); if (rc == CELL_OK) { eventFlag->m.eventPortId = eventPortId; goto success; } - sys_event_port_destroy(eventPortId); + sys_event_port_destroy(eventPortId.value()); } // TODO: Implement the following @@ -1819,13 +1816,13 @@ s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) return CELL_SPURS_TASK_ERROR_STAT; } - if (eventFlag->m.x04 || eventFlag->m.x06 & 0xFF00) + if (eventFlag->m.x04 || eventFlag->m.x07) { return CELL_SPURS_TASK_ERROR_BUSY; } auto port = eventFlag->m.spuPort; - eventFlag->m.spuPort = -1; // TODO: Make this execute atomically + eventFlag->m.spuPort = -1; vm::ptr spurs; if (eventFlag->m.isIwl == 1) @@ -1863,7 +1860,121 @@ s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr mask, u32 mode, u32 block) { - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0 || mask.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (mode > CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + if (block && eventFlag->m.spuPort == -1) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (eventFlag->m.x04 || eventFlag->m.x07) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + u16 bits = eventFlag->m.bits & *mask; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; + if (mode != CELL_SPURS_EVENT_FLAG_AND) + { + tmp &= eventFlag->m.x0A; + } + + int i = 15; + while (tmp) + { + if (tmp & 0x1) + { + if (eventFlag->m.x10[i] & *mask && eventFlag->m.x10[i] != *mask) + { + return CELL_SPURS_TASK_ERROR_AGAIN; + } + } + + tmp >>= 1; + i--; + } + } + + bool recv; + if ((*mask & ~bits) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && bits)) + { + if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) + { + eventFlag->m.bits &= ~bits; + } + + recv = false; + } + else + { + if (block == 0) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + eventFlag->m.x06 = 0; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + u8 i = 0; + u16 tmp = eventFlag->m.x08; + while (tmp & 0x01) + { + tmp >>= 1; + i++; + } + + if (i == 16) + { + return CELL_SPURS_TASK_ERROR_BUSY; + } + + eventFlag->m.x06 = (15 - i) << 4; + } + + eventFlag->m.x06 |= mode; + eventFlag->m.x04 = *mask; + recv = true; + } + + if (recv) { + vm::var data; + auto rc = sys_event_queue_receive(eventFlag->m.eventQueueId, data, 0); + if (rc != CELL_OK) + { + assert(0); + } + + u8 i = 0; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + i = eventFlag->m.x06 >> 4; + } + + bits = eventFlag->m.x30[i]; + eventFlag->m.x07 = 0; + } 
+ + *mask = bits; return CELL_OK; } @@ -1895,7 +2006,7 @@ s64 cellSpursEventFlagClear(vm::ptr eventFlag, u16 bits) return CELL_SPURS_TASK_ERROR_ALIGN; } - eventFlag->m.bits &= ~bits; // TODO: Make this execute atomically + eventFlag->m.bits &= ~bits; return CELL_OK; #endif } @@ -1907,7 +2018,110 @@ s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15F04, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (eventFlag.addr() == 0) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (eventFlag.addr() % CellSpursEventFlag::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_PPU2SPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY) + { + return CELL_SPURS_TASK_ERROR_PERM; + } + + u16 tmp1 = 0; + auto send = false; + u16 tmp3 = 0; + u16 tmp4 = 0; + + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.x04) + { + u16 tmp = (eventFlag->m.bits | bits) & eventFlag->m.x04; + if ((eventFlag->m.x04 & ~tmp) == 0 || ((eventFlag->m.x06 & 0x0F) == 0 && tmp != 0)) + { + eventFlag->m.x07 = 1; + eventFlag->m.x04 = 0; + tmp1 = tmp; + send = true; + tmp3 = eventFlag->m.x06 >> 4; + tmp4 = tmp; + } + } + + u16 i = 15; + u16 j = 0; + u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; + u16 tmp5 = 0; + u16 x30[16]; + while (tmp) + { + if (tmp & 0x0001) + { + u16 tmp6 = (eventFlag->m.bits | bits) & eventFlag->m.x10[i]; + if ((eventFlag->m.x10[i] & ~tmp6) == 0 || (((eventFlag->m.x0A >> j) & 0x01) == 0 && (eventFlag->m.x10[i] & ~tmp6) != 0)) + { + tmp4 |= tmp6; + tmp5 |= 1 << j; + x30[j] = tmp6; + } + } + + tmp >>= 1; + i--; + j++; + } + + eventFlag->m.bits |= bits; + eventFlag->m.x02 |= tmp5; + if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) + { + eventFlag->m.bits &= ~tmp4; + } + + if (send) + { + eventFlag->m.x30[tmp3] = tmp1; + if (sys_event_port_send(eventFlag->m.eventPortId, 0, 0, 0) != CELL_OK) + { + assert(0); + } + } + + if (tmp5) + { + for (auto i = 0; i < 16; i++) + { + if (tmp5 & (0x8000 >> i)) + { + eventFlag->m.x30[i] = x30[i]; + vm::var taskset; + if (eventFlag->m.isIwl) + { + cellSpursLookUpTasksetAddress(vm::ptr::make((u32)eventFlag->m.addr), vm::ptr::make(taskset.addr()), eventFlag->m.x60[i]); + } + else + { + taskset.value() = (u32)eventFlag->m.addr; + } + + auto rc = _cellSpursSendSignal(vm::ptr::make(taskset.addr()), eventFlag->m.x50[i]); + if (rc == CELL_SPURS_TASK_ERROR_INVAL || rc == CELL_SPURS_TASK_ERROR_STAT) + { + return CELL_SPURS_TASK_ERROR_FATAL; + } + + if (rc != CELL_OK) + { + assert(0); + } + } + } + } + return CELL_OK; #endif } @@ -2306,30 +2520,26 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks taskset->m.enable_clear_ls = enable_clear_ls > 0 ? 
1 : 0; taskset->m.size = size; - auto wkl_attr = vm::ptr::make((u32)Memory.Alloc(CellSpursWorkloadAttribute::size, CellSpursWorkloadAttribute::align)); + vm::var wkl_attr; _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(16) /*pm*/, 0x1E40 /*pm_size*/, taskset.addr(), priority, 8 /*min_contention*/, max_contention); // TODO: Check return code - auto cls = vm::ptr::make((u32)Memory.Alloc(1, 1)); - *((char *)cls.get_ptr()) = 0; - cellSpursWorkloadAttributeSetName(wkl_attr, cls, name); + cellSpursWorkloadAttributeSetName(wkl_attr, vm::ptr::make(0), name); // TODO: Check return code // TODO: cellSpursWorkloadAttributeSetShutdownCompletionEventHook(wkl_attr, hook, taskset); // TODO: Check return code - auto wid = vm::ptr::make((u32)Memory.Alloc(4, 4)); - cellSpursAddWorkloadWithAttribute(spurs, wid, vm::ptr::make(wkl_attr.addr())); + vm::var> wid; + cellSpursAddWorkloadWithAttribute(spurs, vm::ptr::make(wid.addr()), vm::ptr::make(wkl_attr.addr())); // TODO: Check return code taskset->m.x72 = 0x80; - taskset->m.wid = *wid; + taskset->m.wid = wid.value(); // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset); // TODO: Check return code - Memory.Free(wkl_attr.addr()); - Memory.Free(wid.addr()); return CELL_OK; } @@ -2439,14 +2649,13 @@ s64 cellSpursShutdownTaskset(vm::ptr taskset) u32 _cellSpursGetSdkVersion() { - static u32 sdk_version = -2; + static s32 sdk_version = -2; if (sdk_version == -2) { - auto version = vm::ptr::make((u32)Memory.Alloc(sizeof(u32), sizeof(u32))); - sys_process_get_sdk_version(sys_process_getpid(), version); - sdk_version = *version; - Memory.Free(version.addr()); + vm::var> version; + sys_process_get_sdk_version(sys_process_getpid(), vm::ptr::make(version.addr())); + sdk_version = version.value(); } return sdk_version; @@ -2522,7 +2731,6 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: // TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000 - // TODO: Make the following block execute atomically u32 tmp_task_id; for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++) { @@ -2603,7 +2811,7 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) } auto shouldSignal = waiting & ~signalled & mask ? 
true : false; - taskset->m.signal_received_set[word] |= mask; // TODO: Make this execute atomically + taskset->m.signal_received_set[word] |= mask; if (shouldSignal) { cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); @@ -2872,11 +3080,11 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc); #else - auto free_attr = false; + vm::ptr tmp_attr; + if (!attr) { - attr.set(Memory.Alloc(CellSpursTasksetAttribute2::size, CellSpursTasksetAttribute2::align)); - free_attr = true; + attr.set(tmp_attr.addr()); _cellSpursTasksetAttribute2Initialize(attr, 0); } @@ -2885,10 +3093,6 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), CellSpursTaskset2::size, (u8)attr->m.enable_clear_ls); if (rc != CELL_OK) { - if (free_attr) - { - Memory.Free(attr.addr()); - } return rc; } @@ -2898,11 +3102,6 @@ s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr } // TODO: Implement rest of the function - - if (free_attr) - { - Memory.Free(attr.addr()); - } return CELL_OK; #endif } @@ -3028,7 +3227,7 @@ s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset #endif } -s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) +s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id) { cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, id=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), id); @@ -3660,6 +3859,4 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursTraceStart); REG_FUNC(cellSpurs, cellSpursTraceStop); REG_FUNC(cellSpurs, cellSpursTraceFinalize); - - // TODO: some trace funcs } \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index ecbfc03efc..1946675c54 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -201,8 +201,9 @@ enum SpursTaskConstants enum CellSpursEventFlagWaitMode { - CELL_SPURS_EVENT_FLAG_OR = 0, - CELL_SPURS_EVENT_FLAG_AND = 1 + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1, + CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST = CELL_SPURS_EVENT_FLAG_AND, }; enum CellSpursEventFlagClearMode @@ -552,18 +553,21 @@ struct CellSpursEventFlag // Real data struct _CellSpursEventFlag { - be_t bits; // 0x00 - be_t x02; // 0x02 - be_t x04; // 0x04 - be_t x06; // 0x06 - be_t x08; // 0x08 - be_t x0A; // 0x0A + be_t bits; // 0x00 Bit mask of event set bits + be_t x02; // 0x02 Bit mask of SPU thread slots whose conditions have met + be_t x04; // 0x04 Wait mask for PPU thread + u8 x06; // 0x06 Top 4 bits: Bit number for PPU thread. 
Bottom 4 bits: Wait mode of PPU thread + u8 x07; // 0x07 Set to 1 if the blocked PPU thread's condition has been met + be_t x08; // 0x08 Bit mask of used wait slots + be_t x0A; // 0x0A Bit mask of used wait slots whose wait mode is AND u8 spuPort; // 0x0C u8 isIwl; // 0x0D u8 direction; // 0x0E u8 clearMode; // 0x0F - u16 x10[16]; // 0x10 - u8 x30[0x70 - 0x30]; // 0x30 + be_t x10[16]; // 0x10 Wait mask for SPU threads + be_t x30[16]; // 0x30 Received event flag mask for SPU threads + u8 x50[16]; // 0x50 Task id of waiting SPU threads + u8 x60[16]; // 0x60 Workload Ids of waiting SPU threads be_t addr; // 0x70 be_t eventPortId; // 0x78 be_t eventQueueId; // 0x7C From 173fb060cb5dea6c008aabf371e26566eb31a77d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 24 Jan 2015 00:17:37 +0530 Subject: [PATCH 15/29] SPURS: Improve the readability of the event flag functions --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 180 ++++++++++++++--------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 43 +++--- 2 files changed, 134 insertions(+), 89 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 1c15206a46..c8fc774ad8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1696,7 +1696,7 @@ s64 _cellSpursEventFlagInitialize(vm::ptr spurs, vm::ptrm.direction = flagDirection; eventFlag->m.clearMode = flagClearMode; - eventFlag->m.spuPort = -1; + eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT; if (taskset.addr()) { @@ -1734,7 +1734,7 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) return CELL_SPURS_TASK_ERROR_PERM; } - if (eventFlag->m.spuPort != -1) + if (eventFlag->m.spuPort != CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT) { return CELL_SPURS_TASK_ERROR_STAT; } @@ -1755,6 +1755,7 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, port, 1, *((u64 *)"_spuEvF")); if (rc != CELL_OK) { + // Return rc if it's an error code from SPURS otherwise convert the error code to a SPURS task error code return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF)); } @@ -1779,6 +1780,8 @@ s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr eventFlag) // { // sys_event_queue_destroy(eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE); // } + + // Return rc if it's an error code from SPURS otherwise convert the error code to a SPURS task error code return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF)); } @@ -1811,18 +1814,18 @@ s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr eventFlag) return CELL_SPURS_TASK_ERROR_PERM; } - if (eventFlag->m.spuPort == -1) + if (eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT) { return CELL_SPURS_TASK_ERROR_STAT; } - if (eventFlag->m.x04 || eventFlag->m.x07) + if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv) { return CELL_SPURS_TASK_ERROR_BUSY; } auto port = eventFlag->m.spuPort; - eventFlag->m.spuPort = -1; + eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT; vm::ptr spurs; if (eventFlag->m.isIwl == 1)
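For reference, the conversion annotated in the two comments above, and repeated in every attach/detach error path, folds an arbitrary LV2 return code into the SPURS task error space. A minimal standalone sketch (spursConvertLv2TaskError is an illustrative name, not a function in this patch):

// Sketch of the inline ternary used by the event flag functions: codes that
// are already in the 0x..41.... SPURS range pass through unchanged, anything
// else keeps only its low byte and is rebased into the 0x804109xx task error
// range.
static s64 spursConvertLv2TaskError(s64 rc)
{
    if ((rc & 0x0FFF0000) == 0x00410000)
    {
        return rc;
    }

    return 0x80410900 | (rc & 0xFF);
}

For example, a hypothetical LV2 failure 0x80010005 would map to 0x80410905, while CELL_SPURS_TASK_ERROR_* values are returned unchanged.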
rc : (0x80410900 | (rc & 0xFF)); } @@ -1880,83 +1884,104 @@ s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr return CELL_SPURS_TASK_ERROR_PERM; } - if (block && eventFlag->m.spuPort == -1) + if (block && eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT) { return CELL_SPURS_TASK_ERROR_STAT; } - if (eventFlag->m.x04 || eventFlag->m.x07) + if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv) { return CELL_SPURS_TASK_ERROR_BUSY; } - u16 bits = eventFlag->m.bits & *mask; + u16 relevantEvents = eventFlag->m.events & *mask; if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { + // Make sure the wait mask and mode specified does not conflict with that of the already waiting tasks. + // Conflict scenarios: + // OR vs OR - A conflict never occurs + // OR vs AND - A conflict occurs if the masks for the two tasks overlap + // AND vs AND - A conflict occurs if the masks for the two tasks are not the same + + // Determine the set of all already waiting tasks whose wait mode/mask can possibly conflict with the specified wait mode/mask. + // This set is equal to 'set of all tasks waiting' - 'set of all tasks whose wait conditions have been met'. + // If the wait mode is OR, we prune the set of all tasks that are waiting in OR mode from the set since a conflict cannot occur + // with an already waiting task in OR mode. + u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv; + if (mode == CELL_SPURS_EVENT_FLAG_OR) { - u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; - if (mode != CELL_SPURS_EVENT_FLAG_AND) - { - tmp &= eventFlag->m.x0A; + relevantWaitSlots &= eventFlag->m.spuTaskWaitMode; } - int i = 15; - while (tmp) + int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1; + while (relevantWaitSlots) { - if (tmp & 0x1) + if (relevantWaitSlots & 0x0001) { - if (eventFlag->m.x10[i] & *mask && eventFlag->m.x10[i] != *mask) + if (eventFlag->m.spuTaskWaitMask[i] & *mask && eventFlag->m.spuTaskWaitMask[i] != *mask) { return CELL_SPURS_TASK_ERROR_AGAIN; } } - tmp >>= 1; + relevantWaitSlots >>= 1; i--; } }
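// A restatement of the conflict rule implemented by the loop above (an
// illustration, not code from this patch): once the slots that can never
// conflict have been pruned, a new wait conflicts with an occupied slot
// exactly when the slot's mask overlaps the new mask without being identical
// to it:
//
//     conflict = (slotMask & newMask) != 0 && slotMask != newMask
//
// e.g. newMask 0x00F0 vs slotMask 0x0030 overlaps but differs, so the wait
// fails with CELL_SPURS_TASK_ERROR_AGAIN, while slotMask 0x0F00 (disjoint)
// or 0x00F0 (identical) can coexist with it.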
+ // There is no need to block if all bits required by the wait operation have already been set or + // if the wait mode is OR and at least one of the bits required by the wait operation has been set. bool recv; - if ((*mask & ~bits) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && bits)) + if ((*mask & ~relevantEvents) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && relevantEvents)) { + // If the clear flag is AUTO then clear the bits consumed by this thread if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) { - eventFlag->m.bits &= ~bits; + eventFlag->m.events &= ~relevantEvents; } recv = false; } else { + // If we reach here it means that the conditions for this thread have not been met. + // If this is a try wait operation then do not block but return an error code. if (block == 0) { return CELL_SPURS_TASK_ERROR_BUSY; } - eventFlag->m.x06 = 0; + eventFlag->m.ppuWaitSlotAndMode = 0; if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { - u8 i = 0; - u16 tmp = eventFlag->m.x08; - while (tmp & 0x01) + // Find an unused wait slot + int i = 0; + u16 spuTaskUsedWaitSlots = eventFlag->m.spuTaskUsedWaitSlots; + while (spuTaskUsedWaitSlots & 0x0001) { - tmp >>= 1; + spuTaskUsedWaitSlots >>= 1; i++; } - if (i == 16) + if (i == CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS) { + // Event flag has no empty wait slots return CELL_SPURS_TASK_ERROR_BUSY; } - eventFlag->m.x06 = (15 - i) << 4; + // Mark the found wait slot as used by this thread + eventFlag->m.ppuWaitSlotAndMode = (CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1 - i) << 4; } - eventFlag->m.x06 |= mode; - eventFlag->m.x04 = *mask; - recv = true; + // Save the wait mask and mode for this thread + eventFlag->m.ppuWaitSlotAndMode |= mode; + eventFlag->m.ppuWaitMask = *mask; + recv = true; } + u16 receivedEventFlag; if (recv) { + // Block till something happens vm::var data; auto rc = sys_event_queue_receive(eventFlag->m.eventQueueId, data, 0); if (rc != CELL_OK) @@ -1964,17 +1989,17 @@ s64 _cellSpursEventFlagWait(vm::ptr eventFlag, vm::ptr assert(0); } - u8 i = 0; + int i = 0; if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) { - i = eventFlag->m.x06 >> 4; + i = eventFlag->m.ppuWaitSlotAndMode >> 4; } - bits = eventFlag->m.x30[i]; - eventFlag->m.x07 = 0; + receivedEventFlag = eventFlag->m.pendingRecvTaskEvents[i]; + eventFlag->m.ppuPendingRecv = 0; } - *mask = bits; + *mask = receivedEventFlag; return CELL_OK; } @@ -2006,7 +2031,7 @@ s64 cellSpursEventFlagClear(vm::ptr eventFlag, u16 bits) return CELL_SPURS_TASK_ERROR_ALIGN; } - eventFlag->m.bits &= ~bits; + eventFlag->m.events &= ~bits; return CELL_OK; #endif } @@ -2033,82 +2058,95 @@ s64 cellSpursEventFlagSet(vm::ptr eventFlag, u16 bits) return CELL_SPURS_TASK_ERROR_PERM; } - u16 tmp1 = 0; - auto send = false; - u16 tmp3 = 0; - u16 tmp4 = 0; - - if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.x04) + u16 ppuEventFlag = 0; + bool send = false; + int ppuWaitSlot = 0; + u16 eventsToClear = 0; + if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.ppuWaitMask) { - u16 tmp = (eventFlag->m.bits | bits) & eventFlag->m.x04; - if ((eventFlag->m.x04 & ~tmp) == 0 || ((eventFlag->m.x06 & 0x0F) == 0 && tmp != 0)) + u16 ppuRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.ppuWaitMask; + + // Unblock the waiting PPU thread if either all the bits being waited by the thread have been set or + // if the wait mode of the thread is OR and at least one bit the thread is waiting on has been set + if ((eventFlag->m.ppuWaitMask & ~ppuRelevantEvents) == 0 || + ((eventFlag->m.ppuWaitSlotAndMode & 0x0F) == CELL_SPURS_EVENT_FLAG_OR && ppuRelevantEvents != 0)) { - eventFlag->m.x07 = 1; - eventFlag->m.x04 = 0; - tmp1 = tmp; - send = true; - tmp3 = eventFlag->m.x06 >> 4; - tmp4 = tmp; + eventFlag->m.ppuPendingRecv = 1; + eventFlag->m.ppuWaitMask = 0; + ppuEventFlag = ppuRelevantEvents; + eventsToClear = ppuRelevantEvents; + ppuWaitSlot = eventFlag->m.ppuWaitSlotAndMode >> 4; + send = true; } } - u16 i = 15; - u16 j = 0; - u16 tmp = eventFlag->m.x08 & ~eventFlag->m.x02; - u16 tmp5 = 0; - u16 x30[16]; + int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1; + int j = 0; + u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv; + u16 spuTaskPendingRecv = 0;
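// A note on the two numberings in play below (an illustration, not from the
// patch): the per-slot arrays (spuTaskWaitMask, pendingRecvTaskEvents,
// waitingTaskId) are indexed with i, while the 16-bit bookkeeping masks are
// scanned LSB first with the counter j, where j = 15 - i. Slot i therefore
// corresponds to mask bit (0x8000 >> i), which is the form the wake-up loop
// at the end of this function tests; the scan below instead shifts
// relevantWaitSlots right one bit per iteration and tests bit 0.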
+ u16 pendingRecvTaskEvents[16]; while (relevantWaitSlots) { - if (tmp & 0x0001) + if (relevantWaitSlots & 0x0001) { - u16 tmp6 = (eventFlag->m.bits | bits) & eventFlag->m.x10[i]; - if ((eventFlag->m.x10[i] & ~tmp6) == 0 || (((eventFlag->m.x0A >> j) & 0x01) == 0 && (eventFlag->m.x10[i] & ~tmp6) != 0)) + u16 spuTaskRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.spuTaskWaitMask[i]; + + // Unblock the waiting SPU task if either all the bits being waited by the task have been set or + // if the wait mode of the task is OR and at least one bit the thread is waiting on has been set + if ((eventFlag->m.spuTaskWaitMask[i] & ~spuTaskRelevantEvents) == 0 || + (((eventFlag->m.spuTaskWaitMode >> j) & 0x0001) == CELL_SPURS_EVENT_FLAG_OR && spuTaskRelevantEvents != 0)) { - tmp4 |= tmp6; - tmp5 |= 1 << j; - x30[j] = tmp6; + eventsToClear |= spuTaskRelevantEvents; + spuTaskPendingRecv |= 1 << j; + pendingRecvTaskEvents[j] = spuTaskRelevantEvents; } } - tmp >>= 1; + relevantWaitSlots >>= 1; i--; j++; } - eventFlag->m.bits |= bits; - eventFlag->m.x02 |= tmp5; + eventFlag->m.events |= bits; + eventFlag->m.spuTaskPendingRecv |= spuTaskPendingRecv; + + // If the clear flag is AUTO then clear the bits consumed by all tasks marked to be unblocked if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO) { - eventFlag->m.bits &= ~tmp4; + eventFlag->m.events &= ~eventsToClear; } if (send) { - eventFlag->m.x30[tmp3] = tmp1; + // Signal the PPU thread to be woken up + eventFlag->m.pendingRecvTaskEvents[ppuWaitSlot] = ppuEventFlag; if (sys_event_port_send(eventFlag->m.eventPortId, 0, 0, 0) != CELL_OK) { assert(0); } } - if (tmp5) + if (spuTaskPendingRecv) { - for (auto i = 0; i < 16; i++) + // Signal each SPU task whose conditions have been met to be woken up + for (int i = 0; i < CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS; i++) { - if (tmp5 & (0x8000 >> i)) + if (spuTaskPendingRecv & (0x8000 >> i)) { - eventFlag->m.x30[i] = x30[i]; + eventFlag->m.pendingRecvTaskEvents[i] = pendingRecvTaskEvents[i]; vm::var taskset; if (eventFlag->m.isIwl) { - cellSpursLookUpTasksetAddress(vm::ptr::make((u32)eventFlag->m.addr), vm::ptr::make(taskset.addr()), eventFlag->m.x60[i]); + cellSpursLookUpTasksetAddress(vm::ptr::make((u32)eventFlag->m.addr), + vm::ptr::make(taskset.addr()), + eventFlag->m.waitingTaskWklId[i]); } else { taskset.value() = (u32)eventFlag->m.addr; } - auto rc = _cellSpursSendSignal(vm::ptr::make(taskset.addr()), eventFlag->m.x50[i]); + auto rc = _cellSpursSendSignal(vm::ptr::make(taskset.addr()), eventFlag->m.waitingTaskId[i]); if (rc == CELL_SPURS_TASK_ERROR_INVAL || rc == CELL_SPURS_TASK_ERROR_STAT) { return CELL_SPURS_TASK_ERROR_FATAL;
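A worked example of the unblock rules above, with made-up values: suppose events = 0x0001, one waiter in AND mode with mask 0x0003, one waiter in OR mode with mask 0x0100, and cellSpursEventFlagSet is called with bits = 0x0102.

// AND waiter: (events | bits) & mask = 0x0103 & 0x0003 = 0x0003
//             mask & ~0x0003 == 0      -> every required bit is set, unblock
// OR waiter:  (events | bits) & mask = 0x0103 & 0x0100 = 0x0100
//             at least one bit is set  -> unblock
// Under CELL_SPURS_EVENT_FLAG_CLEAR_AUTO, eventsToClear = 0x0003 | 0x0100,
// so after events |= bits the flag ends at 0x0103 & ~0x0103 = 0x0000.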
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 1946675c54..172bd43b23 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -201,8 +201,9 @@ enum SpursTaskConstants enum CellSpursEventFlagWaitMode { - CELL_SPURS_EVENT_FLAG_OR = 0, - CELL_SPURS_EVENT_FLAG_AND = 1 + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1, + CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST = CELL_SPURS_EVENT_FLAG_AND, }; enum CellSpursEventFlagClearMode @@ -552,18 +553,21 @@ struct CellSpursEventFlag // Real data struct _CellSpursEventFlag { - be_t bits; // 0x00 Bit mask of event set bits - be_t x02; // 0x02 Bit mask of SPU thread slots whose conditions have met - be_t x04; // 0x04 Wait mask for PPU thread - u8 x06; // 0x06 Top 4 bits: Bit number for PPU thread. Bottom 4 bits: Wait mode of PPU thread - u8 x07; // 0x07 Set to 1 if the blocked PPU thread's condition has been met - be_t x08; // 0x08 Bit mask of used wait slots - be_t x0A; // 0x0A Bit mask of used wait slots whose wait mode is AND - u8 spuPort; // 0x0C - u8 isIwl; // 0x0D - u8 direction; // 0x0E - u8 clearMode; // 0x0F - be_t x10[16]; // 0x10 Wait mask for SPU threads - be_t x30[16]; // 0x30 Received event flag mask for SPU threads - u8 x50[16]; // 0x50 Task id of waiting SPU threads - u8 x60[16]; // 0x60 Workload Ids of waiting SPU threads - be_t addr; // 0x70 - be_t eventPortId; // 0x78 - be_t eventQueueId; // 0x7C + be_t events; // 0x00 Event bits + be_t spuTaskPendingRecv; // 0x02 A bit is set to 1 when the conditions of the SPU task using the slot are met and back to 0 when the SPU task unblocks + be_t ppuWaitMask; // 0x04 Wait mask for blocked PPU thread + u8 ppuWaitSlotAndMode; // 0x06 Top 4 bits: Wait slot number of the blocked PPU thread, Bottom 4 bits: Wait mode of the blocked PPU thread + u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is ublocked + be_t spuTaskUsedWaitSlots; // 0x08 A bit is set to 1 if the wait slot corresponding to the bit is used by an SPU task and 0 otherwise + be_t spuTaskWaitMode; // 0x0A A bit is set to 1 if the wait mode for the SPU task corresponding to the bit is AND and 0 otherwise + u8 spuPort; // 0x0C + u8 isIwl; // 0x0D + u8 direction; // 0x0E + u8 clearMode; // 0x0F + be_t spuTaskWaitMask[16]; // 0x10 Wait mask for blocked SPU tasks + be_t pendingRecvTaskEvents[16]; // 0x30 The value of event flag when the wait condition for the thread/task was met + u8 waitingTaskId[16]; // 0x50 Task id of waiting SPU threads + u8 waitingTaskWklId[16]; // 0x60 Workload id of waiting SPU threads + be_t addr; // 0x70 + be_t eventPortId; // 0x78 + be_t eventQueueId; // 0x7C } m; static_assert(sizeof(_CellSpursEventFlag) == size, "Wrong _CellSpursEventFlag size");
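The offsets documented in the struct above can be checked mechanically. A self-contained sketch (an illustration, not part of the patch; plain fixed-width types stand in for the emulator's big-endian wrappers, which have the same size):

#include <cstddef>
#include <cstdint>

// Mirrors the _CellSpursEventFlag layout documented in patch 15.
struct EventFlagLayout
{
    uint16_t events;                    // 0x00
    uint16_t spuTaskPendingRecv;        // 0x02
    uint16_t ppuWaitMask;               // 0x04
    uint8_t  ppuWaitSlotAndMode;        // 0x06
    uint8_t  ppuPendingRecv;            // 0x07
    uint16_t spuTaskUsedWaitSlots;      // 0x08
    uint16_t spuTaskWaitMode;           // 0x0A
    uint8_t  spuPort;                   // 0x0C
    uint8_t  isIwl;                     // 0x0D
    uint8_t  direction;                 // 0x0E
    uint8_t  clearMode;                 // 0x0F
    uint16_t spuTaskWaitMask[16];       // 0x10
    uint16_t pendingRecvTaskEvents[16]; // 0x30
    uint8_t  waitingTaskId[16];         // 0x50
    uint8_t  waitingTaskWklId[16];      // 0x60
    uint64_t addr;                      // 0x70
    uint32_t eventPortId;               // 0x78
    uint32_t eventQueueId;              // 0x7C
};

static_assert(offsetof(EventFlagLayout, spuTaskWaitMask) == 0x10, "unexpected layout");
static_assert(offsetof(EventFlagLayout, addr) == 0x70, "unexpected layout");
static_assert(sizeof(EventFlagLayout) == 0x80, "must span exactly one 128-byte line");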
From 430aa9af8912c0c00944e62a91f309153b15dd69 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 24 Jan 2015 00:41:29 +0530 Subject: [PATCH 16/29] SPURS: Implement cellSpursGetWorkloadData and cellSpursLookUpTasksetAddress --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 59 +++++++++++++++++++++--- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index c8fc774ad8..294331a6b3 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1469,12 +1469,12 @@ s64 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 workloadId) #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0xA658, libsre_rtoc); #else - if (!spurs) + if (spurs.addr() == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; } - if (spurs.addr() % CellSpurs::align) + if (spurs.addr() % CellSpurs::align) { return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; } @@ -1522,13 +1522,47 @@ s64 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 workloadId) #endif } -s64 cellSpursGetWorkloadData() +s64 cellSpursGetWorkloadData(vm::ptr spurs, vm::ptr data, u32 workloadId) { + cellSpurs->Warning("%s(spurs_addr=0x%x, data=0x%x, workloadId=%d)", __FUNCTION__, spurs.addr(), data.addr(), workloadId); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0xA78C, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (spurs.addr() == 0 || data.addr() == 0) + { + return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN; + } + + if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0)) + { + return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; + } + + if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0) + { + return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; + } + + if (spurs->m.exception) + { + return CELL_SPURS_POLICY_MODULE_ERROR_STAT; + } + + if (workloadId >= CELL_SPURS_MAX_WORKLOAD) + { + *data = spurs->m.wklInfo2[workloadId & 0x0F].arg; + } + else + { + *data = spurs->m.wklInfo1[workloadId].arg; + } + return CELL_OK; #endif } @@ -3272,7 +3306,20 @@ s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr> data; + auto rc = cellSpursGetWorkloadData(spurs, vm::ptr::make(data.addr()), id); + if (rc != CELL_OK) + { + // Convert policy module error code to a task error code + return rc ^ 0x100; + } + + taskset.set((u32)data.value()); return CELL_OK; #endif }
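cellSpursGetWorkloadData above encodes the split workload table: ids 0-15 index wklInfo1 directly, while ids 16-31 (valid only when the SF1_32_WORKLOADS flag is set) fold into wklInfo2 through their low four bits, and registration is tracked MSB first in wklMskA. A minimal sketch of the two mappings, assuming CELL_SPURS_MAX_WORKLOAD is 16 (the helper names are illustrative):

// Which info table and index a workload id resolves to.
static void spursWorkloadSlot(u32 workloadId, int& table, u32& index)
{
    if (workloadId >= 16) // CELL_SPURS_MAX_WORKLOAD
    {
        table = 2;                 // wklInfo2 holds the second set of 16
        index = workloadId & 0x0F; // 16 -> 0, 17 -> 1, ..., 31 -> 15
    }
    else
    {
        table = 1;                 // wklInfo1
        index = workloadId;
    }
}

// Whether workload id n is registered: bit n counted from the MSB of wklMskA.
static bool spursWorkloadRegistered(u32 wklMskA, u32 n)
{
    return (wklMskA & (0x80000000u >> n)) != 0;
}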
From 2e2f92f4f64a7ddea7a020a907cc1dcb9dfb7aba Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 26 Jan 2015 20:15:58 +0530 Subject: [PATCH 17/29] SPURS: Implement some portions of taskset policy module --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 33 ++++- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 142 +++++++++++++++++++- 3 files changed, 170 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 294331a6b3..99d2d35eea 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1702,7 +1702,7 @@ s64 _cellSpursEventFlagInitialize(vm::ptr spurs, vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention) { - cellSpurs->Warning("%s(attribute=0x%x, revision=%u, skd_version=%u, args=0x%llx, priority=0x%x, max_contention=%u)", + cellSpurs->Warning("%s(attribute=0x%x, revision=%d, sdk_version=%d, args=0x%llx, priority=0x%x, max_contention=%d)", __FUNCTION__, attribute.addr(), revision, sdk_version, args, priority.addr(), max_contention); #ifdef PRX_DEBUG diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 172bd43b23..196fedd3a8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -564,7 +564,7 @@ struct CellSpursEventFlag be_t spuTaskPendingRecv; // 0x02 A bit is set to 1 when the conditions of the SPU task using the slot are met and back to 0 when the SPU task unblocks be_t ppuWaitMask; // 0x04 Wait mask for blocked PPU thread u8 ppuWaitSlotAndMode; // 0x06 Top 4 bits: Wait slot number of the blocked PPU thread, Bottom 4 bits: Wait mode of the blocked PPU thread - u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is ublocked + u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is unblocked be_t spuTaskUsedWaitSlots; // 0x08 A bit is set to 1 if the wait slot corresponding to the bit is used by an SPU task and 0 otherwise be_t spuTaskWaitMode; // 0x0A A bit is set to 1 if the wait mode for the SPU task corresponding to the bit is AND and 0 otherwise u8 spuPort; // 0x0C @@ -852,8 +852,9 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -// The SPURS kernel data store. This resides at 0x00 of the LS. -struct SpursKernelMgmtData { +// The SPURS kernel data store. This resides at 0x100 of the LS. +struct SpursKernelMgmtData +{ u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 u8 wklLocPendingContention[0x10]; // 0x190 @@ -867,8 +868,7 @@ struct SpursKernelMgmtData { be_t wklCurrentId; // 0x1DC be_t yieldToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 - u8 x1E8; // 0x1E8 - u8 x1E9; // 0x1E9 + u8 moduleId[2]; // 0x1E8 u8 sysSrvInitialised; // 0x1EA u8 spuIdling; // 0x1EB be_t wklRunnable1; // 0x1EC @@ -880,5 +880,28 @@ struct SpursKernelMgmtData { u8 wklUniqueId[0x10]; // 0x220 }; +static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKernelMgmtData"); + +// The SPURS taskset policy module data store. This resides at 0x2700 of the LS. +struct SpursTasksetPmMgmtData +{ + u8 tempArea[0x80]; // 0x2700 + u8 x2780[0x27B8 - 0x2780]; // 0x2780 + vm::bptr taskset; // 0x27B8 + be_t kernelMgmt; // 0x27C0 + be_t yieldAddr; // 0x27C4 + be_t x27C8; // 0x27C8 + be_t spuNum; // 0x27CC + be_t dmaTagId; // 0x27D0 + be_t taskId; // 0x27D4 + u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 + u8 moduleId[16]; // 0x2840 + u8 x2850[0x2C80 - 0x2850]; // 0x2850 + be_t contextSaveArea[50]; // 0x2C80 + u8 x2FA0[0x3000 - 0x2FA0]; // 0x2FA0 +}; + +static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 06ff2553be..624be7e805 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -33,6 +33,10 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); void spursSysServiceWorkloadEntry(SPUThread & spu); +// +// SPURS taskset policy module functions +// + extern Module *cellSpurs; ////////////////////////////////////////////////////////////////////////////// @@ -378,8 +382,8 @@ void spursKernelMain(SPUThread & spu) { } if (!isSecond) { - mgmt->x1E8 = 0; - mgmt->x1E9 = 0; + mgmt->moduleId[0] = 0; + mgmt->moduleId[1] = 0; } // Run workload @@ -387,6 +391,7 @@ void spursKernelMain(SPUThread & spu) { spu.GPR[3]._u32[3] = 0x100; spu.GPR[4]._u64[1] = wkl.arg; spu.GPR[5]._u32[3] = pollStatus; + spu.SetPc(0xA00); switch (mgmt->wklCurrentAddr.addr()) { case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: spursSysServiceWorkloadEntry(spu);
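The validity check that opens spursTasksetProcessRequest below enforces the same invariant that _cellSpursSendSignal tests on the PPU side: ready and ready2 are mutually exclusive, a task cannot both run and wait, and no task may appear in any set without being enabled. A sketch of the per-word predicate (illustrative only, not code from this patch):

// One 32-bit word of the taskset bitsets is internally consistent when no
// task is in two mutually exclusive states and every flagged task is enabled.
static bool spursTasksetStateValid(u32 running, u32 ready, u32 ready2,
                                   u32 waiting, u32 signalled, u32 enabled)
{
    if (waiting & running)
    {
        return false; // a task cannot run and wait at the same time
    }

    if (ready & ready2)
    {
        return false; // the two ready sets are mutually exclusive
    }

    // no state bit may be set for a task that is not enabled
    return ((running | ready | ready2 | waiting | signalled) & ~enabled) == 0;
}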
@@ -797,3 +802,136 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) { // TODO: Ensure that this function always returns to the SPURS kernel return; } + +////////////////////////////////////////////////////////////////////////////// +// SPURS taskset policy module functions +////////////////////////////////////////////////////////////////////////////// + +bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) { + auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + // Verify taskset state is valid + for (auto i = 0; i < 4; i ++) { + if ((mgmt->taskset->m.waiting_set[i] & mgmt->taskset->m.running_set[i]) || + (mgmt->taskset->m.ready_set[i] & mgmt->taskset->m.ready2_set[i]) || + ((mgmt->taskset->m.running_set[i] | mgmt->taskset->m.ready_set[i] | + mgmt->taskset->m.ready2_set[i] | mgmt->taskset->m.signal_received_set[i] + mgmt->taskset->m.waiting_set[i]) & ~mgmt->taskset->m.enabled_set[i])) { + assert(0); + } + } + + // TODO: Implement cases + s32 delta = 0; + switch (request + 1) { + case 0: + break; + case 1: + break; + case 2: + break; + case 3: + break; + case 4: + break; + case 5: + break; + case 6: + break; + default: + assert(0); + break; + } + + // Set the ready count of the workload to the number of ready tasks + do { + s32 readyCount = kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD ? + kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed() : + kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed(); + + auto newReadyCount = readyCount + delta > 0xFF ? 0xFF : readyCount + delta < 0 ? 0 : readyCount + delta; + + if (kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD) { + kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(newReadyCount); + } else { + kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(newReadyCount); + } + + delta += readyCount; + } while (delta > 0); + + // TODO: Implement return + return false; +} + +void spursTasksetDispatch() { +} + +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { + if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { + spursTasksetProcessRequest(spu, 6, nullptr, nullptr); + } +} + +bool spursTasksetShouldYield(SPUThread & spu) { + u32 pollStatus; + + if (cellSpursModulePollStatus(spu, &pollStatus)) { + return true; + } + + spursTasksetProcessPollStatus(spu, pollStatus); + return false; +} + +void spursTasksetInit(SPUThread & spu, u32 pollStatus) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + + kernelMgmt->moduleId[0] = 'T'; + kernelMgmt->moduleId[1] = 'K'; + + // Trace - START: Module='TKST' + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START + memcpy(pkt.data.start.module, "TKST", 4); + pkt.data.start.level = 2; + pkt.data.start.ls = 0xA00 >> 2; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursTasksetProcessPollStatus(spu, pollStatus); +} + +void spursTasksetEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + // Check if the function was invoked by the SPURS kernel or because of a syscall + if (spu.PC != 0xA70) { + // Called from kernel + auto kernelMgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; + + memset(mgmt, 0, sizeof(*mgmt)); + mgmt->taskset.set(arg); + memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); + mgmt->kernelMgmt = spu.GPR[3]._u32[3]; + mgmt->yieldAddr = 0xA70; + mgmt->spuNum = kernelMgmt->spuNum; + mgmt->dmaTagId = kernelMgmt->dmaTagId; + mgmt->taskId = 0xFFFFFFFF; + + spursTasksetInit(spu, pollStatus); + // TODO: Dispatch + } + + mgmt->contextSaveArea[0] = spu.GPR[0]; + mgmt->contextSaveArea[1] = spu.GPR[1]; + for (auto i = 0; i < 48; i++) { + mgmt->contextSaveArea[i + 2] = spu.GPR[80 + i]; + } + + // TODO: Process syscall +} From a7728c9067165c23d0e70000a72fefc11a84612a Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 28 Jan 2015 23:48:06 +0530 Subject: [PATCH 18/29] SPURS: Document some parts of taskset policy module --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 40 +++++++++---
rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 68 ++++++++++++++++++--- 3 files changed, 92 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 99d2d35eea..c9795203f8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2821,7 +2821,7 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: } taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr()); - taskset->m.task_info[tmp_task_id].context_save_storage.set((context_addr.addr() & 0xFFFFFFF8) | alloc_ls_blocks); + taskset->m.task_info[tmp_task_id].context_save_storage_and_alloc_ls_blocks = (context_addr.addr() | alloc_ls_blocks); for (u32 i = 0; i < 2; i++) { taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 196fedd3a8..4ccf224ec7 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -181,6 +181,12 @@ enum SpursTraceConstants CELL_SPURS_TRACE_SERVICE_WAIT = 0x02, CELL_SPURS_TRACE_SERVICE_EXIT = 0x03, + // Task incident + CELL_SPURS_TRACE_TASK_DISPATCH = 0x01, + CELL_SPURS_TRACE_TASK_YIELD = 0x03, + CELL_SPURS_TRACE_TASK_WAIT = 0x04, + CELL_SPURS_TRACE_TASK_EXIT = 0x05, + // Trace mode flags CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER = 0x1, CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP = 0x2, @@ -335,6 +341,12 @@ struct CellSpursTracePacket be_t ls; } start; + struct + { + be_t incident; + be_t taskId; + } task; + be_t user; be_t guid; be_t stop; @@ -607,7 +619,7 @@ struct CellSpursTaskset { CellSpursTaskArgument args; vm::bptr elf_addr; - vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + be_t context_save_storage_and_alloc_ls_blocks; // This is (context_save_storage_addr | allocated_ls_blocks) CellSpursTaskLsPattern ls_pattern; }; @@ -716,7 +728,7 @@ struct CellSpursTaskset2 { CellSpursTaskArgument args; vm::bptr elf_addr; - vm::bptr context_save_storage; // This is ((context_save_storage_addr & 0xFFFFFFF8) | allocated_ls_blocks) + vm::bptr context_save_storage; // This is (context_save_storage_addr | allocated_ls_blocks) CellSpursTaskLsPattern ls_pattern; }; @@ -885,10 +897,11 @@ static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKer // The SPURS taskset policy module data store. This resides at 0x2700 of the LS. 
struct SpursTasksetPmMgmtData { - u8 tempArea[0x80]; // 0x2700 - u8 x2780[0x27B8 - 0x2780]; // 0x2780 + u8 tempAreaTaskset[0x80]; // 0x2700 + u8 tempAreaTaskInfo[0x30]; // 0x2780 + be_t x27B0; // 0x27B0 vm::bptr taskset; // 0x27B8 - be_t kernelMgmt; // 0x27C0 + be_t kernelMgmtAddr; // 0x27C0 be_t yieldAddr; // 0x27C4 be_t x27C8; // 0x27C8 be_t spuNum; // 0x27CC @@ -896,9 +909,20 @@ struct SpursTasksetPmMgmtData be_t taskId; // 0x27D4 u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 u8 moduleId[16]; // 0x2840 - u8 x2850[0x2C80 - 0x2850]; // 0x2850 - be_t contextSaveArea[50]; // 0x2C80 - u8 x2FA0[0x3000 - 0x2FA0]; // 0x2FA0 + u8 stackArea[0x2C80 - 0x2850]; // 0x2850 + be_t savedContextLr; // 0x2C80 + be_t savedContextSp; // 0x2C90 + be_t savedContextR80ToR127[48]; // 0x2CA0 + be_t savedContextFpscr; // 0x2FA0 + be_t savedWriteTagGroupQueryMask; // 0x2FB0 + be_t savedSpuWriteEventMask; // 0x2FB4 + be_t tasksetMgmtAddr; // 0x2FB8 + be_t lowestLoadSegmentAddr; // 0x2FBC + be_t x2FC0; // 0x2FC0 + be_t x2FC8; // 0x2FC8 + be_t x2FD0; // 0x2FD0 + be_t taskExitCode; // 0x2FD4 + u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 624be7e805..e4ebca84a5 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -865,7 +865,57 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 return false; } -void spursTasksetDispatch() { +void spursTasksetDispatch(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + + u32 taskId; + u32 isWaiting; + spursTasksetProcessRequest(spu, 5, &taskId, &isWaiting); + if (taskId >= CELL_SPURS_MAX_TASK) { + // TODO: spursTasksetExit(spu); + } + + mgmt->taskId = taskId; + u64 elfAddr = mgmt->taskset->m.task_info[taskId].elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull; + + // Trace - Task: Incident=dispatch + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK; + pkt.data.task.incident = CELL_SPURS_TRACE_TASK_DISPATCH; + pkt.data.task.taskId = taskId; + cellSpursModulePutTrace(&pkt, 0x1F); + + if (isWaiting == 0) { + } + + if (mgmt->taskset->m.enable_clear_ls) { + memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); + } + + // If the entire LS is saved then there is no need to load the ELF as it will be saved in the context save area + if (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || + (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + // Load the ELF + // TODO: Load ELF + } + + // Load save context from main memory to LS + u64 context_save_storage = mgmt->taskset->m.task_info[taskId].context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + for (auto i = 6; i < 128; i++) { + bool shouldLoad = mgmt->taskset->m.task_info[taskId].ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ?
true : false; + if (shouldLoad) { + memcpy(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP + ((i - 6) << 11)), + vm::get_ptr((u32)context_save_storage + 0x400 + ((i - 6) << 11)), 0x800); + } + } + + // Trace - GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; + pkt.data.guid = 0; // TODO: Put GUID of taskId here + cellSpursModulePutTrace(&pkt, 0x1F); } void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { @@ -917,20 +967,20 @@ void spursTasksetEntry(SPUThread & spu) { memset(mgmt, 0, sizeof(*mgmt)); mgmt->taskset.set(arg); memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); - mgmt->kernelMgmt = spu.GPR[3]._u32[3]; - mgmt->yieldAddr = 0xA70; - mgmt->spuNum = kernelMgmt->spuNum; - mgmt->dmaTagId = kernelMgmt->dmaTagId; - mgmt->taskId = 0xFFFFFFFF; + mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3]; + mgmt->yieldAddr = 0xA70; + mgmt->spuNum = kernelMgmt->spuNum; + mgmt->dmaTagId = kernelMgmt->dmaTagId; + mgmt->taskId = 0xFFFFFFFF; spursTasksetInit(spu, pollStatus); // TODO: Dispatch } - mgmt->contextSaveArea[0] = spu.GPR[0]; - mgmt->contextSaveArea[1] = spu.GPR[1]; + mgmt->savedContextLr = spu.GPR[0]; + mgmt->savedContextSp = spu.GPR[1]; for (auto i = 0; i < 48; i++) { - mgmt->contextSaveArea[i + 2] = spu.GPR[80 + i]; + mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; } // TODO: Process syscall From 62e2d8d9a7b8aaa110673862b80733b4e93b42c7 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 29 Jan 2015 20:20:34 +0530 Subject: [PATCH 19/29] SPURS: Update kernel to use lock line reservations --- rpcs3/Emu/Cell/MFC.h | 8 + rpcs3/Emu/Cell/SPUThread.cpp | 9 +- rpcs3/Emu/Cell/SPUThread.h | 31 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 19 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 19 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 623 +++++++++++--------- rpcs3/stdafx.h | 2 + 7 files changed, 414 insertions(+), 297 deletions(-) diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h index a6c731d3da..0b669deb97 100644 --- a/rpcs3/Emu/Cell/MFC.h +++ b/rpcs3/Emu/Cell/MFC.h @@ -35,6 +35,14 @@ enum MFC_GETLLAR_SUCCESS = 4, }; +// MFC Write Tag Status Update Request Channel (ch23) operations +enum +{ + MFC_TAG_UPDATE_IMMEDIATE = 0, + MFC_TAG_UPDATE_ANY = 1, + MFC_TAG_UPDATE_ALL = 2, +}; + enum { MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF, diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 95f678bf02..fcb9b012e9 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1060,7 +1060,14 @@ void SPUThread::StopAndSignal(u32 code) case 0x003: { - m_code3_func(*this); + auto iter = m_addr_to_hle_function_map.find(PC); + assert(iter != m_addr_to_hle_function_map.end()); + + auto return_to_caller = iter->second(*this); + if (return_to_caller) + { + SetBranch(GPR[0]._u32[3] & 0x3fffc); + } break; } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index f880e5ca6e..d6ecbe64b0 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -290,6 +290,8 @@ public: u32 m_event_mask; u32 m_events; + std::unordered_map> m_addr_to_hle_function_map; + struct IntrTag { u32 enabled; // 1 == true @@ -509,8 +511,35 @@ public: void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); } void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } + void RegisterHleFuncion(u32 addr, std::function function) + { + m_addr_to_hle_function_map[addr] = function; + WriteLS32(addr, 0x00000003); // STOP 3 + } + + void 
UnregisterHleFunction(u32 addr) + { + WriteLS32(addr, 0x00200000); // NOP + m_addr_to_hle_function_map.erase(addr); + } + + void UnregisterHleFunctions(u32 start_addr, u32 end_addr) + { + for (auto iter = m_addr_to_hle_function_map.begin(); iter != m_addr_to_hle_function_map.end();) + { + if (iter->first >= start_addr && iter->first <= end_addr) + { + WriteLS32(iter->first, 0x00200000); // NOP + m_addr_to_hle_function_map.erase(iter++); + } + else + { + iter++; + } + } + } + std::function m_custom_task; - std::function m_code3_func; public: SPUThread(CPUThreadType type = CPU_THREAD_SPU); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index c9795203f8..62349f276d 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -26,7 +26,7 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif -void spursKernelMain(SPUThread & spu); +bool spursKernelMain(SPUThread & spu); s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); @@ -155,7 +155,8 @@ s64 spursInit( assert(!"spu_image_import() failed"); } #else - spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096); + spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096); + spurs->m.spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR; #endif s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL; @@ -179,17 +180,11 @@ s64 spursInit( name += "CellSpursKernel0"; for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++) { - spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num](SPUThread& SPU) - { - SPU.GPR[3]._u32[3] = num; - SPU.GPR[4]._u64[1] = spurs.addr(); - -#ifdef PRX_DEBUG_XXX - return SPU.FastCall(SPU.PC); + auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); +#ifndef PRX_DEBUG_XXX + spu->RegisterHleFuncion(spurs->m.spuImg.entry_point, spursKernelMain); #endif - - spursKernelMain(SPU); - })->GetId(); + spurs->m.spus[num] = spu->GetId(); } if (flags & SAF_SPU_PRINTF_ENABLED) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4ccf224ec7..4d77a06402 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -102,6 +102,12 @@ enum SPURSKernelInterfaces CELL_SPURS_INTERRUPT_VECTOR = 0x0, CELL_SPURS_LOCK_LINE = 0x80, CELL_SPURS_KERNEL_DMA_TAG_ID = 31, + CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, + CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, + CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808, + CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838, + CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, + CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; enum RangeofEventQueuePortNumbers @@ -885,14 +891,23 @@ struct SpursKernelMgmtData u8 spuIdling; // 0x1EB be_t wklRunnable1; // 0x1EC be_t wklRunnable2; // 0x1EE - u8 x1F0[0x210 - 0x1F0]; // 0x1F0 + be_t x1F0; // 0x1F0 + be_t x1F4; // 0x1F4 + be_t x1F8; // 0x1F8 + be_t x1FC; // 0x1FC + be_t x200; // 0x200 + be_t x204; // 0x204 + be_t x208; // 0x208 + be_t x20C; // 0x20C be_t traceBuffer; // 0x210 be_t traceMsgCount; // 0x218 be_t traceMaxCount; // 0x21C u8 wklUniqueId[0x10]; // 0x220 + u8 x230[0x280 - 0x230]; // 0x230 + be_t guid[4]; // 0x280 }; -static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKernelMgmtData"); +static_assert(sizeof(SpursKernelMgmtData) == 
0x190, "Incorrect size for SpursKernelMgmtData"); // The SPURS taskset policy module data store. This resides at 0x2700 of the LS. struct SpursTasksetPmMgmtData diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index e4ebca84a5..898638894c 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -14,11 +14,15 @@ void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); + // // SPURS Kernel functions // -void spursKernelSelectWorkload(SPUThread & spu); -void spursKernelSelectWorkload2(SPUThread & spu); +bool spursKernel1SelectWorkload(SPUThread & spu); +bool spursKernel2SelectWorkload(SPUThread & spu); // // SPURS system service workload functions @@ -31,7 +35,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); -void spursSysServiceWorkloadEntry(SPUThread & spu); +bool spursSysServiceWorkloadEntry(SPUThread & spu); // // SPURS taskset polict module functions @@ -54,9 +58,9 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { spu.GPR[3]._u32[3] = 1; if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { - spursKernelSelectWorkload2(spu); + spursKernel2SelectWorkload(spu); } else { - spursKernelSelectWorkload(spu); + spursKernel1SelectWorkload(spu); } auto result = spu.GPR[3]._u64[1]; @@ -68,14 +72,51 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { return wklId == mgmt->wklCurrentId ? 0 : 1; } +/// Execute a DMA operation +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { + spu.WriteChannel(MFC_LSA, u128::from32r(lsa)); + spu.WriteChannel(MFC_EAH, u128::from32r((u32)(ea >> 32))); + spu.WriteChannel(MFC_EAL, u128::from32r((u32)ea)); + spu.WriteChannel(MFC_Size, u128::from32r(size)); + spu.WriteChannel(MFC_TagID, u128::from32r(tag)); + spu.WriteChannel(MFC_Cmd, u128::from32r(cmd)); + + if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) { + u128 rv; + + spu.ReadChannel(rv, MFC_RdAtomicStat); + return rv._u32[3] ? true : false; + } + + return true; +} + +/// Get the status of DMA operations +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + +/// Wait for DMA operations to complete +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(waitForAll ? 
MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + ////////////////////////////////////////////////////////////////////////////// // SPURS kernel functions ////////////////////////////////////////////////////////////////////////////// /// Select a workload to run -void spursKernelSelectWorkload(SPUThread & spu) { - LV2_LOCK(0); // TODO: lock-free implementation if possible - +bool spursKernel1SelectWorkload(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function @@ -83,140 +124,148 @@ void spursKernelSelectWorkload(SPUThread & spu) { // If the first argument is true then the shared data is not updated with the result. const auto isPoll = spu.GPR[3]._u32[3]; - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; + u32 wklSelectedId; + u32 pollStatus; - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) { - contention[i] += pendingContention[i]; - } - } - } + do { + // DMA and lock the first 0x80 bytes of spurs + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } else { - // Caclulate the scheduling weight for each workload - u16 maxWeight = 0; + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); - u8 requestCount = readyCount + idleSpuCount; + contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; - // For a workload to be considered for scheduling: - // 1. Its priority must not be 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. 
The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { - // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: - // 1. Wokload signal set or workload flag or ready count > contention - // 2. Priority of the workload on the SPU - // 3. Is the workload the last selected workload - // 4. Minimum contention of the workload - // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) - // 6. Is the workload executable same as the currently loaded executable - // 7. The workload id (lesser the number, more the weight) - u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; - weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; - weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; - weight |= 0x01; - - // In case of a tie the lower numbered workload is chosen - if (weight > maxWeight) { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; + if (i != mgmt->wklCurrentId) { + contention[i] += pendingContention[i]; } } } - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. 
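+		// Note: on a poll from a different workload the message bit is deliberately left
+		// set, presumably so that the next real entry into the kernel still selects the
+		// system service workload.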
+		if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+			mgmt->spuIdling = 0;
+			if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+				// Clear the message bit
+				spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+			}
+		} else {
+			// Calculate the scheduling weight for each workload
+			u16 maxWeight = 0;
+			for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+				u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+				u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+				u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+				u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+				u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+				u8 requestCount = readyCount + idleSpuCount;
+
+				// For a workload to be considered for scheduling:
+				// 1. Its priority must not be 0
+				// 2. The number of SPUs used by it must be less than the max contention for that workload
+				// 3. The workload should be in runnable state
+				// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+				//    OR the workload must be signalled
+				//    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+				if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+					if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+						// The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+						// 1. Workload signal set or workload flag or ready count > contention
+						// 2. Priority of the workload on the SPU
+						// 3. Is the workload the last selected workload
+						// 4. Minimum contention of the workload
+						// 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
+						// 6. Is the workload executable same as the currently loaded executable
+						// 7. The workload id (lesser the number, more the weight)
+						u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+						weight |= (u16)(mgmt->priority[i] & 0x7F) << 8;
+						weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
+						weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+						weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+						weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00;
+						weight |= 0x01;
+
+						// In case of a tie the lower numbered workload is chosen
+						if (weight > maxWeight) {
+							wklSelectedId = i;
+							maxWeight = weight;
+							pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+							pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+							pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+						}
+					}
+				}
+			}
+
+			// Not sure what this does. Possibly mark the SPU as idle/in use.
+			mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; + + if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + // Clear workload signal for the selected workload + spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); + spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + + // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s + if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) { + spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); + } } } - } - if (!isPoll) { - // Called by kernel - // Increment the contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - contention[wklSelectedId]++; - } + if (!isPoll) { + // Called by kernel + // Increment the contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + contention[wklSelectedId]++; + } - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - mgmt->spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklCurrentContention[i] = contention[i]; + mgmt->wklLocContention[i] = 0; + mgmt->wklLocPendingContention[i] = 0; + } - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocContention[wklSelectedId] = 1; - } + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + mgmt->wklLocContention[wklSelectedId] = 1; + } - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - pendingContention[wklSelectedId]++; - } + mgmt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != mgmt->wklCurrentId) { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + pendingContention[wklSelectedId]++; + } - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; - } + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklPendingContention[i] = pendingContention[i]; + mgmt->wklLocPendingContention[i] = 0; + } - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocPendingContention[wklSelectedId] = 1; + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + mgmt->wklLocPendingContention[wklSelectedId] = 1; + } } - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; spu.GPR[3]._u64[1] = result; + return true; } /// Select a workload to run -void spursKernelSelectWorkload2(SPUThread & spu) { - LV2_LOCK(0); // TODO: lock-free implementation if possible - +bool spursKernel2SelectWorkload(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function @@ -224,202 +273,214 @@ void spursKernelSelectWorkload2(SPUThread & spu) { // If the first argument is true then the shared data is not updated with the result. 
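 	// Note: unlike the first-generation kernel, this variant tracks up to 32 workloads by
 	// packing two 4-bit values per byte; the masks (& 0x0F) and shifts (>> 4) below select
 	// the low nibble (workloads 0-15) or the high nibble (workloads 16-31).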
const auto isPoll = spu.GPR[3]._u32[3]; - // Calculate the contention (number of SPUs used) for each workload - u8 contention[CELL_SPURS_MAX_WORKLOAD2]; - u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; - for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; - contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; + u32 wklSelectedId; + u32 pollStatus; - // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably - // to prevent unnecessary jumps to the kernel - if (isPoll) { - pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; - pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) { - contention[i] += pendingContention[i]; - } - } - } + do { + // DMA and lock the first 0x80 bytes of spurs + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; - u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - - // The system service workload has the highest priority. Select the system service workload if - // the system service message bit for this SPU is set. - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - // Clear the message bit - mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); - } - } else { - // Caclulate the scheduling weight for each workload - u8 maxWeight = 0; + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD2]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - auto j = i & 0x0F; - u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; - // For a workload to be considered for scheduling: - // 1. Its priority must be greater than 0 - // 2. The number of SPUs used by it must be less than the max contention for that workload - // 3. The workload should be in runnable state - // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. 
readyCount) - // OR the workload must be signalled - // OR the workload flag is 0 and the workload is configured as the wokload receiver - if (runnable && priority > 0 && maxContention > contention[i]) { - if (wklFlag || wklSignal || readyCount > contention[i]) { - // The scheduling weight of the workload is equal to the priority of the workload for the SPU. - // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. - // In case of a tie the lower numbered workload is chosen. - u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) { - weight |= 0x04; - } - - if (weight > maxWeight) { - wklSelectedId = i; - maxWeight = weight; - pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; - pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; - pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; - } + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; + if (i != mgmt->wklCurrentId) { + contention[i] += pendingContention[i]; } } } - // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { - // Clear workload signal for the selected workload - mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); - mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); + // The system service workload has the highest priority. Select the system service workload if + // the system service message bit for this SPU is set. + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + // Not sure what this does. Possibly Mark the SPU as in use. + mgmt->spuIdling = 0; + if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + // Clear the message bit + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + } + } else { + // Caclulate the scheduling weight for each workload + u8 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + auto j = i & 0x0F; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? 
spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+				// For a workload to be considered for scheduling:
+				// 1. Its priority must be greater than 0
+				// 2. The number of SPUs used by it must be less than the max contention for that workload
+				// 3. The workload should be in runnable state
+				// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+				//    OR the workload must be signalled
+				//    OR the workload flag is 0 and the workload is configured as the workload receiver
+				if (runnable && priority > 0 && maxContention > contention[i]) {
+					if (wklFlag || wklSignal || readyCount > contention[i]) {
+						// The scheduling weight of the workload is equal to the priority of the workload for the SPU.
+						// The current workload is given a slightly higher weight presumably to reduce the number of context switches.
+						// In case of a tie the lower numbered workload is chosen.
+						u8 weight = priority << 4;
+						if (mgmt->wklCurrentId == i) {
+							weight |= 0x04;
+						}
+
+						if (weight > maxWeight) {
+							wklSelectedId = i;
+							maxWeight = weight;
+							pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+							pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+							pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+						}
+					}
+				}
+			}
+
+			// Not sure what this does. Possibly mark the SPU as idle/in use.
+			mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+			if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+				// Clear workload signal for the selected workload
+				spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+				spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+				// If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+				if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+					spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+				}
+			}
+		}
+
+		if (!isPoll) {
+			// Called by kernel
+			// Increment the contention for the selected workload
+			if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+				contention[wklSelectedId]++;
+			}
+
+			for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+				spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+				mgmt->wklLocContention[i] = 0;
+				mgmt->wklLocPendingContention[i] = 0;
+			}
+
+			mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 
0x10 : 0; + mgmt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != mgmt->wklCurrentId) { + // Not called by kernel but a context switch is required + // Increment the pending contention for the selected workload + if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + pendingContention[wklSelectedId]++; + } + + for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { + spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); + mgmt->wklLocPendingContention[i] = 0; + } + + mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { - mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { - // Not called by kernel but a context switch is required - // Increment the pending contention for the selected workload - if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - pendingContention[wklSelectedId]++; - } - - for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { - mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; - } - - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; spu.GPR[3]._u64[1] = result; + return true; } -/// Entry point of the SPURS kernel -void spursKernelMain(SPUThread & spu) { +/// SPURS kernel main +bool spursKernelMain(SPUThread & spu) { SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); - mgmt->spuNum = spu.GPR[3]._u32[3]; - mgmt->dmaTagId = 0x1F; - mgmt->spurs.set(spu.GPR[4]._u64[1]); - mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - mgmt->wklCurrentUniqueId = 0x20; - bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808; - mgmt->selectWorkloadAddr = 0x290; - spu.WriteLS32(mgmt->yieldToKernelAddr, 2); // hack for cellSpursModuleExit - spu.WriteLS32(mgmt->selectWorkloadAddr, 3); // hack for cellSpursModulePollStatus - spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0 - spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload; + bool isKernel2; + u32 pollStatus; + const CellSpurs::WorkloadInfo * wklInfo; + if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { + // Entry point of SPURS kernel + // Save arguments + mgmt->spuNum = spu.GPR[3]._u32[3]; + mgmt->spurs.set(spu.GPR[4]._u64[1]); - u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - u32 pollStatus = 0; - while (true) { - if (Emu.IsStopped()) { - cellSpurs->Warning("Spurs Kernel aborted"); - return; - } + isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - // Get current workload info - auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? 
mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv); + memset(mgmt, 0, sizeof(SpursKernelMgmtData)); - if (mgmt->wklCurrentAddr != wkl.addr) { - if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) { - // Load executable code - memcpy(vm::get_ptr(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size); - } - mgmt->wklCurrentAddr = wkl.addr; - mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed(); - } - - if (!isSecond) { - mgmt->moduleId[0] = 0; - mgmt->moduleId[1] = 0; - } - - // Run workload - spu.GPR[1]._u32[3] = 0x3FFB0; - spu.GPR[3]._u32[3] = 0x100; - spu.GPR[4]._u64[1] = wkl.arg; - spu.GPR[5]._u32[3] = pollStatus; - spu.SetPc(0xA00); - switch (mgmt->wklCurrentAddr.addr()) { - case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: - spursSysServiceWorkloadEntry(spu); - break; - default: - spu.FastCall(0xA00); - break; - } - - // Check status - auto status = spu.SPU.Status.GetValue(); - if (status == SPU_STATUS_STOPPED_BY_STOP) { - return; + // Initialise the SPURS management area to its initial values + mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; + mgmt->wklCurrentUniqueId = 0x20; + mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + mgmt->yieldToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR; + mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; + if (!isKernel2) { + mgmt->x1F0 = 0xF0020000; + mgmt->x200 = 0x20000; + mgmt->guid[0] = 0x423A3A02; + mgmt->guid[1] = 0x43F43A82; + mgmt->guid[2] = 0x43F26502; + mgmt->guid[3] = 0x420EB382; } else { - assert(status == SPU_STATUS_RUNNING); + mgmt->guid[0] = 0x43A08402; + mgmt->guid[1] = 0x43FB0A82; + mgmt->guid[2] = 0x435E9302; + mgmt->guid[3] = 0x43A3C982; } + spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant + spu.RegisterHleFuncion(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); + spu.RegisterHleFuncion(mgmt->yieldToKernelAddr, spursKernelMain); + spu.RegisterHleFuncion(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); + + // Start the system service workload + spu.RegisterHleFuncion(0xA00, spursSysServiceWorkloadEntry); + wklInfo = &mgmt->spurs->m.wklInfoSysSrv; + pollStatus = 0; + } else if (spu.PC == mgmt->yieldToKernelAddr) { + isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + // Select next workload to run spu.GPR[3].clear(); - if (isSecond) { - spursKernelSelectWorkload2(spu); + if (isKernel2) { + spursKernel2SelectWorkload(spu); } else { - spursKernelSelectWorkload(spu); + spursKernel1SelectWorkload(spu); } - u64 res = spu.GPR[3]._u64[1]; - pollStatus = (u32)(res); - wid = (u32)(res >> 32); + + pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : + (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
&mgmt->spurs->m.wklInfo2[wid & 0xf] : &mgmt->spurs->m.wklInfoSysSrv); + } else { + assert(0); } + + if (!isKernel2) { + mgmt->moduleId[0] = 0; + mgmt->moduleId[1] = 0; + } + + // Run workload + spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; + spu.GPR[1]._u32[3] = 0x3FFB0; + spu.GPR[3]._u32[3] = 0x100; + spu.GPR[4]._u64[1] = wklInfo->arg; + spu.GPR[5]._u32[3] = pollStatus; + spu.SetBranch(0xA00); + return false; } ////////////////////////////////////////////////////////////////////////////// @@ -783,7 +844,7 @@ poll: } /// Entry point of the system service workload -void spursSysServiceWorkloadEntry(SPUThread & spu) { +bool spursSysServiceWorkloadEntry(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); auto arg = spu.GPR[4]._u64[1]; auto pollStatus = spu.GPR[5]._u32[3]; @@ -800,7 +861,7 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) { } // TODO: Ensure that this function always returns to the SPURS kernel - return; + return false; } ////////////////////////////////////////////////////////////////////////////// diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index 825c1c4007..4581c27650 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include "Utilities/GNU.h" From 507638e6d825baa187015d49fbbe98f1db2bedc8 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 31 Jan 2015 00:36:58 +0530 Subject: [PATCH 20/29] SPURS: Update system service workload to use lock line reservations --- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 412 ++++++++++++-------- 3 files changed, 255 insertions(+), 161 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index d6ecbe64b0..ce0036e4a5 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -511,7 +511,7 @@ public: void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); } void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } - void RegisterHleFuncion(u32 addr, std::function function) + void RegisterHleFunction(u32 addr, std::function function) { m_addr_to_hle_function_map[addr] = function; WriteLS32(addr, 0x00000003); // STOP 3 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 62349f276d..83dffb54e7 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -182,7 +182,7 @@ s64 spursInit( { auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); #ifndef PRX_DEBUG_XXX - spu->RegisterHleFuncion(spurs->m.spuImg.entry_point, spursKernelMain); + spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelMain); #endif spurs->m.spus[num] = spu->GetId(); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 898638894c..e6921057c1 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -17,6 +17,7 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); +void spursHalt(); // // SPURS Kernel functions @@ -111,6 +112,12 @@ u32 
spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { return rv._u32[3]; } +// Halt the SPU +void spursHalt(SPUThread & spu) { + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); + spu.Stop(); +} + ////////////////////////////////////////////////////////////////////////////// // SPURS kernel functions ////////////////////////////////////////////////////////////////////////////// @@ -130,7 +137,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD]; @@ -279,7 +286,7 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = (CellSpurs *)mgmt->tempArea; + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD2]; @@ -408,8 +415,8 @@ bool spursKernelMain(SPUThread & spu) { SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); bool isKernel2; - u32 pollStatus; - const CellSpurs::WorkloadInfo * wklInfo; + u32 pollStatus; + u64 wklArg; if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { // Entry point of SPURS kernel // Save arguments @@ -440,14 +447,15 @@ bool spursKernelMain(SPUThread & spu) { mgmt->guid[3] = 0x43A3C982; } + // Register SPURS kernel HLE functions spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant - spu.RegisterHleFuncion(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); - spu.RegisterHleFuncion(mgmt->yieldToKernelAddr, spursKernelMain); - spu.RegisterHleFuncion(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); + spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); + spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); + spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - // Start the system service workload - spu.RegisterHleFuncion(0xA00, spursSysServiceWorkloadEntry); - wklInfo = &mgmt->spurs->m.wklInfoSysSrv; + // Register the system HLE service workload entry point + spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + wklArg = mgmt->spurs->m.wklInfoSysSrv.arg; pollStatus = 0; } else if (spu.PC == mgmt->yieldToKernelAddr) { isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; @@ -460,10 +468,11 @@ bool spursKernelMain(SPUThread & spu) { spursKernel1SelectWorkload(spu); } - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : - (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &mgmt->spurs->m.wklInfo2[wid & 0xf] : &mgmt->spurs->m.wklInfoSysSrv); + pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + auto wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
&mgmt->spurs->m.wklInfo2[wid & 0xf] : + &mgmt->spurs->m.wklInfoSysSrv); + wklArg = wklInfo->arg; } else { assert(0); } @@ -477,7 +486,7 @@ bool spursKernelMain(SPUThread & spu) { spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; - spu.GPR[4]._u64[1] = wklInfo->arg; + spu.GPR[4]._u64[1] = wklArg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); return false; @@ -489,33 +498,48 @@ bool spursKernelMain(SPUThread & spu) { /// Restore scheduling parameters after a workload has been preempted by the system service workload void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) { - auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum]; - mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; + u8 wklId; - spursSysServiceUpdateWorkload(spu, mgmt); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + if (spurs->m.sysSrvWorkload[mgmt->spuNum] == 0xFF) { + return; } - // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace - // uses the current worload id to determine the workload to which the trace belongs - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; + wklId = spurs->m.sysSrvWorkload[mgmt->spuNum]; + spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - // Trace - STOP: GUID - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = SPURS_GUID_SYS_WKL; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + spursSysServiceUpdateWorkload(spu, mgmt); - mgmt->wklCurrentId = wklIdSaved; - } + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); + } else { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; + spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace + // uses the current worload id to determine the workload to which the trace belongs + auto wklIdSaved = mgmt->wklCurrentId; + mgmt->wklCurrentId = wklId; + + // Trace - STOP: GUID + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = 
SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + mgmt->wklCurrentId = wklIdSaved; } /// Update the trace count for this SPU in CellSpurs @@ -528,39 +552,52 @@ void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt /// Update trace control in SPU from CellSpurs void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace; - mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum); - mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum; + bool notify; - bool notify = false; - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) { - mgmt->spurs->m.xCD = 0; - notify = true; - } + u8 sysSrvMsgUpdateTrace; + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - if (arg4 && mgmt->spurs->m.xCD != 0) { - mgmt->spurs->m.xCD = 0; - notify = true; - } + sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; + spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); + spurs->m.xCC &= ~(1 << mgmt->spuNum); + spurs->m.xCC |= arg2 << mgmt->spuNum; + + notify = false; + if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { + spurs->m.xCD = 0; + notify = true; + } + + if (arg4 && spurs->m.xCD != 0) { + spurs->m.xCD = 0; + notify = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); // Get trace parameters from CellSpurs and store them in the LS if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { - if (mgmt->traceMsgCount != 0xFF || mgmt->spurs->m.traceBuffer.addr() == 0) { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); + + if (mgmt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { spursSysServiceUpdateTraceCount(spu, mgmt); } else { - mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum]; + spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, mgmt->dmaTagId); + auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); + mgmt->traceMsgCount = traceBuffer->count[mgmt->spuNum]; } - mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0]; + mgmt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[mgmt->spuNum] << 4); + mgmt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; if (mgmt->traceBuffer == 0) { mgmt->traceMsgCount = 0; } } if (notify) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0); + // TODO: sys_spu_thread_send_event(spurs->m.spuPort, 2, 0); } } @@ -568,81 +605,108 @@ void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set 
of the completed // workloads that have a shutdown completion hook registered - u32 wklNotifyBitSet = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - if (wklShutdownBitSet & (0x80000000u >> i)) { - mgmt->spurs->m.wklEvent1[i] |= 0x01; - if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) { - wklNotifyBitSet |= 0x80000000u >> i; - } - } + u32 wklNotifyBitSet; + u8 spuPort; + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - if (wklShutdownBitSet & (0x8000 >> i)) { - mgmt->spurs->m.wklEvent2[i] |= 0x01; - if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) { - wklNotifyBitSet |= 0x8000 >> i; + wklNotifyBitSet = 0; + spuPort = spurs->m.spuPort;; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + if (wklShutdownBitSet & (0x80000000u >> i)) { + spurs->m.wklEvent1[i] |= 0x01; + if (spurs->m.wklEvent1[i] & 0x02 || spurs->m.wklEvent1[i] & 0x10) { + wklNotifyBitSet |= 0x80000000u >> i; + } + } + + if (wklShutdownBitSet & (0x8000 >> i)) { + spurs->m.wklEvent2[i] |= 0x01; + if (spurs->m.wklEvent2[i] & 0x02 || spurs->m.wklEvent2[i] & 0x10) { + wklNotifyBitSet |= 0x8000 >> i; + } } } - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklNotifyBitSet) { - // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyMask); + // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyMask); } } /// Update workload information in the SPU from CellSpurs void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + } + u32 wklShutdownBitSet = 0; mgmt->wklRunnable1 = 0; mgmt->wklRunnable2 = 0; for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); + // Copy the priority of the workload for this SPU and its unique id to the LS - mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed(); + mgmt->priority[i] = wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - wklInfo1[i].priority[mgmt->spuNum]; + mgmt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); - // Update workload status and runnable flag based on the workload state - auto wklStatus = mgmt->spurs->m.wklStatus1[i]; - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) { - mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x80000000u >> i; - } - } - - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { // Copy the priority of the workload for this SPU to the LS - if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4; + if (wklInfo2[i].priority[mgmt->spuNum]) { + mgmt->priority[i] |= (0x10 - wklInfo2[i].priority[mgmt->spuNum]) << 4; } + } + } + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { // Update workload status and runnable flag based on the workload state - wklStatus = mgmt->spurs->m.wklStatus2[i]; - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; + auto wklStatus = spurs->m.wklStatus1[i]; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable1 |= 0x8000 >> i; } else { - mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); } // If the workload is shutting down and if this is the last SPU from which it is being removed then // add it to the shutdown bit set - if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) { - mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x8000 >> i; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { + spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + // Update workload status and runnable flag based on the workload state + wklStatus = spurs->m.wklStatus2[i]; + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; + mgmt->wklRunnable2 |= 0x8000 >> i; + } else { + spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState2[i].read_relaxed() == 
SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { + spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } } } } - } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklShutdownBitSet) { spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); @@ -651,57 +715,85 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) /// Process any messages void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - LV2_LOCK(0); + bool updateTrace = false; + bool updateWorkload = false; + bool terminate = false; + + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + // Terminate request + if (spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { + spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + terminate = true; + } + + // Update workload message + if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { + spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + updateWorkload = true; + } + + // Update trace message + if (spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + updateTrace = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); // Process update workload message - if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); + if (updateWorkload) { spursSysServiceUpdateWorkload(spu, mgmt); } // Process update trace message - if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { + if (updateTrace) { spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); } // Process terminate request - if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); + if (terminate) { // TODO: Rest of the terminate processing } } /// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { + // Monitor only lock line reservation lost events + spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); + + bool shouldExit; while (true) { - Emu.GetCoreMutex().lock(); + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Find the number of SPUs that are idling in this SPURS instance u32 nIdlingSpus = 0; for (u32 i = 0; i < 8; i++) { - if (mgmt->spurs->m.spuIdling & (1 << i)) { + if (spurs->m.spuIdling & (1 << i)) { nIdlingSpus++; } } - bool allSpusIdle = nIdlingSpus == mgmt->spurs->m.nSpus ? true: false; - bool exitIfNoWork = mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; + bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; + bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; + shouldExit = allSpusIdle && exitIfNoWork; // Check if any workloads can be scheduled bool foundReadyWorkload = false; - if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { foundReadyWorkload = true; } else { - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + if (spurs->m.flags1 & SF1_32_WORKLOADS) { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { u32 j = i & 0x0F; u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklCurrentContention[j] & 0x0F : mgmt->spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); if (runnable && priority > 0 && maxContention > contention) { if (wklFlag || wklSignal || readyCount > contention) { @@ -713,14 +805,14 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { } else { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); u8 requestCount = readyCount + idleSpuCount; - if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > mgmt->spurs->m.wklCurrentContention[i]) { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > mgmt->spurs->m.wklCurrentContention[i])) { + if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) { foundReadyWorkload = true; break; } @@ -729,33 +821,29 @@ void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { } } + bool spuIdling = spurs->m.spuIdling & (1 << mgmt->spuNum) ? true : false; + if (foundReadyWorkload && shouldExit == false) { + spurs->m.spuIdling &= ~(1 << mgmt->spuNum); + } else { + spurs->m.spuIdling |= 1 << mgmt->spuNum; + } + // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. - if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && (allSpusIdle == false || exitIfNoWork == false) && foundReadyWorkload == false) { - // The system service blocks by making a reservation and waiting on the reservation lost event. This is unfortunately - // not yet completely implemented in rpcs3. So we busy wait here. - //u128 r; - //spu.ReadChannel(r, 0); - - Emu.GetCoreMutex().unlock(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - Emu.GetCoreMutex().lock(); + if (spuIdling && shouldExit == false && foundReadyWorkload == false) { + // The system service blocks by making a reservation and waiting on the lock line reservation lost event. + u128 r; + spu.ReadChannel(r, SPU_RdEventStat); + spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); } - if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) { - mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum; - } else { - mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum); + auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + if (dmaSuccess && (shouldExit || foundReadyWorkload)) { + break; } + } - Emu.GetCoreMutex().unlock(); - - if (allSpusIdle == false || exitIfNoWork == false) { - if (foundReadyWorkload == true) { - return; - } - } else { - // TODO: exit spu thread group - } + if (shouldExit) { + // TODO: exit spu thread group } } @@ -764,22 +852,31 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); if (mgmt->spurs.addr() % CellSpurs::align) { - assert(0); + spursHalt(spu); + return; } // Initialise the system service if this is the first time its being started on this SPU if (mgmt->sysSrvInitialised == 0) { mgmt->sysSrvInitialised = 1; - LV2_LOCK(0); - if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - assert(0); - } + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; - mgmt->traceBuffer = 0; - mgmt->traceMsgCount = -1; + do { + spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + // Halt if already initialised + if (spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { + spursHalt(spu); + return; + } + + spurs->m.sysSrvOnSpu |= 
1 << mgmt->spuNum; + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + mgmt->traceBuffer = 0; + mgmt->traceMsgCount = -1; spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); spursSysServiceCleanupAfterPreemption(spu, mgmt); @@ -818,6 +915,8 @@ poll: pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; pkt.data.stop = SPURS_GUID_SYS_WKL; cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); break; } @@ -849,10 +948,6 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu) { auto arg = spu.GPR[4]._u64[1]; auto pollStatus = spu.GPR[5]._u32[3]; - spu.GPR[1]._u32[3] = 0x3FFD0; - *(vm::ptr::make(spu.GPR[1]._u32[3])) = 0x3FFF0; - memset(vm::get_ptr(spu.ls_offset + 0x3FFE0), 0, 32); - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { spursSysServiceWorkloadMain(spu, pollStatus); } else { @@ -860,7 +955,6 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu) { // system service workload. Need to implement this. } - // TODO: Ensure that this function always returns to the SPURS kernel return false; } From f7b7c234b7d745e41f35ff8cce2350b623875562 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 31 Jan 2015 01:32:07 +0530 Subject: [PATCH 21/29] SPURS: Implement some SPU thread functions used by the system service module --- rpcs3/Emu/SysCalls/lv2/sys_spu.cpp | 83 ++++++++++++++++++++++++++++++ rpcs3/Emu/SysCalls/lv2/sys_spu.h | 6 +++ 2 files changed, 89 insertions(+) diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp index 29cdb9bd39..b6d29837e2 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp @@ -1099,3 +1099,86 @@ s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value) *value = (u32)t->cfg.value; return CELL_OK; } + +void sys_spu_thread_exit(SPUThread & spu, s32 status) +{ + // Cancel any pending status update requests + u128 r; + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0)); + while (spu.GetChannelCount(MFC_RdTagStat) != 1); + spu.ReadChannel(r, MFC_RdTagStat); + + // Wait for all pending DMA operations to complete + spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL)); + spu.ReadChannel(r, MFC_RdTagStat); + + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status)); + spu.StopAndSignal(0x102); +} + +void sys_spu_thread_group_exit(SPUThread & spu, s32 status) +{ + // Cancel any pending status update requests + u128 r; + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0)); + while (spu.GetChannelCount(MFC_RdTagStat) != 1); + spu.ReadChannel(r, MFC_RdTagStat); + + // Wait for all pending DMA operations to complete + spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL)); + spu.ReadChannel(r, MFC_RdTagStat); + + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status)); + spu.StopAndSignal(0x101); +} + +s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1) +{ + if (spup > 0x3F) + { + return CELL_EINVAL; + } + + if (spu.GetChannelCount(SPU_RdInMbox)) + { + return CELL_EBUSY; + } + + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(data1)); + spu.WriteChannel(SPU_WrOutIntrMbox, u128::from32r((spup << 24) | (data0 & 0x00FFFFFF))); + + u128 r; + spu.ReadChannel(r, SPU_RdInMbox); + return r._u32[3]; +} + +s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status) +{ + if 
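All of these sys_spu_thread_* calls share the same quiesce sequence before signalling the PPU; an annotated copy of the steps, with the comments mirroring the patch's own reading of the channel writes:

```cpp
// MFC quiesce sequence shared by the sys_spu_thread_* calls in this patch.
u128 r;
spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));     // cancel any pending tag-status request
while (spu.GetChannelCount(MFC_RdTagStat) != 1);         // spin until a status is queued
spu.ReadChannel(r, MFC_RdTagStat);                       // consume and discard it

spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));            // select all 32 tag groups
spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));  // report when all complete
spu.ReadChannel(r, MFC_RdTagStat);                       // blocks until every DMA has drained
```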
(spu.GetChannelCount(SPU_RdInMbox)) + { + return CELL_EBUSY; + } + + // Cancel any pending status update requests + u128 r; + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0)); + while (spu.GetChannelCount(MFC_RdTagStat) != 1); + spu.ReadChannel(r, MFC_RdTagStat); + + // Wait for all pending DMA operations to complete + spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL)); + spu.ReadChannel(r, MFC_RdTagStat); + + do + { + spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status)); + spu.StopAndSignal(0x120); + spu.ReadChannel(r, SPU_RdInMbox); + } + while (r._u32[3] == CELL_EBUSY); + + return r._u32[3]; +} diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h index e71c606bf7..e129455758 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h @@ -204,3 +204,9 @@ s32 sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr stat); s32 sys_raw_spu_read_puint_mb(u32 id, vm::ptr value); s32 sys_raw_spu_set_spu_cfg(u32 id, u32 value); s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value); + +// SPU Calls +void sys_spu_thread_exit(SPUThread & spu, s32 status); +void sys_spu_thread_group_exit(SPUThread & spu, s32 status); +s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1); +s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status); From d8bed3b0ced0bc219658ad90148f68e1caa6a5bd Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 31 Jan 2015 03:35:03 +0530 Subject: [PATCH 22/29] SPURS: Implement some portions of the taskset PM --- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 6 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 130 ++++++++++++++++---- 2 files changed, 110 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 4d77a06402..08fff45abf 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -155,9 +155,11 @@ enum SpursWorkloadConstants : u64 // GUID SPURS_GUID_SYS_WKL = 0x1BB841BF38F89D33ull, + SPURS_GUID_TASKSET_PM = 0x836E915B2E654143ull, // Image addresses SPURS_IMG_ADDR_SYS_SRV_WORKLOAD = 0x100, + SPURS_IMG_ADDR_TASKSET_PM = 0x200, }; enum CellSpursModulePollStatus @@ -209,6 +211,8 @@ enum SpursTaskConstants CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, + CELL_SPURS_TASKSET_PM_ENTRY_ADDR = 0xA00, + CELL_SPURS_TASKSET_PM_SYSCALL_ADDR = 0xA70, }; enum CellSpursEventFlagWaitMode @@ -917,7 +921,7 @@ struct SpursTasksetPmMgmtData be_t x27B0; // 0x27B0 vm::bptr taskset; // 0x27B8 be_t kernelMgmtAddr; // 0x27C0 - be_t yieldAddr; // 0x27C4 + be_t syscallAddr; // 0x27C4 be_t x27C8; // 0x27C8 be_t spuNum; // 0x27CC be_t dmaTagId; // 0x27D0 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index e6921057c1..653d4ca259 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -13,6 +13,7 @@ // void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); +void cellSpursModuleExit(SPUThread & spu); bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); @@ -39,8 +40,16 @@ void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); bool 
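Before diving into the module code, it helps to fix the SPU local-storage map these constants imply; every offset below appears in the patches that follow.

```cpp
// SPU local-storage map used by the taskset policy module (LS offsets):
//   0x0100   SPURS kernel context
//   0x0A00   policy module image / entry   (CELL_SPURS_TASKSET_PM_ENTRY_ADDR)
//   0x0A70   taskset "syscall" trampoline  (CELL_SPURS_TASKSET_PM_SYSCALL_ADDR)
//   0x2700   taskset context (SpursTasksetPmMgmtData)
//   0x2780   staging buffer for the current task's TaskInfo
//   CELL_SPURS_TASK_TOP .. CELL_SPURS_TASK_BOTTOM   area for task code and data
```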
spursSysServiceWorkloadEntry(SPUThread & spu); // -// SPURS taskset polict module functions +// SPURS taskset policy module functions // +bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +void spursTasksetExit(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); +bool spursTasksetShouldYield(SPUThread & spu); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +void spursTasksetResumeTask(SPUThread & spu); +bool spursTasksetEntry(SPUThread & spu); extern Module *cellSpurs; @@ -73,6 +82,12 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { return wklId == mgmt->wklCurrentId ? 0 : 1; } +/// Exit current workload +void cellSpursModuleExit(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(mgmt->yieldToKernelAddr); +} + /// Execute a DMA operation bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { spu.WriteChannel(MFC_LSA, u128::from32r(lsa)); @@ -416,7 +431,6 @@ bool spursKernelMain(SPUThread & spu) { bool isKernel2; u32 pollStatus; - u64 wklArg; if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { // Entry point of SPURS kernel // Save arguments @@ -453,9 +467,9 @@ bool spursKernelMain(SPUThread & spu) { spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - // Register the system HLE service workload entry point - spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); - wklArg = mgmt->spurs->m.wklInfoSysSrv.arg; + // DMA in the system service workload info + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfoSysSrv), 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID/*tag*/); + spursDmaWaitForCompletion(spu, 0x80000000); pollStatus = 0; } else if (spu.PC == mgmt->yieldToKernelAddr) { isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; @@ -468,15 +482,38 @@ bool spursKernelMain(SPUThread & spu) { spursKernel1SelectWorkload(spu); } - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - auto wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &mgmt->spurs->m.wklInfo2[wid & 0xf] : - &mgmt->spurs->m.wklInfoSysSrv); - wklArg = wklInfo->arg; + pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + + // DMA in the workload info for the selected workload + auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : + wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : + offsetof(CellSpurs, m.wklInfoSysSrv); + spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); } else { assert(0); } + auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0); + if (mgmt->wklCurrentAddr != wklInfo->addr) { + switch (wklInfo->addr.addr()) { + case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: + spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + break; + case SPURS_IMG_ADDR_TASKSET_PM: + spu.RegisterHleFunction(0xA00, spursTasksetEntry); + break; + default: + spursDma(spu, MFC_GET_CMD, wklInfo->addr.addr(), 0xA00/*LSA*/, wklInfo->size, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); + break; + } + + mgmt->wklCurrentAddr = wklInfo->addr; + mgmt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); + } + if (!isKernel2) { mgmt->moduleId[0] = 0; mgmt->moduleId[1] = 0; @@ -486,7 +523,7 @@ bool spursKernelMain(SPUThread & spu) { spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; - spu.GPR[4]._u64[1] = wklArg; + spu.GPR[4]._u64[1] = wklInfo->arg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); return false; @@ -597,7 +634,8 @@ void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 } if (notify) { - // TODO: sys_spu_thread_send_event(spurs->m.spuPort, 2, 0); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); } } @@ -1020,6 +1058,24 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 return false; } +void spursTasksetExit(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + // Trace - STOP + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = 0x54; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP + pkt.data.stop = SPURS_GUID_TASKSET_PM; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + // Not sure why this check exists. Perhaps to check for memory corruption. 
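One plausible reading of the check that follows, inferred from how the marker is written at entry (the patch itself is unsure):

```cpp
// The taskset context at LS 0x2700 begins with a 16-byte marker written by
// spursTasksetEntry: memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16).
// If a task scribbled over the context area, the marker no longer matches
// and the SPU is halted rather than allowed to run on corrupted state.
if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) {
    spursHalt(spu);   // assumption: unrecoverable context corruption
}
```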
+ if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) { + spursHalt(spu); + } + + cellSpursModuleExit(spu); +} + void spursTasksetDispatch(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); @@ -1028,7 +1084,8 @@ void spursTasksetDispatch(SPUThread & spu) { u32 isWaiting; spursTasksetProcessRequest(spu, 5, &taskId, &isWaiting); if (taskId >= CELL_SPURS_MAX_TASK) { - // TODO: spursTasksetExit(spu); + spursTasksetExit(spu); + return; } mgmt->taskId = taskId; @@ -1109,11 +1166,23 @@ void spursTasksetInit(SPUThread & spu, u32 pollStatus) { spursTasksetProcessPollStatus(spu, pollStatus); } -void spursTasksetEntry(SPUThread & spu) { +void spursTasksetResumeTask(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - // Check if the function was invoked by the SPURS kernel or because of a syscall - if (spu.PC != 0xA70) { + // Restore task context + spu.GPR[0] = mgmt->savedContextLr; + spu.GPR[1] = mgmt->savedContextSp; + for (auto i = 0; i < 48; i++) { + spu.GPR[80 + i] = mgmt->savedContextR80ToR127[i]; + } + + spu.SetBranch(spu.GPR[0]._u32[3]); +} + +bool spursTasksetEntry(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + if (spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR) { // Called from kernel auto kernelMgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); auto arg = spu.GPR[4]._u64[1]; @@ -1123,20 +1192,31 @@ void spursTasksetEntry(SPUThread & spu) { mgmt->taskset.set(arg); memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3]; - mgmt->yieldAddr = 0xA70; + mgmt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR; mgmt->spuNum = kernelMgmt->spuNum; mgmt->dmaTagId = kernelMgmt->dmaTagId; mgmt->taskId = 0xFFFFFFFF; + // Register SPURS takset policy module HLE functions + spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000); // TODO: use a symbolic constant + spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry); + spu.RegisterHleFunction(mgmt->syscallAddr, spursTasksetEntry); + spursTasksetInit(spu, pollStatus); - // TODO: Dispatch + spursTasksetDispatch(spu); + } else if (spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR) { + // Save task context + mgmt->savedContextLr = spu.GPR[0]; + mgmt->savedContextSp = spu.GPR[1]; + for (auto i = 0; i < 48; i++) { + mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; + } + + // TODO: Process syscall + spursTasksetResumeTask(spu); + } else { + assert(0); } - mgmt->savedContextLr = spu.GPR[0]; - mgmt->savedContextSp = spu.GPR[1]; - for (auto i = 0; i < 48; i++) { - mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; - } - - // TODO: Process syscall + return false; } From 61342946a4a84271a398f3f48bfd1ed60e91f69d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 1 Feb 2015 02:16:06 +0530 Subject: [PATCH 23/29] SPURS: Implement some portions of taskset pm --- rpcs3/Emu/Cell/SPUInterpreter.h | 5 +- rpcs3/Emu/Cell/SPUThread.h | 9 + rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 34 ++- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 235 +++++++++++++++++--- 4 files changed, 235 insertions(+), 48 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 151eb4e436..8351911153 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -1373,10 +1373,7 @@ private: } void FSCRWR(u32 rt, u32 ra) { - CPU.FPSCR._u32[3] = CPU.GPR[ra]._u32[3] & 0x00000F07; - CPU.FPSCR._u32[2] = CPU.GPR[ra]._u32[2] & 0x00003F07; - CPU.FPSCR._u32[1] 
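Stepping back to spursTasksetEntry in the hunk above: the same HLE handler is registered at two LS addresses and uses the faulting PC to tell the call paths apart. A condensed map of that contract, with register meanings taken from the surrounding code:

```cpp
// spursTasksetEntry, dual-entry dispatch (condensed from the patch above):
//   spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR (0xA00)
//     called by the SPURS kernel with GPR3 = LS offset of the kernel context,
//     GPR4 = the workload argument (the taskset EA), GPR5 = poll status;
//     initialises the taskset context, then dispatches a task.
//   spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR (0xA70)
//     reached from a running task; LR (GPR0), SP (GPR1) and R80..R127 are
//     saved first so the task can later be resumed where it left off.
```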
= CPU.GPR[ra]._u32[1] & 0x00003F07; - CPU.FPSCR._u32[0] = CPU.GPR[ra]._u32[0] & 0x00000F07; + CPU.FPSCR.Write(CPU.GPR[ra]); } void DFTSV(u32 rt, u32 ra, s32 i7) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index ce0036e4a5..cfd45d6c0e 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -248,6 +248,15 @@ public: { _u32[1+slice] |= exceptions; } + + // Write the FPSCR + void Write(u128 & r) + { + _u32[3] = r._u32[3] & 0x00000F07; + _u32[2] = r._u32[2] & 0x00003F07; + _u32[1] = r._u32[1] & 0x00003F07; + _u32[0] = r._u32[0] & 0x00000F07; + } }; union SPU_SNRConfig_hdr diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 08fff45abf..85df5fb582 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -40,6 +40,7 @@ enum { CELL_SPURS_TASK_ERROR_AGAIN = 0x80410901, CELL_SPURS_TASK_ERROR_INVAL = 0x80410902, + CELL_SPURS_TASK_ERROR_NOSYS = 0x80410903, CELL_SPURS_TASK_ERROR_NOMEM = 0x80410904, CELL_SPURS_TASK_ERROR_SRCH = 0x80410905, CELL_SPURS_TASK_ERROR_NOEXEC = 0x80410907, @@ -213,6 +214,17 @@ enum SpursTaskConstants CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, CELL_SPURS_TASKSET_PM_ENTRY_ADDR = 0xA00, CELL_SPURS_TASKSET_PM_SYSCALL_ADDR = 0xA70, + + // Task syscall numbers + CELL_SPURS_TASK_SYSCALL_EXIT = 0, + CELL_SPURS_TASK_SYSCALL_YIELD = 1, + CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL = 2, + CELL_SPURS_TASK_SYSCALL_POLL = 3, + CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG = 4, + + // Task poll status + CELL_SPURS_TASK_POLL_FOUND_TASK = 1, + CELL_SPURS_TASK_POLL_FOUND_WORKLOAD = 2, }; enum CellSpursEventFlagWaitMode @@ -627,10 +639,10 @@ struct CellSpursTaskset struct TaskInfo { - CellSpursTaskArgument args; - vm::bptr elf_addr; - be_t context_save_storage_and_alloc_ls_blocks; // This is (context_save_storage_addr | allocated_ls_blocks) - CellSpursTaskLsPattern ls_pattern; + CellSpursTaskArgument args; // 0x00 + vm::bptr elf_addr; // 0x10 + be_t context_save_storage_and_alloc_ls_blocks; // 0x18 This is (context_save_storage_addr | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; // 0x20 }; static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); @@ -656,7 +668,7 @@ struct CellSpursTaskset u8 x72; // 0x72 u8 last_scheduled_task; // 0x73 be_t wid; // 0x74 - u8 unk1[8]; // 0x78 + be_t x78; // 0x78 TaskInfo task_info[128]; // 0x80 vm::bptr exception_handler; // 0x1880 vm::bptr exception_handler_arg; // 0x1888 @@ -765,7 +777,7 @@ struct CellSpursTaskset2 u8 x72; // 0x72 u8 last_scheduled_task; // 0x73 be_t wid; // 0x74 - u8 unk1[8]; // 0x78 + be_t x78; // 0x78 TaskInfo task_info[128]; // 0x80 vm::bptr exception_handler; // 0x1880 vm::bptr exception_handler_arg; // 0x1888 @@ -773,9 +785,9 @@ struct CellSpursTaskset2 u32 unk2; // 0x1894 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C - u8 unk3[0xE8]; // 0x18A0 - u64 task_exit_code[256]; // 0x1988 - u8 unk4[0x778]; // 0x2188 + u8 unk3[0x1980 - 0x18A0]; // 0x18A0 + be_t task_exit_code[128]; // 0x1980 + u8 unk4[0x2900 - 0x2180]; // 0x2180 } m; static_assert(sizeof(_CellSpursTaskset2) == size, "Wrong _CellSpursTaskset2 size"); @@ -939,8 +951,8 @@ struct SpursTasksetPmMgmtData be_t lowestLoadSegmentAddr; // 0x2FBC be_t x2FC0; // 0x2FC0 be_t x2FC8; // 0x2FC8 - be_t x2FD0; // 0x2FD0 - be_t taskExitCode; // 0x2FD4 + be_t taskExitCode; // 0x2FD0 + be_t x2FD4; // 0x2FD4 u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp 
index 653d4ca259..7118eadbd5 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -18,7 +18,7 @@ void cellSpursModuleExit(SPUThread & spu); bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); -void spursHalt(); +void spursHalt(SPUThread & spu); // // SPURS Kernel functions @@ -44,9 +44,10 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu); // bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); void spursTasksetExit(SPUThread & spu); +void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); void spursTasksetDispatch(SPUThread & spu); void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); -bool spursTasksetShouldYield(SPUThread & spu); +bool spursTasksetPollStatus(SPUThread & spu); void spursTasksetInit(SPUThread & spu, u32 pollStatus); void spursTasksetResumeTask(SPUThread & spu); bool spursTasksetEntry(SPUThread & spu); @@ -1018,6 +1019,8 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 // TODO: Implement cases s32 delta = 0; switch (request + 1) { + case -1: + break; case 0: break; case 1: @@ -1033,7 +1036,7 @@ bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 case 6: break; default: - assert(0); + spursHalt(spu); break; } @@ -1076,9 +1079,24 @@ void spursTasksetExit(SPUThread & spu) { cellSpursModuleExit(spu); } +void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); + + spu.GPR[2].clear(); + spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]); + spu.GPR[4]._u64[1] = taskset->m.args; + spu.GPR[4]._u64[0] = taskset->m.spurs.addr(); + for (auto i = 5; i < 128; i++) { + spu.GPR[i].clear(); + } + + spu.SetBranch(mgmt->savedContextLr.value()._u32[3]); +} + void spursTasksetDispatch(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); u32 taskId; u32 isWaiting; @@ -1089,7 +1107,13 @@ void spursTasksetDispatch(SPUThread & spu) { } mgmt->taskId = taskId; - u64 elfAddr = mgmt->taskset->m.task_info[taskId].elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull; + + // DMA in the task info for the selected task + spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), mgmt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); + auto elfAddr = taskInfo->elf_addr.addr().value(); + taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull); // Trace - Task: Incident=dispatch CellSpursTracePacket pkt; @@ -1097,37 +1121,84 @@ void spursTasksetDispatch(SPUThread & spu) { pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK; pkt.data.task.incident = CELL_SPURS_TRACE_TASK_DISPATCH; pkt.data.task.taskId = taskId; - cellSpursModulePutTrace(&pkt, 0x1F); + cellSpursModulePutTrace(&pkt, CELL_SPURS_KERNEL_DMA_TAG_ID); if (isWaiting == 0) { - } + // If we reach here it means that the task is being started and not being resumed + mgmt->lowestLoadSegmentAddr = CELL_SPURS_TASK_TOP; - if 
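For the other side of the handoff, spursTasksetStartTask above fixes the register state a task begins with; the zeroing is what keeps scheduler state from leaking into task code. Summarized:

```cpp
// Task start contract established by spursTasksetStartTask (from the patch):
//   GPR3 = the task's 128-bit CellSpursTaskArgument
//   GPR4 = taskset args and the SPURS instance EA, one per 64-bit half
//   GPR2 and GPR5..GPR127 cleared
// Control then branches to the ELF entry point, which spursTasksetDispatch
// stashed in savedContextLr before calling this function.
```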
(mgmt->taskset->m.enable_clear_ls) { - memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); - } + // TODO: Load elf + // TODO: halt if rc of Load elf != CELL_OK - // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area - if (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || - (mgmt->taskset->m.task_info[taskId].ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { - // Load the ELF - // TODO: Load ELF - } + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - // Load save context from main memory to LS - u64 context_save_storage = mgmt->taskset->m.task_info[taskId].context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; - for (auto i = 6; i < 128; i++) { - bool shouldLoad = mgmt->taskset->m.task_info[taskId].ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; - if (shouldLoad) { - memcpy(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP + ((i - 6) << 11)), - vm::get_ptr((u32)context_save_storage + 0x400 + ((i - 6) << 11)), 0x800); + mgmt->tasksetMgmtAddr = 0x2700; + mgmt->x2FC0 = 0; + mgmt->taskExitCode = isWaiting; + mgmt->x2FD4 = elfAddr & 5; // TODO: Figure this out + + if ((elfAddr & 5) == 1) { + spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, mgmt->dmaTagId); } - } - // Trace - GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; - pkt.data.guid = 0; // TODO: Put GUID of taskId here - cellSpursModulePutTrace(&pkt, 0x1F); + // Trace - GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; + pkt.data.guid = 0; // TODO: Put GUID of taskId here + cellSpursModulePutTrace(&pkt, 0x1F); + + if (elfAddr & 2) { // TODO: Figure this out + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP); + spu.Stop(); + return; + } + + spursTasksetStartTask(spu, taskInfo->args); + } else { + if (taskset->m.enable_clear_ls) { + memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); + } + + // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well + if (taskInfo->ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || + (taskInfo->ls_pattern.u64[1] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + // Load the ELF + // TODO: Load ELF + // TODO: halt if rc of Load elf != CELL_OK + } + + // Load saved context from main memory to LS + u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + for (auto i = 6; i < 128; i++) { + bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? 
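The shift arithmetic in this restore loop encodes the LS-pattern convention used throughout the task context code; spelled out from the constants on this line:

```cpp
// ls_pattern: a 128-bit mask with one bit per 2 KB (0x800-byte) LS block.
// For a set bit i (the loops start at block 6, as in the code):
//   LS address             = CELL_SPURS_TASK_TOP + ((i - 6) << 11)
//   context-save-area slot = contextSaveStorage + 0x400 + ((i - 6) << 11)
// The loop restores exactly the blocks the task declared as its own.
```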
true : false; + if (shouldLoad) { + // TODO: Combine DMA requests for consecutive blocks into a single request + spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); + } + } + + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + + // Restore saved registers + spu.FPSCR.Write(mgmt->savedContextFpscr.value()); + spu.WriteChannel(MFC_WrTagMask, u128::from32r(mgmt->savedWriteTagGroupQueryMask)); + spu.WriteChannel(SPU_WrEventMask, u128::from32r(mgmt->savedSpuWriteEventMask)); + + // Trace - GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID; + pkt.data.guid = 0; // TODO: Put GUID of taskId here + cellSpursModulePutTrace(&pkt, 0x1F); + + if (elfAddr & 2) { // TODO: Figure this out + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP); + spu.Stop(); + return; + } + + spu.GPR[3].clear(); + spursTasksetResumeTask(spu); + } } void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { @@ -1136,7 +1207,7 @@ void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { } } -bool spursTasksetShouldYield(SPUThread & spu) { +bool spursTasksetPollStatus(SPUThread & spu) { u32 pollStatus; if (cellSpursModulePollStatus(spu, &pollStatus)) { @@ -1179,6 +1250,99 @@ void spursTasksetResumeTask(SPUThread & spu) { spu.SetBranch(spu.GPR[0]._u32[3]); } +s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); + + // If the 0x10 bit is set in syscallNum then its the 2nd version of the + // syscall (e.g. cellSpursYield2 instead of cellSpursYield) and so don't wait + // for DMA completion + if ((syscallNum & 0x10) == 0) { + spursDmaWaitForCompletion(spu, 0xFFFFFFFF); + syscallNum &= 0x0F; + } + + s32 rc = 0; + u32 incident = 0; + switch (syscallNum) { + case CELL_SPURS_TASK_SYSCALL_EXIT: + if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out + if (mgmt->x2FD4 != 4) { + spursTasksetProcessRequest(spu, 0, nullptr, nullptr); + } + + auto a = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; + auto b = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8; + // TODO: onTaskExit(a, mgmt->taskId, mgmt->taskExitCode, b) + } + + incident = CELL_SPURS_TRACE_TASK_EXIT; + break; + case CELL_SPURS_TASK_SYSCALL_YIELD: + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 3, nullptr, nullptr)) { + // If we reach here then it means that either another task can be scheduled or another workload can be scheduled + // Save the context of the current task + // TODO: rc = saveContext + if (rc == CELL_OK) { + spursTasksetProcessRequest(spu, 1, nullptr, nullptr); + incident = CELL_SPURS_TRACE_TASK_YIELD; + } + } + break; + case CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL: + if (spursTasksetProcessRequest(spu, -1, nullptr, nullptr)) { + // TODO: rc = saveContext + if (rc == CELL_OK) { + if (spursTasksetProcessRequest(spu, 2, nullptr, nullptr) == false) { + incident = CELL_SPURS_TRACE_TASK_WAIT; + } + } + } + break; + case CELL_SPURS_TASK_SYSCALL_POLL: + rc = spursTasksetPollStatus(spu) ? CELL_SPURS_TASK_POLL_FOUND_WORKLOAD : 0; + rc |= spursTasksetProcessRequest(spu, 3, nullptr, nullptr) ? 
CELL_SPURS_TASK_POLL_FOUND_TASK : 0; + break; + case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG: + if (args == 0) { // TODO: Figure this out + spursHalt(spu); + } + + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 4, nullptr, nullptr) != true) { + // TODO: rc = saveContext + if (rc == CELL_OK) { + spursTasksetProcessRequest(spu, 1, nullptr, nullptr); + incident = CELL_SPURS_TRACE_TASK_WAIT; + } + } + break; + default: + rc = CELL_SPURS_TASK_ERROR_NOSYS; + break; + } + + if (incident) { + // Trace - TASK + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK; + pkt.data.task.incident = incident; + pkt.data.task.taskId = mgmt->taskId; + cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + + // Clear the GUID of the task + memset(vm::get_ptr(spu.ls_offset + mgmt->lowestLoadSegmentAddr), 0, 0x10); + + if (spursTasksetPollStatus(spu)) { + spursTasksetExit(spu); + } + + spursTasksetDispatch(spu); + } + + return rc; +} + bool spursTasksetEntry(SPUThread & spu) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); @@ -1188,6 +1352,7 @@ bool spursTasksetEntry(SPUThread & spu) { auto arg = spu.GPR[4]._u64[1]; auto pollStatus = spu.GPR[5]._u32[3]; + // Initialise memory and save args memset(mgmt, 0, sizeof(*mgmt)); mgmt->taskset.set(arg); memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16); @@ -1212,8 +1377,12 @@ bool spursTasksetEntry(SPUThread & spu) { mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i]; } - // TODO: Process syscall - spursTasksetResumeTask(spu); + spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]); + + // Resume the previously executing task if the syscall did not cause a context switch + if (spu.m_is_branch == false) { + spursTasksetResumeTask(spu); + } } else { assert(0); } From ba6ac5019ea6e022d08060c730ae0cc9af1a0c6d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 2 Feb 2015 01:32:40 +0530 Subject: [PATCH 24/29] SPURS: Implement some portions of taskset policy manager --- rpcs3/Emu/Cell/SPUInterpreter.h | 5 +- rpcs3/Emu/Cell/SPUThread.h | 9 + rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 34 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 16 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 380 ++++++++++++++++---- rpcs3/Loader/ELF32.cpp | 7 +- rpcs3/Loader/ELF32.h | 2 +- 7 files changed, 345 insertions(+), 108 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 8351911153..7c56b4c9ac 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -1316,10 +1316,7 @@ private: void FSCRRD(u32 rt) { - CPU.GPR[rt]._u32[3] = CPU.FPSCR._u32[3]; - CPU.GPR[rt]._u32[2] = CPU.FPSCR._u32[2]; - CPU.GPR[rt]._u32[1] = CPU.FPSCR._u32[1]; - CPU.GPR[rt]._u32[0] = CPU.FPSCR._u32[0]; + CPU.FPSCR.Read(CPU.GPR[rt]); } void FESD(u32 rt, u32 ra) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index cfd45d6c0e..ff5012f146 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -257,6 +257,15 @@ public: _u32[1] = r._u32[1] & 0x00003F07; _u32[0] = r._u32[0] & 0x00000F07; } + + // Read the FPSCR + void Read(u128 & r) + { + r._u32[3] = _u32[3]; + r._u32[2] = _u32[2]; + r._u32[1] = _u32[1]; + r._u32[0] = _u32[0]; + } }; union SPU_SNRConfig_hdr diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 83dffb54e7..b8088daaff 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2602,8 +2602,8 @@ s64 
spursCreateTaskset(vm::ptr spurs, vm::ptr tasks cellSpursAddWorkloadWithAttribute(spurs, vm::ptr::make(wid.addr()), vm::ptr::make(wkl_attr.addr())); // TODO: Check return code - taskset->m.x72 = 0x80; - taskset->m.wid = wid.value(); + taskset->m.wkl_flag_wait_task = 0x80; + taskset->m.wid = wid.value(); // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset); // TODO: Check return code @@ -2801,11 +2801,9 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: u32 tmp_task_id; for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++) { - u32 l = tmp_task_id >> 5; - u32 b = tmp_task_id & 0x1F; - if ((taskset->m.enabled_set[l] & (0x80000000 >> b)) == 0) + if (!taskset->m.enabled.value()._bit[tmp_task_id]) { - taskset->m.enabled_set[l] |= 0x80000000 >> b; + taskset->m.enabled.value()._bit[tmp_task_id] = true; break; } } @@ -2840,10 +2838,10 @@ s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, #endif } -s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) +s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskId) { #ifdef PRX_DEBUG - cellSpurs->Warning("_cellSpursSendSignal(taskset_addr=0x%x, taskID=%d)", taskset.addr(), taskID); + cellSpurs->Warning("_cellSpursSendSignal(taskset_addr=0x%x, taskId=%d)", taskset.addr(), taskId); return GetCurrentPPUThread().FastCall2(libsre + 0x124CC, libsre_rtoc); #else if (!taskset) @@ -2856,29 +2854,23 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID) return CELL_SPURS_TASK_ERROR_ALIGN; } - if (taskID >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + if (taskId >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) { return CELL_SPURS_TASK_ERROR_INVAL; } - auto word = taskID >> 5; - auto mask = 0x80000000u >> (taskID & 0x1F); - auto disabled = taskset->m.enabled_set[word] & mask ? false : true; - auto running = taskset->m.running_set[word]; - auto ready = taskset->m.ready_set[word]; - auto ready2 = taskset->m.ready2_set[word]; - auto waiting = taskset->m.waiting_set[word]; - auto signalled = taskset->m.signal_received_set[word]; - auto enabled = taskset->m.enabled_set[word]; - auto invalid = (ready & ready2) || (running & waiting) || ((running | ready | ready2 | waiting | signalled) & ~enabled) || disabled; + auto _0 = be_t::make(u128::from32(0)); + auto disabled = taskset->m.enabled.value()._bit[taskId] ? false : true; + auto invalid = (taskset->m.ready & taskset->m.pending_ready) != _0 || (taskset->m.running & taskset->m.waiting) != _0 || disabled || + ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.waiting | taskset->m.signalled) & be_t::make(~taskset->m.enabled.value())) != _0; if (invalid) { return CELL_SPURS_TASK_ERROR_SRCH; } - auto shouldSignal = waiting & ~signalled & mask ? true : false; - taskset->m.signal_received_set[word] |= mask; + auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? 
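The u128 expression above compresses the whole delivery rule into one line; a standalone restatement, with std::bitset replacing the emulator's be_t masks purely for readability:

```cpp
#include <bitset>

// Signal rule of _cellSpursSendSignal above. Returns true when the workload
// must be kicked (cellSpursSendWorkloadSignal) so the waiter can run.
bool deliverSignal(std::bitset<128>& signalled, const std::bitset<128>& waiting, unsigned taskId)
{
    // Wake only a task that is waiting and has no signal latched yet
    bool shouldSignal = waiting.test(taskId) && !signalled.test(taskId);
    signalled.set(taskId); // the signal is latched in every case
    return shouldSignal;
}
```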
true : false; + ((u128)taskset->m.signalled)._bit[taskId] = true; if (shouldSignal) { cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 85df5fb582..f1a4793e13 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -655,17 +655,17 @@ struct CellSpursTaskset // Real data struct _CellSpursTaskset { - be_t running_set[4]; // 0x00 - be_t ready_set[4]; // 0x10 - be_t ready2_set[4]; // 0x20 - TODO: Find out what this is - be_t enabled_set[4]; // 0x30 - be_t signal_received_set[4]; // 0x40 - be_t waiting_set[4]; // 0x50 + be_t running; // 0x00 + be_t ready; // 0x10 + be_t pending_ready; // 0x20 + be_t enabled; // 0x30 + be_t signalled; // 0x40 + be_t waiting; // 0x50 vm::bptr spurs; // 0x60 be_t args; // 0x68 u8 enable_clear_ls; // 0x70 u8 x71; // 0x71 - u8 x72; // 0x72 + u8 wkl_flag_wait_task; // 0x72 u8 last_scheduled_task; // 0x73 be_t wid; // 0x74 be_t x78; // 0x78 @@ -948,7 +948,7 @@ struct SpursTasksetPmMgmtData be_t savedWriteTagGroupQueryMask; // 0x2FB0 be_t savedSpuWriteEventMask; // 0x2FB4 be_t tasksetMgmtAddr; // 0x2FB8 - be_t lowestLoadSegmentAddr; // 0x2FBC + be_t guidAddr; // 0x2FBC be_t x2FC0; // 0x2FC0 be_t x2FC8; // 0x2FC8 be_t taskExitCode; // 0x2FD0 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 7118eadbd5..c7902f7240 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -7,6 +7,8 @@ #include "Emu/SysCalls/lv2/sys_lwcond.h" #include "Emu/SysCalls/lv2/sys_spu.h" #include "Emu/SysCalls/Modules/cellSpurs.h" +#include "Loader/ELF32.h" +#include "Emu/FS/vfsStreamMemory.h" // // SPURS utility functions @@ -42,7 +44,8 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu); // // SPURS taskset policy module functions // -bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); void spursTasksetExit(SPUThread & spu); void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); void spursTasksetDispatch(SPUThread & spu); @@ -50,6 +53,9 @@ void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); bool spursTasksetPollStatus(SPUThread & spu); void spursTasksetInit(SPUThread & spu, u32 pollStatus); void spursTasksetResumeTask(SPUThread & spu); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); bool spursTasksetEntry(SPUThread & spu); extern Module *cellSpurs; @@ -1001,64 +1007,216 @@ bool spursSysServiceWorkloadEntry(SPUThread & spu) { // SPURS taskset policy module functions ////////////////////////////////////////////////////////////////////////////// -bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) { +enum SpursTasksetRequest { + SPURS_TASKSET_REQUEST_POLL_SIGNAL = -1, + SPURS_TASKSET_REQUEST_DESTROY_TASK = 0, + SPURS_TASKSET_REQUEST_YIELD_TASK = 1, + SPURS_TASKSET_REQUEST_WAIT_SIGNAL = 2, + SPURS_TASKSET_REQUEST_POLL = 3, + SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG = 4, + SPURS_TASKSET_REQUEST_SELECT_TASK = 5, 
+ SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6, +}; + +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) { auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - // Verify taskset state is valid - for (auto i = 0; i < 4; i ++) { - if ((mgmt->taskset->m.waiting_set[i] & mgmt->taskset->m.running_set[i]) || - (mgmt->taskset->m.ready_set[i] & mgmt->taskset->m.ready2_set[i]) || - ((mgmt->taskset->m.running_set[i] | mgmt->taskset->m.ready_set[i] | - mgmt->taskset->m.ready2_set[i] | mgmt->taskset->m.signal_received_set[i] | - mgmt->taskset->m.waiting_set[i]) & ~mgmt->taskset->m.enabled_set[i])) { - assert(0); - } - } - - // TODO: Implement cases - s32 delta = 0; - switch (request + 1) { - case -1: - break; - case 0: - break; - case 1: - break; - case 2: - break; - case 3: - break; - case 4: - break; - case 5: - break; - case 6: - break; - default: - spursHalt(spu); - break; - } - - // Set the ready count of the workload to the number of ready tasks + s32 rc = CELL_OK; + s32 numNewlyReadyTasks; do { - s32 readyCount = kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD ? - kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed() : - kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed(); + spursDma(spu, MFC_GETLLAR_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); - auto newReadyCount = readyCount + delta > 0xFF ? 0xFF : readyCount + delta < 0 ? 0 : readyCount + delta; - - if (kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD) { - kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(newReadyCount); - } else { - kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(newReadyCount); + // Verify taskset state is valid + auto _0 = be_t::make(u128::from32(0)); + if ((taskset->m.waiting & taskset->m.running) != _0 || (taskset->m.ready & taskset->m.pending_ready) != _0 || + ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.signalled | taskset->m.waiting) & be_t::make(~taskset->m.enabled.value())) != _0) { + spursHalt(spu); + return CELL_OK; } - delta += readyCount; - } while (delta > 0); + // Find the number of tasks that have become ready since the last iteration + auto newlyReadyTasks = (taskset->m.signalled | taskset->m.pending_ready).value() & ~taskset->m.ready.value(); + numNewlyReadyTasks = 0; + for (auto i = 0; i < 128; i++) { + if (newlyReadyTasks._bit[i]) { + numNewlyReadyTasks++; + } + } - // TODO: Implement return - return false; + u128 readyButNotRunning; + u8 selectedTaskId; + auto running = taskset->m.running.value(); + auto waiting = taskset->m.waiting.value(); + auto enabled = taskset->m.enabled.value(); + auto signalled = (taskset->m.signalled & (taskset->m.ready | taskset->m.pending_ready)).value(); + auto ready = (taskset->m.signalled | taskset->m.ready | taskset->m.pending_ready).value(); + + switch (request) { + case SPURS_TASKSET_REQUEST_POLL_SIGNAL: + rc = signalled._bit[mgmt->taskId] ? 
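The validation at the top of spursTasksetProcessRequest doubles as documentation of the scheduler's state machine; collected in one place:

```cpp
// Per-taskset scheduler state: six 128-bit sets, bit i == task i.
//   running       - on an SPU right now          ready   - runnable
//   pending_ready - made ready since last pass   enabled - valid task ids
//   signalled     - signal latched               waiting - blocked tasks
// Invariants checked before every request (violation => spursHalt):
//   (running & waiting)       == 0
//   (ready   & pending_ready) == 0
//   (running | ready | pending_ready | signalled | waiting) & ~enabled == 0
```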
1 : 0; + signalled._bit[mgmt->taskId] = false; + break; + case SPURS_TASKSET_REQUEST_DESTROY_TASK: + numNewlyReadyTasks--; + running._bit[mgmt->taskId] = false; + enabled._bit[mgmt->taskId] = false; + signalled._bit[mgmt->taskId] = false; + ready._bit[mgmt->taskId] = false; + break; + case SPURS_TASKSET_REQUEST_YIELD_TASK: + running._bit[mgmt->taskId] = false; + waiting._bit[mgmt->taskId] = true; + break; + case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: + if (signalled._bit[mgmt->taskId]) { + numNewlyReadyTasks--; + running._bit[mgmt->taskId] = false; + waiting._bit[mgmt->taskId] = true; + signalled._bit[mgmt->taskId] = false; + ready._bit[mgmt->taskId] = false; + } + break; + case SPURS_TASKSET_REQUEST_POLL: + readyButNotRunning = ready & ~running; + if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { + readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task)); + } + + rc = readyButNotRunning != _0 ? 1 : 0; + break; + case SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG: + if (taskset->m.wkl_flag_wait_task == 0x81) { + // A workload flag is already pending so consume it + taskset->m.wkl_flag_wait_task = 0x80; + rc = 0; + } else if (taskset->m.wkl_flag_wait_task == 0x80) { + // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag. + taskset->m.wkl_flag_wait_task = mgmt->taskId; + running._bit[mgmt->taskId] = false; + waiting._bit[mgmt->taskId] = true; + rc = 1; + numNewlyReadyTasks--; + } else { + // Another task is already waiting for the workload signal + rc = CELL_SPURS_TASK_ERROR_BUSY; + } + break; + case SPURS_TASKSET_REQUEST_SELECT_TASK: + readyButNotRunning = ready & ~running; + if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { + readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task)); + } + + // Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness. + for (selectedTaskId = taskset->m.last_scheduled_task + 1; selectedTaskId < 128; selectedTaskId++) { + if (readyButNotRunning._bit[selectedTaskId]) { + break; + } + } + + if (selectedTaskId == 128) { + for (selectedTaskId = 0; selectedTaskId < taskset->m.last_scheduled_task + 1; selectedTaskId++) { + if (readyButNotRunning._bit[selectedTaskId]) { + break; + } + } + + if (selectedTaskId == taskset->m.last_scheduled_task + 1) { + selectedTaskId = CELL_SPURS_MAX_TASK; + } + } + + *taskId = selectedTaskId; + *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 
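The workload-flag handshake is split between the WAIT_WKL_FLAG case above and the RECV_WKL_FLAG case just below, so the encoding of wkl_flag_wait_task is worth collecting:

```cpp
// wkl_flag_wait_task encoding (one byte in the taskset):
//   0x80    - idle: no waiter, no pending flag
//   0x81    - flag arrived while nobody was waiting (kept pending)
//   0..127  - id of the single task currently parked on the flag
// WAIT_WKL_FLAG: consumes a pending 0x81, or parks the caller's task id;
//                a second concurrent waiter gets CELL_SPURS_TASK_ERROR_BUSY.
// RECV_WKL_FLAG: wakes a parked task, or records 0x81 for the next waiter.
```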
1 : 0; + if (selectedTaskId != CELL_SPURS_MAX_TASK) { + taskset->m.last_scheduled_task = selectedTaskId; + running._bit[mgmt->taskId] = true; + waiting._bit[mgmt->taskId] = false; + } + break; + case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: + if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { + // There is a task waiting for the workload flag + taskset->m.wkl_flag_wait_task = 0x80; + rc = 1; + numNewlyReadyTasks++; + } else { + // No tasks are waiting for the workload flag + taskset->m.wkl_flag_wait_task = 0x81; + rc = 0; + } + break; + default: + spursHalt(spu); + return CELL_OK; + } + + taskset->m.pending_ready = _0; + taskset->m.running = running; + taskset->m.waiting = waiting; + taskset->m.enabled = enabled; + taskset->m.signalled = signalled; + taskset->m.ready = ready; + } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Increment the ready count of the workload by the number of tasks that have become ready + do { + spursDma(spu, MFC_GETLLAR_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + s32 readyCount = kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed(); + readyCount += numNewlyReadyTasks; + readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount; + + if (kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(readyCount); + } else { + spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(readyCount); + } + } while (spursDma(spu, MFC_PUTLLC_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + return rc; +} + +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) { + if (elfAddr == 0 || (elfAddr & 0x0F) != 0) { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + vfsStreamMemory stream(elfAddr); + loader::handlers::elf32 loader; + auto rc = loader.init(stream); + if (rc != loader::handler::ok) { + return CELL_SPURS_TASK_ERROR_NOEXEC; + } + + u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM; + for (auto & phdr : loader.m_phdrs) { + if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) { + break; + } + + if (phdr.data_be.p_type == 1/*PT_LOAD*/) { + if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) { + if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP || + phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) { + return CELL_SPURS_TASK_ERROR_FAULT; + } + + _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? 
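The SELECT_TASK scan is self-contained enough to lift out; this sketch mirrors its logic with std::bitset in place of u128, and assumes (as in the patch) that the flag-waiting task's bit has already been cleared from the input set:

```cpp
#include <bitset>
#include <cstdint>

constexpr uint32_t CELL_SPURS_MAX_TASK = 128;

// Round-robin pick over the ready-but-not-running set, mirroring
// SPURS_TASKSET_REQUEST_SELECT_TASK: start after the last scheduled task
// and wrap around once, so no ready task is starved.
uint32_t selectNextTask(const std::bitset<128>& readyButNotRunning, uint32_t lastScheduled)
{
    for (uint32_t id = lastScheduled + 1; id < 128; id++) {
        if (readyButNotRunning[id]) return id;
    }
    for (uint32_t id = 0; id <= lastScheduled; id++) {
        if (readyButNotRunning[id]) return id;
    }
    return CELL_SPURS_MAX_TASK; // nothing ready to run
}
```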
phdr.data_be.p_vaddr : _lowestLoadAddr; + } + } + } + + loader.load_data(spu.ls_offset, skipWriteableSegments); + *entryPoint = loader.m_ehdr.data_be.e_entry; + if (*lowestLoadAddr) { + *lowestLoadAddr = _lowestLoadAddr; + } + + return CELL_OK; } void spursTasksetExit(SPUThread & spu) { @@ -1100,7 +1258,7 @@ void spursTasksetDispatch(SPUThread & spu) { u32 taskId; u32 isWaiting; - spursTasksetProcessRequest(spu, 5, &taskId, &isWaiting); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_SELECT_TASK, &taskId, &isWaiting); if (taskId >= CELL_SPURS_MAX_TASK) { spursTasksetExit(spu); return; @@ -1125,13 +1283,19 @@ void spursTasksetDispatch(SPUThread & spu) { if (isWaiting == 0) { // If we reach here it means that the task is being started and not being resumed - mgmt->lowestLoadSegmentAddr = CELL_SPURS_TASK_TOP; + mgmt->guidAddr = CELL_SPURS_TASK_TOP; - // TODO: Load elf - // TODO: halt if rc of Load elf != CELL_OK + u32 entryPoint; + u32 lowestLoadAddr; + if (spursTasksetLoadElf(spu, &entryPoint, &lowestLoadAddr, taskInfo->elf_addr.addr(), false) != CELL_OK) { + spursHalt(spu); + return; + } spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + mgmt->savedContextLr.value()._u32[3] = entryPoint; + mgmt->guidAddr = lowestLoadAddr; mgmt->tasksetMgmtAddr = 0x2700; mgmt->x2FC0 = 0; mgmt->taskExitCode = isWaiting; @@ -1163,8 +1327,11 @@ void spursTasksetDispatch(SPUThread & spu) { if (taskInfo->ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || (taskInfo->ls_pattern.u64[1] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { // Load the ELF - // TODO: Load ELF - // TODO: halt if rc of Load elf != CELL_OK + u32 entryPoint; + if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { + spursHalt(spu); + return; + } } // Load saved context from main memory to LS @@ -1203,7 +1370,7 @@ void spursTasksetDispatch(SPUThread & spu) { void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { - spursTasksetProcessRequest(spu, 6, nullptr, nullptr); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr); } } @@ -1250,6 +1417,74 @@ void spursTasksetResumeTask(SPUThread & spu) { spu.SetBranch(spu.GPR[0]._u32[3]); } +s32 spursTasketSaveTaskContext(SPUThread & spu) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); + + spursDmaWaitForCompletion(spu, 0xFFFFFFFF); + + if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) { + return CELL_SPURS_TASK_ERROR_STAT; + } + + u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; + u32 lsBlocks = 0; + for (auto i = 0; i < 128; i++) { + if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) { + lsBlocks++; + } + } + + if (lsBlocks > allocLsBlocks) { + return CELL_SPURS_TASK_ERROR_STAT; + } + + // Make sure the stack is area is specified in the ls pattern + for (auto i = (mgmt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { + if ((taskInfo->ls_pattern.u64[i < 64 ? 
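spursTasketSaveTaskContext packs an address and a block count into one field; the layout below is reconstructed from the masks and offsets this function uses:

```cpp
// context_save_storage_and_alloc_ls_blocks field:
//   high bits : EA of the context save area (128-byte aligned, mask ~0x7F)
//   bits 0..6 : number of LS blocks allocated for this task
// Context save area layout:
//   +0x000 : 0x380 bytes of processor context (GPRs, FPSCR, tag/event masks),
//            DMA'd from the staging area at LS 0x2C80
//   +0x400 : one 0x800-byte slot per LS block, indexed exactly like the
//            ls_pattern bits (block i at +0x400 + ((i - 6) << 11))
```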
1 : 0] & (0x8000000000000000ull >> i)) == 0) { + return CELL_SPURS_TASK_ERROR_STAT; + } + } + + // Get the processor context + u128 r; + spu.FPSCR.Read(r); + mgmt->savedContextFpscr = r; + spu.ReadChannel(r, SPU_RdEventMask); + mgmt->savedSpuWriteEventMask = r._u32[3]; + spu.ReadChannel(r, MFC_RdTagMask); + mgmt->savedWriteTagGroupQueryMask = r._u32[3]; + + // Store the processor context + u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, mgmt->dmaTagId); + + // Save LS context + for (auto i = 6; i < 128; i++) { + bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; + if (shouldStore) { + // TODO: Combine DMA requests for consecutive blocks into a single request + spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); + } + } + + spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + return CELL_OK; +} + +void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) { + auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + + spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0); + spursDmaWaitForCompletion(spu, 1); + + spu.GPR[3]._u64[1] = mgmt->taskset.addr(); + spu.GPR[4]._u32[3] = taskId; + spu.GPR[5]._u32[3] = exitCode; + spu.GPR[6]._u64[1] = args; + spu.FastCall(0x10000); +} + s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); @@ -1268,32 +1503,32 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { case CELL_SPURS_TASK_SYSCALL_EXIT: if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out if (mgmt->x2FD4 != 4) { - spursTasksetProcessRequest(spu, 0, nullptr, nullptr); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr); } - auto a = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; - auto b = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8; - // TODO: onTaskExit(a, mgmt->taskId, mgmt->taskExitCode, b) + auto addr = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; + auto args = mgmt->x2FD4 == 4 ? 
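The switch below is easier to follow with the syscall map in one place; the numbers are the CELL_SPURS_TASK_SYSCALL_* constants added earlier in this series:

```cpp
// Taskset syscall dispatch (low nibble; a set 0x10 bit marks the *2 variants,
// e.g. cellSpursYield2, which skip the up-front DMA-completion wait):
//   0  EXIT            destroy the task, then run any registered exit handler
//   1  YIELD           save context if other work is pending, mark yielded
//   2  WAIT_SIGNAL     save context and sleep unless a signal is already latched
//   3  POLL            report ready tasks / runnable workloads, never blocks
//   4  RECV_WKL_FLAG   block until the taskset's workload flag is delivered
//   otherwise          CELL_SPURS_TASK_ERROR_NOSYS
```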
0 : mgmt->x2FC8; + spursTasksetOnTaskExit(spu, addr, mgmt->taskId, mgmt->taskExitCode, args); } incident = CELL_SPURS_TRACE_TASK_EXIT; break; case CELL_SPURS_TASK_SYSCALL_YIELD: - if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 3, nullptr, nullptr)) { + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr)) { // If we reach here then it means that either another task can be scheduled or another workload can be scheduled // Save the context of the current task - // TODO: rc = saveContext + rc = spursTasketSaveTaskContext(spu); if (rc == CELL_OK) { - spursTasksetProcessRequest(spu, 1, nullptr, nullptr); + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_YIELD_TASK, nullptr, nullptr); incident = CELL_SPURS_TRACE_TASK_YIELD; } } break; case CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL: - if (spursTasksetProcessRequest(spu, -1, nullptr, nullptr)) { - // TODO: rc = saveContext + if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL_SIGNAL, nullptr, nullptr) == 0) { + rc = spursTasketSaveTaskContext(spu); if (rc == CELL_OK) { - if (spursTasksetProcessRequest(spu, 2, nullptr, nullptr) == false) { + if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_SIGNAL, nullptr, nullptr) == 0) { incident = CELL_SPURS_TRACE_TASK_WAIT; } } @@ -1301,17 +1536,16 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { break; case CELL_SPURS_TASK_SYSCALL_POLL: rc = spursTasksetPollStatus(spu) ? CELL_SPURS_TASK_POLL_FOUND_WORKLOAD : 0; - rc |= spursTasksetProcessRequest(spu, 3, nullptr, nullptr) ? CELL_SPURS_TASK_POLL_FOUND_TASK : 0; + rc |= spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr) ? CELL_SPURS_TASK_POLL_FOUND_TASK : 0; break; case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG: if (args == 0) { // TODO: Figure this out spursHalt(spu); } - if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, 4, nullptr, nullptr) != true) { - // TODO: rc = saveContext + if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG, nullptr, nullptr) != 1) { + rc = spursTasketSaveTaskContext(spu); if (rc == CELL_OK) { - spursTasksetProcessRequest(spu, 1, nullptr, nullptr); incident = CELL_SPURS_TRACE_TASK_WAIT; } } @@ -1331,7 +1565,7 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); // Clear the GUID of the task - memset(vm::get_ptr(spu.ls_offset + mgmt->lowestLoadSegmentAddr), 0, 0x10); + memset(vm::get_ptr(spu.ls_offset + mgmt->guidAddr), 0, 0x10); if (spursTasksetPollStatus(spu)) { spursTasksetExit(spu); diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp index cb9be62759..a24e64c05f 100644 --- a/rpcs3/Loader/ELF32.cpp +++ b/rpcs3/Loader/ELF32.cpp @@ -247,7 +247,7 @@ namespace loader return ok; } - handler::error_code elf32::load_data(u32 offset) + handler::error_code elf32::load_data(u32 offset, bool skip_writeable) { Elf_Machine machine = (Elf_Machine)(u16)(m_ehdr.is_le() ? 
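The new skip_writeable parameter on elf32::load_data exists for task resume; the rationale below is an inference from how spursTasksetLoadElf calls it, not something the patch states:

```cpp
// Presumed intent of skip_writeable (the added check appears just below):
// when a task is resumed, its writable data is restored from the context
// save area, so reloading PF_W segments from the ELF image would clobber
// the saved state; read-only text/rodata can always be reloaded.
if (skip_writeable == true && (phdr.data_be.p_flags & 2/*PF_W*/) != 0)
{
    continue; // leave writable segments to the context-restore path
}
```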
m_ehdr.data_le.e_machine : m_ehdr.data_be.e_machine); @@ -270,6 +270,11 @@ namespace loader return loading_error; } + if (skip_writeable == true && (phdr.data_be.p_flags & 2/*PF_W*/) != 0) + { + continue; + } + if (filesz) { m_stream->Seek(handler::get_stream_offset() + offset); diff --git a/rpcs3/Loader/ELF32.h b/rpcs3/Loader/ELF32.h index d3d37f543c..6a13b6f7cb 100644 --- a/rpcs3/Loader/ELF32.h +++ b/rpcs3/Loader/ELF32.h @@ -132,7 +132,7 @@ namespace loader error_code init(vfsStream& stream) override; error_code load() override; - error_code load_data(u32 offset); + error_code load_data(u32 offset, bool skip_writeable = false); virtual ~elf32() = default; }; From 2c70f5168a89e081f2bfde4482df94eef2426c7c Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 2 Feb 2015 09:02:38 +0530 Subject: [PATCH 25/29] SPURS: Reorder, rename and some cleanup --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 18 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 1599 ++++++++++--------- 3 files changed, 826 insertions(+), 795 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index b8088daaff..18eed2c944 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -26,7 +26,7 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif -bool spursKernelMain(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); @@ -182,7 +182,7 @@ s64 spursInit( { auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); #ifndef PRX_DEBUG_XXX - spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelMain); + spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelEntry); #endif spurs->m.spus[num] = spu->GetId(); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index f1a4793e13..21f011eda2 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -105,8 +105,8 @@ enum SPURSKernelInterfaces CELL_SPURS_KERNEL_DMA_TAG_ID = 31, CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, - CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808, - CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838, + CELL_SPURS_KERNEL1_EXIT_ADDR = 0x808, + CELL_SPURS_KERNEL2_EXIT_ADDR = 0x838, CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; @@ -886,8 +886,8 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -// The SPURS kernel data store. This resides at 0x100 of the LS. -struct SpursKernelMgmtData +// The SPURS kernel context. This resides at 0x100 of the LS. 
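+//
+// An approximate SPU local-storage map, as used by the HLE policy modules in
+// cellSpursSpu.cpp (gathered from this patch set, not from official docs):
+//
+//   0x0100   SpursKernelContext (this structure)
+//   0x0A00   entry point at which policy modules are (re)started
+//   0x2700   SpursTasksetContext (taskset policy module state)
+//   0x3FFB0  initial stack pointer handed to a workload in GPR[1]
+//   0x3FFE0  staging buffer for the selected workload's wklInfo entry
+//
+// For example, an SPU-side helper would locate this context with something
+// like the following (hypothetical usage, mirroring the .cpp code):
+//
+//   auto ctxt = vm::get_ptr<SpursKernelContext>(spu.ls_offset + 0x100);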
+struct SpursKernelContext { u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 @@ -900,7 +900,7 @@ struct SpursKernelMgmtData vm::bptr wklCurrentAddr; // 0x1D0 be_t wklCurrentUniqueId; // 0x1D8 be_t wklCurrentId; // 0x1DC - be_t yieldToKernelAddr; // 0x1E0 + be_t exitToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 u8 moduleId[2]; // 0x1E8 u8 sysSrvInitialised; // 0x1EA @@ -923,10 +923,10 @@ struct SpursKernelMgmtData be_t guid[4]; // 0x280 }; -static_assert(sizeof(SpursKernelMgmtData) == 0x190, "Incorrect size for SpursKernelMgmtData"); +static_assert(sizeof(SpursKernelContext) == 0x190, "Incorrect size for SpursKernelContext"); -// The SPURS taskset policy module data store. This resides at 0x2700 of the LS. -struct SpursTasksetPmMgmtData +// The SPURS taskset policy module context. This resides at 0x2700 of the LS. +struct SpursTasksetContext { u8 tempAreaTaskset[0x80]; // 0x2700 u8 tempAreaTaskInfo[0x30]; // 0x2780 @@ -956,7 +956,7 @@ struct SpursTasksetPmMgmtData u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; -static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); +static_assert(sizeof(SpursTasksetContext) == 0x900, "Incorrect size for SpursTasksetContext"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index c7902f7240..451bfef848 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -13,7 +13,7 @@ // // SPURS utility functions // -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); void cellSpursModuleExit(SPUThread & spu); @@ -27,36 +27,44 @@ void spursHalt(SPUThread & spu); // bool spursKernel1SelectWorkload(SPUThread & spu); bool spursKernel2SelectWorkload(SPUThread & spu); +void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus); +bool spursKernelWorkloadExit(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); // -// SPURS system service workload functions +// SPURS System Service functions // -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4); -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet); -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); -bool spursSysServiceWorkloadEntry(SPUThread & spu); +bool spursSysServiceEntry(SPUThread & spu); +// TODO: Exit +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt); +// TODO: Deactivate workload +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, 
SpursKernelContext * ctxt, u32 wklShutdownBitSet); +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4); +// TODO: Deactivate trace +// TODO: System workload entry +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt); // -// SPURS taskset policy module functions +// SPURS Taskset Policy Module functions // -s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); -s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); -void spursTasksetExit(SPUThread & spu); +bool spursTasksetEntry(SPUThread & spu); +bool spursTasksetSyscallEntry(SPUThread & spu); +void spursTasksetResumeTask(SPUThread & spu); void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); -void spursTasksetDispatch(SPUThread & spu); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); bool spursTasksetPollStatus(SPUThread & spu); -void spursTasksetInit(SPUThread & spu, u32 pollStatus); -void spursTasksetResumeTask(SPUThread & spu); -s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetExit(SPUThread & spu); void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); -bool spursTasksetEntry(SPUThread & spu); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); extern Module *cellSpurs; @@ -65,16 +73,16 @@ extern Module *cellSpurs; ////////////////////////////////////////////////////////////////////////////// /// Output trace information -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId) { // TODO: Implement this } /// Check for execution right requests u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); spu.GPR[3]._u32[3] = 1; - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + if (ctxt->spurs->m.flags1 & SF1_32_WORKLOADS) { spursKernel2SelectWorkload(spu); } else { spursKernel1SelectWorkload(spu); @@ -86,13 +94,13 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { } u32 wklId = result >> 32; - return wklId == mgmt->wklCurrentId ? 0 : 1; + return wklId == ctxt->wklCurrentId ? 
0 : 1; } /// Exit current workload void cellSpursModuleExit(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); - spu.SetBranch(mgmt->yieldToKernelAddr); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(ctxt->exitToKernelAddr); } /// Execute a DMA operation @@ -134,7 +142,7 @@ u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { return rv._u32[3]; } -// Halt the SPU +/// Halt the SPU void spursHalt(SPUThread & spu) { spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); spu.Stop(); @@ -146,7 +154,7 @@ void spursHalt(SPUThread & spu) { /// Select a workload to run bool spursKernel1SelectWorkload(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. @@ -158,20 +166,20 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD]; u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; + contention[i] = spurs->m.wklCurrentContention[i] - ctxt->wklLocContention[i]; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) { + pendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + if (i != ctxt->wklCurrentId) { contention[i] += pendingContention[i]; } } @@ -180,19 +188,19 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; pollStatus = 0; - // The system service workload has the highest priority. Select the system service workload if + // The system service has the highest priority. Select the system service if // the system service message bit for this SPU is set. - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit - spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); } } else { // Caclulate the scheduling weight for each workload u16 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i); + u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i); u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 
1 : 0 : 0; u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); @@ -206,7 +214,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload flag receiver - if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { + if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: // 1. Wokload signal set or workload flag or ready count > contention @@ -217,11 +225,11 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { // 6. Is the workload executable same as the currently loaded executable // 7. The workload id (lesser the number, more the weight) u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; + weight |= (u16)(ctxt->priority[i] & 0x7F) << 16; + weight |= i == ctxt->wklCurrentId ? 0x80 : 0x00; weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; + weight |= ctxt->wklUniqueId[i] == ctxt->wklCurrentId ? 0x02 : 0x00; weight |= 0x01; // In case of a tie the lower numbered workload is chosen @@ -237,9 +245,9 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); @@ -260,16 +268,16 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocContention[wklSelectedId] = 1; + ctxt->wklLocContention[wklSelectedId] = 1; } - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { + ctxt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != ctxt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { @@ -278,14 +286,14 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocPendingContention[wklSelectedId] = 1; + ctxt->wklLocPendingContention[wklSelectedId] = 1; } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -295,7 +303,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { /// Select a workload to run bool spursKernel2SelectWorkload(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. @@ -307,22 +315,22 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD2]; u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - ctxt->wklLocContention[i & 0x0F]; contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? 
contention[i] & 0x0F : contention[i] >> 4; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - ctxt->wklLocPendingContention[i & 0x0F]; pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) { + if (i != ctxt->wklCurrentId) { contention[i] += pendingContention[i]; } } @@ -331,22 +339,22 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; pollStatus = 0; - // The system service workload has the highest priority. Select the system service workload if + // The system service has the highest priority. Select the system service if // the system service message bit for this SPU is set. - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { // Not sure what this does. Possibly Mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit - spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); } } else { // Caclulate the scheduling weight for each workload u8 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { auto j = i & 0x0F; - u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; @@ -365,7 +373,7 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. // In case of a tie the lower numbered workload is chosen. u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) { + if (ctxt->wklCurrentId == i) { weight |= 0x04; } @@ -381,9 +389,9 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); @@ -404,13 +412,13 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { + ctxt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + ctxt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != ctxt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { @@ -419,12 +427,12 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -432,81 +440,27 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { return true; } -/// SPURS kernel main -bool spursKernelMain(SPUThread & spu) { - SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); +/// SPURS kernel dispatch workload +void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - bool isKernel2; - u32 pollStatus; - if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { - // Entry point of SPURS kernel - // Save arguments - mgmt->spuNum = spu.GPR[3]._u32[3]; - mgmt->spurs.set(spu.GPR[4]._u64[1]); + auto pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - - memset(mgmt, 0, sizeof(SpursKernelMgmtData)); - - // Initialise the SPURS management area to its initial values - mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; - mgmt->wklCurrentUniqueId = 0x20; - mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - mgmt->yieldToKernelAddr = isKernel2 ? 
CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR; - mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; - if (!isKernel2) { - mgmt->x1F0 = 0xF0020000; - mgmt->x200 = 0x20000; - mgmt->guid[0] = 0x423A3A02; - mgmt->guid[1] = 0x43F43A82; - mgmt->guid[2] = 0x43F26502; - mgmt->guid[3] = 0x420EB382; - } else { - mgmt->guid[0] = 0x43A08402; - mgmt->guid[1] = 0x43FB0A82; - mgmt->guid[2] = 0x435E9302; - mgmt->guid[3] = 0x43A3C982; - } - - // Register SPURS kernel HLE functions - spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant - spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); - spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); - spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - - // DMA in the system service workload info - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfoSysSrv), 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID/*tag*/); - spursDmaWaitForCompletion(spu, 0x80000000); - pollStatus = 0; - } else if (spu.PC == mgmt->yieldToKernelAddr) { - isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - - // Select next workload to run - spu.GPR[3].clear(); - if (isKernel2) { - spursKernel2SelectWorkload(spu); - } else { - spursKernel1SelectWorkload(spu); - } - - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - - // DMA in the workload info for the selected workload - auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : - wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : - offsetof(CellSpurs, m.wklInfoSysSrv); - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - spursDmaWaitForCompletion(spu, 0x80000000); - } else { - assert(0); - } + // DMA in the workload info for the selected workload + auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : + wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : + offsetof(CellSpurs, m.wklInfoSysSrv); + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); + // Load the workload to LS auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0); - if (mgmt->wklCurrentAddr != wklInfo->addr) { + if (ctxt->wklCurrentAddr != wklInfo->addr) { switch (wklInfo->addr.addr()) { case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: - spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + spu.RegisterHleFunction(0xA00, spursSysServiceEntry); break; case SPURS_IMG_ADDR_TASKSET_PM: spu.RegisterHleFunction(0xA00, spursTasksetEntry); @@ -517,22 +471,81 @@ bool spursKernelMain(SPUThread & spu) { break; } - mgmt->wklCurrentAddr = wklInfo->addr; - mgmt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); + ctxt->wklCurrentAddr = wklInfo->addr; + ctxt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); } if (!isKernel2) { - mgmt->moduleId[0] = 0; - mgmt->moduleId[1] = 0; + ctxt->moduleId[0] = 0; + ctxt->moduleId[1] = 0; } // Run workload - spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; + spu.GPR[0]._u32[3] = ctxt->exitToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; spu.GPR[4]._u64[1] = wklInfo->arg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); +} + +/// SPURS kernel workload exit +bool spursKernelWorkloadExit(SPUThread & spu) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + + // Select next workload to run + spu.GPR[3].clear(); + if (isKernel2) { + spursKernel2SelectWorkload(spu); + } else { + spursKernel1SelectWorkload(spu); + } + + spursKernelDispatchWorkload(spu, spu.GPR[3]._u64[1]); + return false; +} + +/// SPURS kernel entry point +bool spursKernelEntry(SPUThread & spu) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + // Save arguments + ctxt->spuNum = spu.GPR[3]._u32[3]; + ctxt->spurs.set(spu.GPR[4]._u64[1]); + + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + + memset(ctxt, 0, sizeof(SpursKernelContext)); + + // Initialise the SPURS context to its initial values + ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; + ctxt->wklCurrentUniqueId = 0x20; + ctxt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + ctxt->exitToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_EXIT_ADDR : CELL_SPURS_KERNEL1_EXIT_ADDR; + ctxt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; + if (!isKernel2) { + ctxt->x1F0 = 0xF0020000; + ctxt->x200 = 0x20000; + ctxt->guid[0] = 0x423A3A02; + ctxt->guid[1] = 0x43F43A82; + ctxt->guid[2] = 0x43F26502; + ctxt->guid[3] = 0x420EB382; + } else { + ctxt->guid[0] = 0x43A08402; + ctxt->guid[1] = 0x43FB0A82; + ctxt->guid[2] = 0x435E9302; + ctxt->guid[3] = 0x43A3C982; + } + + // Register SPURS kernel HLE functions + spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/); + spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry); + spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit); + spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? 
spursKernel2SelectWorkload : spursKernel1SelectWorkload); + + // Start the system service + spursKernelDispatchWorkload(spu, ((u64)CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) << 32); return false; } @@ -540,120 +553,337 @@ bool spursKernelMain(SPUThread & spu) { // SPURS system workload functions ////////////////////////////////////////////////////////////////////////////// -/// Restore scheduling parameters after a workload has been preempted by the system service workload -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - u8 wklId; +/// Entry point of the system service +bool spursSysServiceEntry(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + if (ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system workload. Need to implement this. + } - if (spurs->m.sysSrvWorkload[mgmt->spuNum] == 0xFF) { - return; - } + return false; +} - wklId = spurs->m.sysSrvWorkload[mgmt->spuNum]; - spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { + // Monitor only lock line reservation lost events + spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); - spursSysServiceUpdateWorkload(spu, mgmt); - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + bool shouldExit; + while (true) { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + // Find the number of SPUs that are idling in this SPURS instance + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace - // uses the current worload id to determine the workload to which the trace belongs - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; + bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; + bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; + shouldExit = allSpusIdle && exitIfNoWork; - // Trace - STOP: GUID + // Check if any workloads can be scheduled + bool foundReadyWorkload = false; + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + foundReadyWorkload = true; + } else { + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + u32 j = i & 0x0F; + u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + + if (runnable && priority > 0 && maxContention > contention) { + if (wklFlag || wklSignal || readyCount > contention) { + foundReadyWorkload = true; + break; + } + } + } + } else { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + u8 runnable = ctxt->wklRunnable1 & (0x8000 >> i); + u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 requestCount = readyCount + idleSpuCount; + + if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) { + foundReadyWorkload = true; + break; + } + } + } + } + } + + bool spuIdling = spurs->m.spuIdling & (1 << ctxt->spuNum) ? true : false; + if (foundReadyWorkload && shouldExit == false) { + spurs->m.spuIdling &= ~(1 << ctxt->spuNum); + } else { + spurs->m.spuIdling |= 1 << ctxt->spuNum; + } + + // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. + if (spuIdling && shouldExit == false && foundReadyWorkload == false) { + // The system service blocks by making a reservation and waiting on the lock line reservation lost event. 
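+            // The GETLLAR at the top of this loop created the reservation on
+            // the first 0x80 bytes of the CellSpurs area; reading
+            // SPU_RdEventStat then blocks until the SPU_EVENT_LR (lock line
+            // reservation lost) event fires, i.e. until another SPU or the PPU
+            // writes that line. The PUTLLC below consequently fails and the
+            // loop re-examines the scheduling state with fresh data.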
+            u128 r;
+            spu.ReadChannel(r, SPU_RdEventStat);
+            spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR));
+        }
+
+        auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        if (dmaSuccess && (shouldExit || foundReadyWorkload)) {
+            break;
+        }
+    }
+
+    if (shouldExit) {
+        // TODO: exit spu thread group
+    }
+}
+
+/// Main function for the system service
+void spursSysServiceMain(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+
+    if (ctxt->spurs.addr() % CellSpurs::align) {
+        spursHalt(spu);
+        return;
+    }
+
+    // Initialise the system service if this is the first time it's being started on this SPU
+    if (ctxt->sysSrvInitialised == 0) {
+        ctxt->sysSrvInitialised = 1;
+
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+
+        do {
+            spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+            CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+            // Halt if already initialised
+            if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) {
+                spursHalt(spu);
+                return;
+            }
+
+            spurs->m.sysSrvOnSpu |= 1 << ctxt->spuNum;
+        } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+        ctxt->traceBuffer = 0;
+        ctxt->traceMsgCount = -1;
+        spursSysServiceTraceUpdate(spu, ctxt, 1, 1, 0);
+        spursSysServiceCleanupAfterSystemWorkload(spu, ctxt);
+
+        // Trace - SERVICE: INIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+    }
+
+    // Trace - START: Module='SYS '
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
+    memcpy(pkt.data.start.module, "SYS ", 4);
+    pkt.data.start.level = 1; // Policy module
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    while (true) {
+        // Process requests for the system service
+        spursSysServiceProcessRequests(spu, ctxt);
+
+poll:
+        if (cellSpursModulePollStatus(spu, nullptr)) {
+            // Trace - SERVICE: EXIT
+            CellSpursTracePacket pkt;
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            // Trace - STOP: GUID
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+            pkt.data.stop = SPURS_GUID_SYS_WKL;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+            break;
+        }
+
+        // If we reach here it means that either there are more system service messages to be processed
+        // or there are no workloads that can be scheduled.
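+        // (cellSpursModulePollStatus returns non-zero only when the kernel
+        // selects a workload other than the current one, so for the system
+        // service a zero result means no other workload is ready to run.)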
+ + // If the SPU is not idling then process the remaining system service messages + if (ctxt->spuIdling == 0) { + continue; + } + + // If we reach here it means that the SPU is idling + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); + + spursSysServiceIdleHandler(spu, ctxt); + goto poll; } } -/// Update trace control in SPU from CellSpurs -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - bool notify; +/// Process any requests +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) { + bool updateTrace = false; + bool updateWorkload = false; + bool terminate = false; - u8 sysSrvMsgUpdateTrace; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; - spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - spurs->m.xCC &= ~(1 << mgmt->spuNum); - spurs->m.xCC |= arg2 << mgmt->spuNum; - - notify = false; - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { - spurs->m.xCD = 0; - notify = true; + // Terminate request + if (spurs->m.sysSrvMsgTerminate & (1 << ctxt->spuNum)) { + spurs->m.sysSrvOnSpu &= ~(1 << ctxt->spuNum); + terminate = true; } - if (arg4 && spurs->m.xCD != 0) { - spurs->m.xCD = 0; - notify = true; - } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - // Get trace parameters from CellSpurs and store them in the LS - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); - - if (mgmt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { - spursSysServiceUpdateTraceCount(spu, mgmt); - } else { - spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, mgmt->dmaTagId); - auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); - mgmt->traceMsgCount = traceBuffer->count[mgmt->spuNum]; + // Update workload message + if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << ctxt->spuNum)) { + spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << ctxt->spuNum); + updateWorkload = true; } - mgmt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; - if (mgmt->traceBuffer == 0) { - mgmt->traceMsgCount = 0; + // Update trace message + if (spurs->m.sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) { + updateTrace = true; } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Process update workload message + if (updateWorkload) { + spursSysServiceActivateWorkload(spu, ctxt); } - if (notify) { - auto spurs = 
vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); + // Process update trace message + if (updateTrace) { + spursSysServiceTraceUpdate(spu, ctxt, 1, 0, 0); + } + + // Process terminate request + if (terminate) { + // TODO: Rest of the terminate processing } } -/// Update events in CellSpurs -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { +/// Activate a workload +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + } + + u32 wklShutdownBitSet = 0; + ctxt->wklRunnable1 = 0; + ctxt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); + + // Copy the priority of the workload for this SPU and its unique id to the LS + ctxt->priority[i] = wklInfo1[i].priority[ctxt->spuNum] == 0 ? 0 : 0x10 - wklInfo1[i].priority[ctxt->spuNum]; + ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); + + // Copy the priority of the workload for this SPU to the LS + if (wklInfo2[i].priority[ctxt->spuNum]) { + ctxt->priority[i] |= (0x10 - wklInfo2[i].priority[ctxt->spuNum]) << 4; + } + } + } + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + // Update workload status and runnable flag based on the workload state + auto wklStatus = spurs->m.wklStatus1[i]; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus1[i] |= 1 << ctxt->spuNum; + ctxt->wklRunnable1 |= 0x8000 >> i; + } else { + spurs->m.wklStatus1[i] &= ~(1 << ctxt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { + spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + // Update workload status and runnable flag based on the workload state + wklStatus = spurs->m.wklStatus2[i]; + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus2[i] |= 1 << ctxt->spuNum; + ctxt->wklRunnable2 |= 0x8000 >> i; + } else { + spurs->m.wklStatus2[i] &= ~(1 << ctxt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { + spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } + } + } + } + } 
while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + if (wklShutdownBitSet) { + spursSysServiceUpdateShutdownCompletionEvents(spu, ctxt, wklShutdownBitSet); + } +} + +/// Update shutdown completion events +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet) { // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed // workloads that have a shutdown completion hook registered u32 wklNotifyBitSet; u8 spuPort; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); wklNotifyBitSet = 0; @@ -673,334 +903,117 @@ void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 } } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklNotifyBitSet) { // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyMask); } } -/// Update workload information in the SPU from CellSpurs -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { - auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - } - - u32 wklShutdownBitSet = 0; - mgmt->wklRunnable1 = 0; - mgmt->wklRunnable2 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); - - // Copy the priority of the workload for this SPU and its unique id to the LS - mgmt->priority[i] = wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); - - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); - - // Copy the priority of the workload for this SPU to the LS - if (wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - wklInfo2[i].priority[mgmt->spuNum]) << 4; - } - } - } - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - // Update workload status and runnable flag based on the workload state - auto wklStatus = spurs->m.wklStatus1[i]; - if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } - - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { - spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x80000000u >> i; - } - } - - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - // Update workload status and runnable flag based on the workload state - wklStatus = spurs->m.wklStatus2[i]; - if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; - } else { - spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); - } - - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { - spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x8000 >> i; - } - } - } - } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - if (wklShutdownBitSet) { - spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); +/// Update the trace count for this SPU +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt) { + if (ctxt->traceBuffer) { + auto traceInfo = vm::ptr::make((u32)(ctxt->traceBuffer - (ctxt->spurs->m.traceStartIndex[ctxt->spuNum] << 4))); + traceInfo->count[ctxt->spuNum] = ctxt->traceMsgCount; } } -/// Process any messages -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - bool updateTrace = false; - bool updateWorkload = false; - bool terminate = false; +/// Update trace control +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4) { + bool notify; + u8 sysSrvMsgUpdateTrace; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - // Terminate request - if 
(spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); - terminate = true; + sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; + spurs->m.sysSrvMsgUpdateTrace &= ~(1 << ctxt->spuNum); + spurs->m.xCC &= ~(1 << ctxt->spuNum); + spurs->m.xCC |= arg2 << ctxt->spuNum; + + notify = false; + if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { + spurs->m.xCD = 0; + notify = true; } - // Update workload message - if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); - updateWorkload = true; + if (arg4 && spurs->m.xCD != 0) { + spurs->m.xCD = 0; + notify = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Get trace parameters from CellSpurs and store them in the LS + if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0)) { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); + + if (ctxt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { + spursSysServiceTraceSaveCount(spu, ctxt); + } else { + spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, ctxt->dmaTagId); + auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); + ctxt->traceMsgCount = traceBuffer->count[ctxt->spuNum]; } - // Update trace message - if (spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { - updateTrace = true; + ctxt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[ctxt->spuNum] << 4); + ctxt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; + if (ctxt->traceBuffer == 0) { + ctxt->traceMsgCount = 0; } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - // Process update workload message - if (updateWorkload) { - spursSysServiceUpdateWorkload(spu, mgmt); } - // Process update trace message - if (updateTrace) { - spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); - } - - // Process terminate request - if (terminate) { - // TODO: Rest of the terminate processing + if (notify) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); } } -/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { - // Monitor only lock line reservation lost events - spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); +/// Restore state after executing the system workload +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt) { + u8 wklId; - bool shouldExit; - while (true) { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + if (spurs->m.sysSrvWorkload[ctxt->spuNum] == 0xFF) { + return; + } + + wklId = spurs->m.sysSrvWorkload[ctxt->spuNum]; + 
spurs->m.sysSrvWorkload[ctxt->spuNum] = 0xFF; + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + spursSysServiceActivateWorkload(spu, ctxt); + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - // Find the number of SPUs that are idling in this SPURS instance - u32 nIdlingSpus = 0; - for (u32 i = 0; i < 8; i++) { - if (spurs->m.spuIdling & (1 << i)) { - nIdlingSpus++; - } - } - - bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; - bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; - shouldExit = allSpusIdle && exitIfNoWork; - - // Check if any workloads can be scheduled - bool foundReadyWorkload = false; - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - foundReadyWorkload = true; + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); } else { - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - u32 j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); - - if (runnable && priority > 0 && maxContention > contention) { - if (wklFlag || wklSignal || readyCount > contention) { - foundReadyWorkload = true; - break; - } - } - } - } else { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
- u8 requestCount = readyCount + idleSpuCount;
-
- if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) {
- if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) {
- foundReadyWorkload = true;
- break;
- }
- }
- }
- }
+ spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
+ spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
}
+ } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
- bool spuIdling = spurs->m.spuIdling & (1 << mgmt->spuNum) ? true : false;
- if (foundReadyWorkload && shouldExit == false) {
- spurs->m.spuIdling &= ~(1 << mgmt->spuNum);
- } else {
- spurs->m.spuIdling |= 1 << mgmt->spuNum;
- }
+ // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace
+ // uses the current workload id to determine the workload to which the trace belongs
+ auto wklIdSaved = ctxt->wklCurrentId;
+ ctxt->wklCurrentId = wklId;
- // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events.
- if (spuIdling && shouldExit == false && foundReadyWorkload == false) {
- // The system service blocks by making a reservation and waiting on the lock line reservation lost event.
- u128 r;
- spu.ReadChannel(r, SPU_RdEventStat);
- spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR));
- }
-
- auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
- if (dmaSuccess && (shouldExit || foundReadyWorkload)) {
- break;
- }
- }
-
- if (shouldExit) {
- // TODO: exit spu thread group
- }
-}
-
-/// Main function for the system service workload
-void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) {
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x100);
-
- if (mgmt->spurs.addr() % CellSpurs::align) {
- spursHalt(spu);
- return;
- }
-
- // Initialise the system service if this is the first time its being started on this SPU
- if (mgmt->sysSrvInitialised == 0) {
- mgmt->sysSrvInitialised = 1;
-
- spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
-
- do {
- spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
- CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
-
- // Halt if already initialised
- if (spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) {
- spursHalt(spu);
- return;
- }
-
- spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum;
- } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
-
- mgmt->traceBuffer = 0;
- mgmt->traceMsgCount = -1;
- spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0);
- spursSysServiceCleanupAfterPreemption(spu, mgmt);
-
- // Trace - SERVICE: INIT
- CellSpursTracePacket pkt;
- memset(&pkt, 0, sizeof(pkt));
- pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
- pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
- cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
- }
-
- // Trace - START: Module='SYS '
+ // Trace - STOP: GUID
CellSpursTracePacket pkt;
memset(&pkt, 0, sizeof(pkt));
- pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
- memcpy(pkt.data.start.module, "SYS ",
4); - pkt.data.start.level = 1; // Policy module - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); - while (true) { - // Process messages for the system service workload - spursSysServiceProcessMessages(spu, mgmt); - -poll: - if (cellSpursModulePollStatus(spu, nullptr)) { - // Trace - SERVICE: EXIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - // Trace - STOP: GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = SPURS_GUID_SYS_WKL; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - break; - } - - // If we reach here it means that either there are more system service messages to be processed - // or there are no workloads that can be scheduled. - - // If the SPU is not idling then process the remaining system service messages - if (mgmt->spuIdling == 0) { - continue; - } - - // If we reach here it means that the SPU is idling - - // Trace - SERVICE: WAIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursSysServiceWaitOrExit(spu, mgmt); - goto poll; - } -} - -/// Entry point of the system service workload -bool spursSysServiceWorkloadEntry(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); - auto arg = spu.GPR[4]._u64[1]; - auto pollStatus = spu.GPR[5]._u32[3]; - - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - spursSysServiceWorkloadMain(spu, pollStatus); - } else { - // TODO: If we reach here it means the current workload was preempted to start the - // system service workload. Need to implement this. 
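The restructured system service code above leans on a single concurrency idiom: take a GETLLAR reservation on a 128-byte line of the shared CellSpurs structure, modify the copy in local storage, then PUTLLC it back and retry the whole block if another SPU broke the reservation in between. A minimal sketch of the pattern as it appears in these hunks, assuming the spursDma signature used throughout this file; updateLine is a hypothetical placeholder for the per-call-site modification:

    do {
        // Acquire a reservation on the 128-byte line holding wklState1
        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
        auto spurs = vm::get_ptr<CellSpurs>(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));

        updateLine(spurs); // modify the local copy

        // Conditional store; spursDma reports failure if the reservation was lost, so retry
    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);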
- }
-
- return false;
+ ctxt->wklCurrentId = wklIdSaved;
}
//////////////////////////////////////////////////////////////////////////////
@@ -1018,14 +1031,98 @@ enum SpursTasksetRequest {
SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6,
};
+/// Taskset PM entry point
+bool spursTasksetEntry(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto kernelCtxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
+
+ auto arg = spu.GPR[4]._u64[1];
+ auto pollStatus = spu.GPR[5]._u32[3];
+
+ // Initialise memory and save args
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->taskset.set(arg);
+ memcpy(ctxt->moduleId, "SPURSTASK MODULE", sizeof(ctxt->moduleId));
+ ctxt->kernelMgmtAddr = spu.GPR[3]._u32[3];
+ ctxt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
+ ctxt->spuNum = kernelCtxt->spuNum;
+ ctxt->dmaTagId = kernelCtxt->dmaTagId;
+ ctxt->taskId = 0xFFFFFFFF;
+
+ // Register SPURS taskset policy module HLE functions
+ spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
+ spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
+ spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
+
+ // Initialise the taskset policy module
+ spursTasksetInit(spu, pollStatus);
+
+ // Dispatch
+ spursTasksetDispatch(spu);
+ return false;
+}
+
+/// Entry point into the Taskset PM for task syscalls
+bool spursTasksetSyscallEntry(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+ // Save task context
+ ctxt->savedContextLr = spu.GPR[0];
+ ctxt->savedContextSp = spu.GPR[1];
+ for (auto i = 0; i < 48; i++) {
+ ctxt->savedContextR80ToR127[i] = spu.GPR[80 + i];
+ }
+
+ // Handle the syscall
+ spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
+
+ // Resume the previously executing task if the syscall did not cause a context switch
+ if (spu.m_is_branch == false) {
+ spursTasksetResumeTask(spu);
+ }
+
+ return false;
+}
+
+/// Resume a task
+void spursTasksetResumeTask(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+ // Restore task context
+ spu.GPR[0] = ctxt->savedContextLr;
+ spu.GPR[1] = ctxt->savedContextSp;
+ for (auto i = 0; i < 48; i++) {
+ spu.GPR[80 + i] = ctxt->savedContextR80ToR127[i];
+ }
+
+ spu.SetBranch(spu.GPR[0]._u32[3]);
+}
+
+/// Start a task
+void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+ spu.GPR[2].clear();
+ spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]);
+ spu.GPR[4]._u64[1] = taskset->m.args;
+ spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
+ for (auto i = 5; i < 128; i++) {
+ spu.GPR[i].clear();
+ }
+
+ spu.SetBranch(ctxt->savedContextLr.value()._u32[3]);
+}
+
+/// Process a request and update the state of the taskset
s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) {
- auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100);
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
s32 rc = CELL_OK;
s32 numNewlyReadyTasks;
do {
- spursDma(spu, MFC_GETLLAR_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
+ spursDma(spu, MFC_GETLLAR_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
// Verify taskset state is valid
@@ -1055,27 +1152,27 @@ s32
spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * switch (request) { case SPURS_TASKSET_REQUEST_POLL_SIGNAL: - rc = signalled._bit[mgmt->taskId] ? 1 : 0; - signalled._bit[mgmt->taskId] = false; + rc = signalled._bit[ctxt->taskId] ? 1 : 0; + signalled._bit[ctxt->taskId] = false; break; case SPURS_TASKSET_REQUEST_DESTROY_TASK: numNewlyReadyTasks--; - running._bit[mgmt->taskId] = false; - enabled._bit[mgmt->taskId] = false; - signalled._bit[mgmt->taskId] = false; - ready._bit[mgmt->taskId] = false; + running._bit[ctxt->taskId] = false; + enabled._bit[ctxt->taskId] = false; + signalled._bit[ctxt->taskId] = false; + ready._bit[ctxt->taskId] = false; break; case SPURS_TASKSET_REQUEST_YIELD_TASK: - running._bit[mgmt->taskId] = false; - waiting._bit[mgmt->taskId] = true; + running._bit[ctxt->taskId] = false; + waiting._bit[ctxt->taskId] = true; break; case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: - if (signalled._bit[mgmt->taskId]) { + if (signalled._bit[ctxt->taskId]) { numNewlyReadyTasks--; - running._bit[mgmt->taskId] = false; - waiting._bit[mgmt->taskId] = true; - signalled._bit[mgmt->taskId] = false; - ready._bit[mgmt->taskId] = false; + running._bit[ctxt->taskId] = false; + waiting._bit[ctxt->taskId] = true; + signalled._bit[ctxt->taskId] = false; + ready._bit[ctxt->taskId] = false; } break; case SPURS_TASKSET_REQUEST_POLL: @@ -1093,9 +1190,9 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * rc = 0; } else if (taskset->m.wkl_flag_wait_task == 0x80) { // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag. - taskset->m.wkl_flag_wait_task = mgmt->taskId; - running._bit[mgmt->taskId] = false; - waiting._bit[mgmt->taskId] = true; + taskset->m.wkl_flag_wait_task = ctxt->taskId; + running._bit[ctxt->taskId] = false; + waiting._bit[ctxt->taskId] = true; rc = 1; numNewlyReadyTasks--; } else { @@ -1132,8 +1229,8 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0; if (selectedTaskId != CELL_SPURS_MAX_TASK) { taskset->m.last_scheduled_task = selectedTaskId; - running._bit[mgmt->taskId] = true; - waiting._bit[mgmt->taskId] = false; + running._bit[ctxt->taskId] = true; + waiting._bit[ctxt->taskId] = false; } break; case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: @@ -1159,101 +1256,138 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * taskset->m.enabled = enabled; taskset->m.signalled = signalled; taskset->m.ready = ready; - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); // Increment the ready count of the workload by the number of tasks that have become ready do { - spursDma(spu, MFC_GETLLAR_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - s32 readyCount = kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed(); + s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? 
spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed(); readyCount += numNewlyReadyTasks; readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount; - if (kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { - spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(readyCount); + if (kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].write_relaxed(readyCount); } else { - spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(readyCount); + spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount); } - } while (spursDma(spu, MFC_PUTLLC_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); return rc; } -s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) { - if (elfAddr == 0 || (elfAddr & 0x0F) != 0) { - return CELL_SPURS_TASK_ERROR_INVAL; +/// Process pollStatus received from the SPURS kernel +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { + if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { + spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr); } - - vfsStreamMemory stream(elfAddr); - loader::handlers::elf32 loader; - auto rc = loader.init(stream); - if (rc != loader::handler::ok) { - return CELL_SPURS_TASK_ERROR_NOEXEC; - } - - u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM; - for (auto & phdr : loader.m_phdrs) { - if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) { - break; - } - - if (phdr.data_be.p_type == 1/*PT_LOAD*/) { - if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) { - if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP || - phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) { - return CELL_SPURS_TASK_ERROR_FAULT; - } - - _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr; - } - } - } - - loader.load_data(spu.ls_offset, skipWriteableSegments); - *entryPoint = loader.m_ehdr.data_be.e_entry; - if (*lowestLoadAddr) { - *lowestLoadAddr = _lowestLoadAddr; - } - - return CELL_OK; } +/// Check execution rights +bool spursTasksetPollStatus(SPUThread & spu) { + u32 pollStatus; + + if (cellSpursModulePollStatus(spu, &pollStatus)) { + return true; + } + + spursTasksetProcessPollStatus(spu, pollStatus); + return false; +} + +/// Exit the Taskset PM void spursTasksetExit(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); // Trace - STOP CellSpursTracePacket pkt; memset(&pkt, 0, sizeof(pkt)); pkt.header.tag = 0x54; // Its not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP pkt.data.stop = SPURS_GUID_TASKSET_PM; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); // Not sure why this check exists. Perhaps to check for memory corruption. 
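The ready-count update in spursTasksetProcessRequest above adds the number of newly ready tasks and then clamps the result into the unsigned 8-bit range that a workload's ready count occupies. The same saturating update as a standalone sketch, using the codebase's u8/s32 typedefs; clampReadyCount is an illustrative name, not a function from the patch:

    static u8 clampReadyCount(s32 current, s32 delta) {
        s32 count = current + delta;          // delta may be negative when tasks are consumed
        count = count < 0 ? 0 : count;        // never drop below zero
        count = count > 0xFF ? 0xFF : count;  // saturate at the u8 maximum
        return static_cast<u8>(count);
    }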
- if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) {
+ if (memcmp(ctxt->moduleId, "SPURSTASK MODULE", 16) != 0) {
spursHalt(spu);
}
cellSpursModuleExit(spu);
}
-void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700);
- auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+/// Invoked when a task exits
+void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
- spu.GPR[2].clear();
- spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]);
- spu.GPR[4]._u64[1] = taskset->m.args;
- spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
- for (auto i = 5; i < 128; i++) {
- spu.GPR[i].clear();
- }
+ spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0);
+ spursDmaWaitForCompletion(spu, 1);
- spu.SetBranch(mgmt->savedContextLr.value()._u32[3]);
+ spu.GPR[3]._u64[1] = ctxt->taskset.addr();
+ spu.GPR[4]._u32[3] = taskId;
+ spu.GPR[5]._u32[3] = exitCode;
+ spu.GPR[6]._u64[1] = args;
+ spu.FastCall(0x10000);
}
+/// Save the context of a task
+s32 spursTasketSaveTaskContext(SPUThread & spu) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780);
+
+ spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+
+ if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) {
+ return CELL_SPURS_TASK_ERROR_STAT;
+ }
+
+ u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
+ u32 lsBlocks = 0;
+ for (auto i = 0; i < 128; i++) {
+ if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) {
+ lsBlocks++;
+ }
+ }
+
+ if (lsBlocks > allocLsBlocks) {
+ return CELL_SPURS_TASK_ERROR_STAT;
+ }
+
+ // Make sure the stack area is specified in the ls pattern
+ for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) {
+ if ((taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) == 0) {
+ return CELL_SPURS_TASK_ERROR_STAT;
+ }
+ }
+
+ // Get the processor context
+ u128 r;
+ spu.FPSCR.Read(r);
+ ctxt->savedContextFpscr = r;
+ spu.ReadChannel(r, SPU_RdEventMask);
+ ctxt->savedSpuWriteEventMask = r._u32[3];
+ spu.ReadChannel(r, MFC_RdTagMask);
+ ctxt->savedWriteTagGroupQueryMask = r._u32[3];
+
+ // Store the processor context
+ u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+ spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+
+ // Save LS context
+ for (auto i = 6; i < 128; i++) {
+ bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ?
true : false; + if (shouldStore) { + // TODO: Combine DMA requests for consecutive blocks into a single request + spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); + } + } + + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + return CELL_OK; +} + +/// Taskset dispatcher void spursTasksetDispatch(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); u32 taskId; @@ -1264,11 +1398,11 @@ void spursTasksetDispatch(SPUThread & spu) { return; } - mgmt->taskId = taskId; + ctxt->taskId = taskId; // DMA in the task info for the selected task - spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), mgmt->dmaTagId); - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), ctxt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); auto elfAddr = taskInfo->elf_addr.addr().value(); taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull); @@ -1283,7 +1417,7 @@ void spursTasksetDispatch(SPUThread & spu) { if (isWaiting == 0) { // If we reach here it means that the task is being started and not being resumed - mgmt->guidAddr = CELL_SPURS_TASK_TOP; + ctxt->guidAddr = CELL_SPURS_TASK_TOP; u32 entryPoint; u32 lowestLoadAddr; @@ -1292,17 +1426,17 @@ void spursTasksetDispatch(SPUThread & spu) { return; } - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); - mgmt->savedContextLr.value()._u32[3] = entryPoint; - mgmt->guidAddr = lowestLoadAddr; - mgmt->tasksetMgmtAddr = 0x2700; - mgmt->x2FC0 = 0; - mgmt->taskExitCode = isWaiting; - mgmt->x2FD4 = elfAddr & 5; // TODO: Figure this out + ctxt->savedContextLr.value()._u32[3] = entryPoint; + ctxt->guidAddr = lowestLoadAddr; + ctxt->tasksetMgmtAddr = 0x2700; + ctxt->x2FC0 = 0; + ctxt->taskExitCode = isWaiting; + ctxt->x2FD4 = elfAddr & 5; // TODO: Figure this out if ((elfAddr & 5) == 1) { - spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, mgmt->dmaTagId); + spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, ctxt->dmaTagId); } // Trace - GUID @@ -1340,16 +1474,16 @@ void spursTasksetDispatch(SPUThread & spu) { bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? 
true : false; if (shouldLoad) { // TODO: Combine DMA requests for consecutive blocks into a single request - spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); + spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); } } - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); // Restore saved registers - spu.FPSCR.Write(mgmt->savedContextFpscr.value()); - spu.WriteChannel(MFC_WrTagMask, u128::from32r(mgmt->savedWriteTagGroupQueryMask)); - spu.WriteChannel(SPU_WrEventMask, u128::from32r(mgmt->savedSpuWriteEventMask)); + spu.FPSCR.Write(ctxt->savedContextFpscr.value()); + spu.WriteChannel(MFC_WrTagMask, u128::from32r(ctxt->savedWriteTagGroupQueryMask)); + spu.WriteChannel(SPU_WrEventMask, u128::from32r(ctxt->savedSpuWriteEventMask)); // Trace - GUID memset(&pkt, 0, sizeof(pkt)); @@ -1368,125 +1502,9 @@ void spursTasksetDispatch(SPUThread & spu) { } } -void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) { - if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { - spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr); - } -} - -bool spursTasksetPollStatus(SPUThread & spu) { - u32 pollStatus; - - if (cellSpursModulePollStatus(spu, &pollStatus)) { - return true; - } - - spursTasksetProcessPollStatus(spu, pollStatus); - return false; -} - -void spursTasksetInit(SPUThread & spu, u32 pollStatus) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - auto kernelMgmt = vm::get_ptr(spu.ls_offset + 0x100); - - kernelMgmt->moduleId[0] = 'T'; - kernelMgmt->moduleId[1] = 'K'; - - // Trace - START: Module='TKST' - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = 0x52; // Its not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START - memcpy(pkt.data.start.module, "TKST", 4); - pkt.data.start.level = 2; - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursTasksetProcessPollStatus(spu, pollStatus); -} - -void spursTasksetResumeTask(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - - // Restore task context - spu.GPR[0] = mgmt->savedContextLr; - spu.GPR[1] = mgmt->savedContextSp; - for (auto i = 0; i < 48; i++) { - spu.GPR[80 + i] = mgmt->savedContextR80ToR127[i]; - } - - spu.SetBranch(spu.GPR[0]._u32[3]); -} - -s32 spursTasketSaveTaskContext(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); - - spursDmaWaitForCompletion(spu, 0xFFFFFFFF); - - if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) { - return CELL_SPURS_TASK_ERROR_STAT; - } - - u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; - u32 lsBlocks = 0; - for (auto i = 0; i < 128; i++) { - if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) { - lsBlocks++; - } - } - - if (lsBlocks > allocLsBlocks) { - return CELL_SPURS_TASK_ERROR_STAT; - } - - // Make sure the stack is area is specified in the ls pattern - for (auto i = (mgmt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if ((taskInfo->ls_pattern.u64[i < 64 ? 
1 : 0] & (0x8000000000000000ull >> i)) == 0) { - return CELL_SPURS_TASK_ERROR_STAT; - } - } - - // Get the processor context - u128 r; - spu.FPSCR.Read(r); - mgmt->savedContextFpscr = r; - spu.ReadChannel(r, SPU_RdEventMask); - mgmt->savedSpuWriteEventMask = r._u32[3]; - spu.ReadChannel(r, MFC_RdTagMask); - mgmt->savedWriteTagGroupQueryMask = r._u32[3]; - - // Store the processor context - u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; - spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, mgmt->dmaTagId); - - // Save LS context - for (auto i = 6; i < 128; i++) { - bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; - if (shouldStore) { - // TODO: Combine DMA requests for consecutive blocks into a single request - spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId); - } - } - - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - return CELL_OK; -} - -void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); - - spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0); - spursDmaWaitForCompletion(spu, 1); - - spu.GPR[3]._u64[1] = mgmt->taskset.addr(); - spu.GPR[4]._u32[3] = taskId; - spu.GPR[5]._u32[3] = exitCode; - spu.GPR[6]._u64[1] = args; - spu.FastCall(0x10000); -} - +/// Process a syscall request s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); // If the 0x10 bit is set in syscallNum then its the 2nd version of the @@ -1501,14 +1519,14 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { u32 incident = 0; switch (syscallNum) { case CELL_SPURS_TASK_SYSCALL_EXIT: - if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out - if (mgmt->x2FD4 != 4) { + if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out + if (ctxt->x2FD4 != 4) { spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr); } - auto addr = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0; - auto args = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8; - spursTasksetOnTaskExit(spu, addr, mgmt->taskId, mgmt->taskExitCode, args); + auto addr = ctxt->x2FD4 == 4 ? taskset->m.x78 : ctxt->x2FC0; + auto args = ctxt->x2FD4 == 4 ? 
0 : ctxt->x2FC8;
+ spursTasksetOnTaskExit(spu, addr, ctxt->taskId, ctxt->taskExitCode, args);
}
incident = CELL_SPURS_TRACE_TASK_EXIT;
@@ -1561,11 +1579,11 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
memset(&pkt, 0, sizeof(pkt));
pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
pkt.data.task.incident = incident;
- pkt.data.task.taskId = mgmt->taskId;
- cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+ pkt.data.task.taskId = ctxt->taskId;
+ cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
// Clear the GUID of the task
- memset(vm::get_ptr(spu.ls_offset + mgmt->guidAddr), 0, 0x10);
+ memset(vm::get_ptr(spu.ls_offset + ctxt->guidAddr), 0, 0x10);
if (spursTasksetPollStatus(spu)) {
spursTasksetExit(spu);
@@ -1577,49 +1595,62 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
return rc;
}
-bool spursTasksetEntry(SPUThread & spu) {
- auto mgmt = vm::get_ptr(spu.ls_offset + 0x2700);
+/// Initialise the Taskset PM
+void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
+ auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+ auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
- if (spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR) {
- // Called from kernel
- auto kernelMgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
- auto arg = spu.GPR[4]._u64[1];
- auto pollStatus = spu.GPR[5]._u32[3];
+ kernelCtxt->moduleId[0] = 'T';
+ kernelCtxt->moduleId[1] = 'K';
- // Initialise memory and save args
- memset(mgmt, 0, sizeof(*mgmt));
- mgmt->taskset.set(arg);
- memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16);
- mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3];
- mgmt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
- mgmt->spuNum = kernelMgmt->spuNum;
- mgmt->dmaTagId = kernelMgmt->dmaTagId;
- mgmt->taskId = 0xFFFFFFFF;
+ // Trace - START: Module='TKST'
+ CellSpursTracePacket pkt;
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
+ memcpy(pkt.data.start.module, "TKST", 4);
+ pkt.data.start.level = 2;
+ pkt.data.start.ls = 0xA00 >> 2;
+ cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
- // Register SPURS takset policy module HLE functions
- spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000); // TODO: use a symbolic constant
- spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
- spu.RegisterHleFunction(mgmt->syscallAddr, spursTasksetEntry);
+ spursTasksetProcessPollStatus(spu, pollStatus);
+}
- spursTasksetInit(spu, pollStatus);
- spursTasksetDispatch(spu);
- } else if (spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR) {
- // Save task context
- mgmt->savedContextLr = spu.GPR[0];
- mgmt->savedContextSp = spu.GPR[1];
- for (auto i = 0; i < 48; i++) {
- mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i];
- }
-
- spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
-
- // Resume the previously executing task if the syscall did not cause a context switch
- if (spu.m_is_branch == false) {
- spursTasksetResumeTask(spu);
- }
- } else {
- assert(0);
+/// Load an ELF
+s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) {
+ if (elfAddr == 0 || (elfAddr & 0x0F) != 0) {
+ return CELL_SPURS_TASK_ERROR_INVAL;
}
- return false;
+ vfsStreamMemory stream(elfAddr);
+ loader::handlers::elf32 loader;
+ auto rc = loader.init(stream);
+ if (rc != loader::handler::ok) {
+ return CELL_SPURS_TASK_ERROR_NOEXEC;
+ }
+
+ u32 _lowestLoadAddr =
CELL_SPURS_TASK_BOTTOM; + for (auto & phdr : loader.m_phdrs) { + if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) { + break; + } + + if (phdr.data_be.p_type == 1/*PT_LOAD*/) { + if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) { + if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP || + phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) { + return CELL_SPURS_TASK_ERROR_FAULT; + } + + _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr; + } + } + } + + loader.load_data(spu.ls_offset, skipWriteableSegments); + *entryPoint = loader.m_ehdr.data_be.e_entry; + if (*lowestLoadAddr) { + *lowestLoadAddr = _lowestLoadAddr; + } + + return CELL_OK; } From 67342781b7a91ac4d0737279c6e6d1ea565b3c7b Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 3 Feb 2015 02:13:32 +0530 Subject: [PATCH 26/29] SPURS: Fix some issues --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 78 +++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 56 ++++++++------- 2 files changed, 95 insertions(+), 39 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 18eed2c944..dad4f10484 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2588,7 +2588,7 @@ s64 spursCreateTaskset(vm::ptr spurs, vm::ptr tasks taskset->m.size = size; vm::var wkl_attr; - _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(16) /*pm*/, 0x1E40 /*pm_size*/, + _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr::make(SPURS_IMG_ADDR_TASKSET_PM), 0x1E40 /*pm_size*/, taskset.addr(), priority, 8 /*min_contention*/, max_contention); // TODO: Check return code @@ -2768,14 +2768,16 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: if (ls_pattern.addr() != 0) { u32 ls_blocks = 0; - for (u32 i = 0; i < 2; i++) + for (auto i = 0; i < 64; i++) { - for (u32 j = 0; j < 64; j++) + if (ls_pattern->u64[0] & ((u64)1 << i)) { - if (ls_pattern->u64[0] & ((u64)1 << j)) - { - ls_blocks++; - } + ls_blocks++; + } + + if (ls_pattern->u64[1] & ((u64)1 << i)) + { + ls_blocks++; } } @@ -2803,7 +2805,9 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: { if (!taskset->m.enabled.value()._bit[tmp_task_id]) { - taskset->m.enabled.value()._bit[tmp_task_id] = true; + auto enabled = taskset->m.enabled.value(); + enabled._bit[tmp_task_id] = true; + taskset->m.enabled = enabled; break; } } @@ -2818,23 +2822,73 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: for (u32 i = 0; i < 2; i++) { taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; - taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern != 0 ? 
ls_pattern->u64[i] : 0; + if (ls_pattern.addr()) + { + taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern->u64[i]; + } } *task_id = tmp_task_id; return CELL_OK; } -s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, +s64 spursTaskStart(vm::ptr taskset, u32 taskId) +{ + auto pendingReady = taskset->m.pending_ready.value(); + pendingReady._bit[taskId] = true; + taskset->m.pending_ready = pendingReady; + + cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); + auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr::make((u32)taskset->m.spurs.addr())); + if (rc != CELL_OK) + { + if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT) + { + rc = CELL_SPURS_TASK_ERROR_STAT; + } + else + { + assert(0); + } + } + + return rc; +} + +s64 cellSpursCreateTask(vm::ptr taskset, vm::ptr taskId, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr lsPattern, vm::ptr argument) { cellSpurs->Warning("cellSpursCreateTask(taskset_addr=0x%x, taskID_addr=0x%x, elf_addr_addr=0x%x, context_addr_addr=0x%x, context_size=%d, lsPattern_addr=0x%x, argument_addr=0x%x)", - taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); + taskset.addr(), taskId.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr()); #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc); #else - return spursCreateTask(taskset, taskID, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + vm::var tmpTaskId; + auto rc = spursCreateTask(taskset, tmpTaskId, vm::ptr::make(elf_addr), vm::ptr::make(context_addr), context_size, lsPattern, argument); + if (rc != CELL_OK) + { + return rc; + } + + rc = spursTaskStart(taskset, tmpTaskId); + if (rc != CELL_OK) + { + return rc; + } + + *taskId = tmpTaskId; + return CELL_OK; #endif } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 451bfef848..aa1b25a081 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -116,7 +116,9 @@ bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { u128 rv; spu.ReadChannel(rv, MFC_RdAtomicStat); - return rv._u32[3] ? true : false; + auto success = rv._u32[3] ? true : false; + success = cmd == MFC_PUTLLC_CMD ? !success : success; + return success; } return true; @@ -442,11 +444,11 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { /// SPURS kernel dispatch workload void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { - SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); - auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - auto pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); + auto pollStatus = (u32)widAndPollStatus; + auto wid = (u32)(widAndPollStatus >> 32); // DMA in the workload info for the selected workload auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? 
offsetof(CellSpurs, m.wklInfo1[wid]) : @@ -491,8 +493,8 @@ void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { /// SPURS kernel workload exit bool spursKernelWorkloadExit(SPUThread & spu) { - SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); - auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; // Select next workload to run spu.GPR[3].clear(); @@ -508,7 +510,8 @@ bool spursKernelWorkloadExit(SPUThread & spu) { /// SPURS kernel entry point bool spursKernelEntry(SPUThread & spu) { - SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + memset(ctxt, 0, sizeof(SpursKernelContext)); // Save arguments ctxt->spuNum = spu.GPR[3]._u32[3]; @@ -516,8 +519,6 @@ bool spursKernelEntry(SPUThread & spu) { auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - memset(ctxt, 0, sizeof(SpursKernelContext)); - // Initialise the SPURS context to its initial values ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; ctxt->wklCurrentUniqueId = 0x20; @@ -565,7 +566,8 @@ bool spursSysServiceEntry(SPUThread & spu) { // TODO: If we reach here it means the current workload was preempted to start the // system workload. Need to implement this. } - + + cellSpursModuleExit(spu); return false; } @@ -599,11 +601,11 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { if (spurs->m.flags1 & SF1_32_WORKLOADS) { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { u32 j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); @@ -616,8 +618,8 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { } } else { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 runnable = ctxt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i); + u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); @@ -676,7 +678,7 @@ void spursSysServiceMain(SPUThread & spu, u32 pollStatus) { do { spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); // Halt if already initialised if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) { @@ -1167,7 +1169,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * waiting._bit[ctxt->taskId] = true; break; case SPURS_TASKSET_REQUEST_WAIT_SIGNAL: - if (signalled._bit[ctxt->taskId]) { + if (signalled._bit[ctxt->taskId] == false) { numNewlyReadyTasks--; running._bit[ctxt->taskId] = false; waiting._bit[ctxt->taskId] = true; @@ -1229,8 +1231,8 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0; if (selectedTaskId != CELL_SPURS_MAX_TASK) { taskset->m.last_scheduled_task = selectedTaskId; - running._bit[ctxt->taskId] = true; - waiting._bit[ctxt->taskId] = false; + running._bit[selectedTaskId] = true; + waiting._bit[selectedTaskId] = false; } break; case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG: @@ -1343,7 +1345,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; u32 lsBlocks = 0; for (auto i = 0; i < 128; i++) { - if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) { + if (taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) { lsBlocks++; } } @@ -1354,7 +1356,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Make sure the stack is area is specified in the ls pattern for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if ((taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) == 0) { + if ((taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) == 0) { return CELL_SPURS_TASK_ERROR_STAT; } } @@ -1374,7 +1376,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Save LS context for (auto i = 6; i < 128; i++) { - bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; + bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i) ? 
true : false; if (shouldStore) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); @@ -1417,6 +1419,7 @@ void spursTasksetDispatch(SPUThread & spu) { if (isWaiting == 0) { // If we reach here it means that the task is being started and not being resumed + memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP); ctxt->guidAddr = CELL_SPURS_TASK_TOP; u32 entryPoint; @@ -1428,7 +1431,7 @@ void spursTasksetDispatch(SPUThread & spu) { spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); - ctxt->savedContextLr.value()._u32[3] = entryPoint; + ctxt->savedContextLr = u128::from32r(entryPoint); ctxt->guidAddr = lowestLoadAddr; ctxt->tasksetMgmtAddr = 0x2700; ctxt->x2FC0 = 0; @@ -1512,12 +1515,11 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { // for DMA completion if ((syscallNum & 0x10) == 0) { spursDmaWaitForCompletion(spu, 0xFFFFFFFF); - syscallNum &= 0x0F; } s32 rc = 0; u32 incident = 0; - switch (syscallNum) { + switch (syscallNum & 0x0F) { case CELL_SPURS_TASK_SYSCALL_EXIT: if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out if (ctxt->x2FD4 != 4) { @@ -1587,9 +1589,9 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { if (spursTasksetPollStatus(spu)) { spursTasksetExit(spu); + } else { + spursTasksetDispatch(spu); } - - spursTasksetDispatch(spu); } return rc; From 0191955ab67d178f6b27526853a63ed800b049d9 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 3 Feb 2015 11:03:49 +0530 Subject: [PATCH 27/29] SPURS: Fix more issues --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 6 ++++-- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 21 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index dad4f10484..2d37755f48 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -2923,8 +2923,10 @@ s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskId) return CELL_SPURS_TASK_ERROR_SRCH; } - auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? true : false; - ((u128)taskset->m.signalled)._bit[taskId] = true; + auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? 
true : false; + auto signalled = taskset->m.signalled.value(); + signalled._bit[taskId] = true; + taskset->m.signalled = signalled; if (shouldSignal) { cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index aa1b25a081..bbcc00e056 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -270,6 +270,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklCurrentContention[i] = contention[i]; + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; ctxt->wklLocContention[i] = 0; ctxt->wklLocPendingContention[i] = 0; } @@ -294,6 +295,12 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { ctxt->wklLocPendingContention[wklSelectedId] = 1; } + } else { + // Not called by kernel and no context switch is required + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + ctxt->wklLocPendingContention[i] = 0; + } } } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); @@ -414,6 +421,7 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; ctxt->wklLocContention[i] = 0; ctxt->wklLocPendingContention[i] = 0; } @@ -433,6 +441,12 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { } ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + } else { + // Not called by kernel and no context switch is required + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + ctxt->wklLocPendingContention[i] = 0; + } } } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); @@ -1461,8 +1475,8 @@ void spursTasksetDispatch(SPUThread & spu) { } // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well - if (taskInfo->ls_pattern.u64[0] != 0xFFFFFFFFFFFFFFFFull || - (taskInfo->ls_pattern.u64[1] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + if (taskInfo->ls_pattern.u64[1] != 0xFFFFFFFFFFFFFFFFull || + (taskInfo->ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { // Load the ELF u32 entryPoint; if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { @@ -1473,8 +1487,9 @@ void spursTasksetDispatch(SPUThread & spu) { // Load saved context from main memory to LS u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; + spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId); for (auto i = 6; i < 128; i++) { - bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false; + bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 
0 : 1] & (0x8000000000000000ull >> i) ? true : false; if (shouldLoad) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); From daaa5059e9da18efeabd2e94e71159786f735092 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 4 Feb 2015 20:59:34 +0530 Subject: [PATCH 28/29] SPURS: Fixed more issues --- Utilities/BEType.h | 20 ++++++++++--- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 32 +++++++-------------- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 7 ++--- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 15 ++++------ 4 files changed, 35 insertions(+), 39 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 4965eac336..8cf5e9a89c 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -1,5 +1,7 @@ #pragma once +#define IS_LE_MACHINE + union _CRT_ALIGN(16) u128 { u64 _u64[2]; @@ -136,16 +138,28 @@ union _CRT_ALIGN(16) u128 } }; + // Index 0 returns the MSB and index 127 returns the LSB bit_element operator [] (u32 index) { assert(index < 128); - return bit_element(data[index / 64], 1ull << (index % 64)); + +#ifdef IS_LE_MACHINE + return bit_element(data[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F)); +#else + return bit_element(data[index >> 6], 0x8000000000000000ull >> (index & 0x3F)); +#endif } + // Index 0 returns the MSB and index 127 returns the LSB const bool operator [] (u32 index) const { assert(index < 128); - return (data[index / 64] & (1ull << (index % 64))) != 0; + +#ifdef IS_LE_MACHINE + return (data[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0; +#else + return (data[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0; +#endif } } _bit; @@ -509,8 +523,6 @@ struct be_storage_t typedef u128 type; }; -#define IS_LE_MACHINE - template class be_t { diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 2d37755f48..18eaca3722 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -34,9 +34,9 @@ s64 spursCreateLv2EventQueue(vm::ptr spurs, u32& queue_id, vm::ptr> queue; - s32 res = cb_call, vm::ptr, vm::ptr, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc, + s32 res = cb_call, vm::ptr>, vm::ptr, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc, spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C)); - queue_id = queue; + queue_id = queue.value(); return res; #endif @@ -2768,14 +2768,9 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: if (ls_pattern.addr() != 0) { u32 ls_blocks = 0; - for (auto i = 0; i < 64; i++) + for (auto i = 0; i < 128; i++) { - if (ls_pattern->u64[0] & ((u64)1 << i)) - { - ls_blocks++; - } - - if (ls_pattern->u64[1] & ((u64)1 << i)) + if (ls_pattern->_u128.value()._bit[i]) { ls_blocks++; } @@ -2786,7 +2781,8 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: return CELL_SPURS_TASK_ERROR_INVAL; } - if (ls_pattern->u32[0] & 0xFC000000) + u128 _0 = u128::from32(0); + if ((ls_pattern->_u128.value() & u128::from32r(0xFC000000)) != _0) { // Prevent save/restore to SPURS management area return CELL_SPURS_TASK_ERROR_INVAL; @@ -2819,13 +2815,10 @@ s64 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr()); taskset->m.task_info[tmp_task_id].context_save_storage_and_alloc_ls_blocks = (context_addr.addr() | alloc_ls_blocks); - for (u32 
i = 0; i < 2; i++) + taskset->m.task_info[tmp_task_id].args = *arg; + if (ls_pattern.addr()) { - taskset->m.task_info[tmp_task_id].args.u64[i] = arg != 0 ? arg->u64[i] : 0; - if (ls_pattern.addr()) - { - taskset->m.task_info[tmp_task_id].ls_pattern.u64[i] = ls_pattern->u64[i]; - } + taskset->m.task_info[tmp_task_id].ls_pattern = *ls_pattern; } *task_id = tmp_task_id; @@ -3162,12 +3155,7 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut for (s32 c = 0; c < 4; c++) { - attribute->lsPattern.u32[c] = 0; - } - - for (s32 i = 0; i < 2; i++) - { - attribute->lsPattern.u64[i] = 0; + attribute->lsPattern._u128 = u128::from64r(0); } attribute->name_addr = 0; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 3188d3238c..348c795653 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -413,6 +413,7 @@ struct CellSpurs be_t arg; // spu argument be_t size; atomic_t uniqueId; // The unique id is the same for all workloads with the same addr + u8 pad[3]; u8 priority[8]; }; @@ -622,14 +623,12 @@ struct CellSpursEventFlag union CellSpursTaskArgument { - be_t u32[4]; - be_t u64[2]; + be_t _u128; }; union CellSpursTaskLsPattern { - be_t u32[4]; - be_t u64[2]; + be_t _u128; }; struct CellSpursTaskset diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index bbcc00e056..9070a8ce8e 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -1120,7 +1120,7 @@ void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) { auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); spu.GPR[2].clear(); - spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]); + spu.GPR[3] = taskArgs._u128; spu.GPR[4]._u64[1] = taskset->m.args; spu.GPR[4]._u64[0] = taskset->m.spurs.addr(); for (auto i = 5; i < 128; i++) { @@ -1359,7 +1359,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; u32 lsBlocks = 0; for (auto i = 0; i < 128; i++) { - if (taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) { + if (taskInfo->ls_pattern._u128.value()._bit[i]) { lsBlocks++; } } @@ -1370,7 +1370,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Make sure the stack is area is specified in the ls pattern for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) { - if ((taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i)) == 0) { + if (taskInfo->ls_pattern._u128.value()._bit[i] == false) { return CELL_SPURS_TASK_ERROR_STAT; } } @@ -1390,8 +1390,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { // Save LS context for (auto i = 6; i < 128; i++) { - bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i) ? 
true : false; - if (shouldStore) { + if (taskInfo->ls_pattern._u128.value()._bit[i]) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); } @@ -1475,8 +1474,7 @@ void spursTasksetDispatch(SPUThread & spu) { } // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well - if (taskInfo->ls_pattern.u64[1] != 0xFFFFFFFFFFFFFFFFull || - (taskInfo->ls_pattern.u64[0] | 0xFC00000000000000ull) != 0xFFFFFFFFFFFFFFFFull) { + if (taskInfo->ls_pattern._u128.value() != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) { // Load the ELF u32 entryPoint; if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { @@ -1489,8 +1487,7 @@ void spursTasksetDispatch(SPUThread & spu) { u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId); for (auto i = 6; i < 128; i++) { - bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 0 : 1] & (0x8000000000000000ull >> i) ? true : false; - if (shouldLoad) { + if (taskInfo->ls_pattern._u128.value()._bit[i]) { // TODO: Combine DMA requests for consecutive blocks into a single request spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); } From d675c67f798acf696818782aa181b2b8abb1ad20 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 11 Feb 2015 15:45:43 +0530 Subject: [PATCH 29/29] SPURS: Disable the SPURS kernel --- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 9070a8ce8e..d1dc487eeb 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -524,6 +524,13 @@ bool spursKernelWorkloadExit(SPUThread & spu) { /// SPURS kernel entry point bool spursKernelEntry(SPUThread & spu) { + while (true) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + if (Emu.IsStopped()) { + return false; + } + } + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); memset(ctxt, 0, sizeof(SpursKernelContext));
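The BEType.h change in PATCH 28 redefines u128 bit indexing so that index 0 addresses the most significant bit and index 127 the least significant, which is what makes checks such as taskInfo->ls_pattern._u128.value()._bit[i] line up with the MSB-first LS block numbering used by the taskset code. A host-independent restatement of the rule that patch implements for IS_LE_MACHINE builds; testBitMsbFirst is an illustrative helper, not part of the patch:

    static bool testBitMsbFirst(const u64 (&data)[2], u32 index) {
        // On a little-endian host, data[1] holds bits 0..63 (the most significant
        // qword) and data[0] holds bits 64..127, exactly as in the new operator[]
        const u64 qword = data[1 - (index >> 6)];
        return (qword & (0x8000000000000000ull >> (index & 0x3F))) != 0;
    }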