diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h index 4523131b46..839987cd68 100644 --- a/rpcs3/Emu/Cell/MFC.h +++ b/rpcs3/Emu/Cell/MFC.h @@ -151,6 +151,27 @@ struct DMAC long queue_lock; long proxy_lock; + bool ProcessCmd(u32 cmd, u32 tag, u32 lsa, u64 ea, u32 size) + { + //returns true if the command should be deleted from the queue + if (cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK)) _mm_mfence(); + + switch(cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) + { + case MFC_PUT_CMD: + memcpy(Memory + ea, Memory + ls_offset + lsa, size); + return true; + + case MFC_GET_CMD: + memcpy(Memory + ls_offset + lsa, Memory + ea, size); + return true; + + default: + ConLog.Error("Unknown DMA cmd."); + return true; + } + } + u32 Cmd(u32 cmd, u32 tag, u32 lsa, u64 ea, u32 size) { if(!Memory.IsGoodAddr(ls_offset + lsa, size) || !Memory.IsGoodAddr(ea, size)) @@ -163,7 +184,7 @@ struct DMAC return MFC_PPU_DMA_QUEUE_FULL; } - while (_InterlockedExchange(&proxy_lock, 1)); + /* while (_InterlockedExchange(&proxy_lock, 1)); _mm_lfence(); DMAC_Proxy& p = proxy[proxy_pos]; p.cmd = cmd; @@ -174,7 +195,8 @@ struct DMAC _mm_sfence(); //for DoCmd() proxy_pos++; _mm_sfence(); - proxy_lock = 0; + proxy_lock = 0; */ + ProcessCmd(cmd, tag, lsa, ea, size); return MFC_PPU_DMA_CMD_ENQUEUE_SUCCESSFUL; } @@ -193,22 +215,9 @@ struct DMAC if(proxy_pos) { const DMAC_Proxy& p = proxy[0]; - - switch(p.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) //barrier/fence ignored + if (ProcessCmd(p.cmd, p.tag, p.lsa, p.ea, p.size)) { - case MFC_PUT_CMD: - memcpy(Memory + p.ea, Memory + ls_offset + p.lsa, p.size); ClearCmd(); - break; - - case MFC_GET_CMD: - memcpy(Memory + ls_offset + p.lsa, Memory + p.ea, p.size); - ClearCmd(); - break; - - default: - ConLog.Error("Unknown DMA cmd."); - break; } } } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index e434b2b2cc..bcd9391f7a 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -279,10 +279,20 @@ public: { public: static
const size_t max_count = _max_count; +#ifdef _M_X64 + static const bool x86 = false; +#else + static const bool x86 = true; +#endif private: - u32 m_value[max_count]; - u32 m_index; + union _CRT_ALIGN(8) { + struct { + u32 m_index; + u32 m_value[max_count]; + }; + u64 m_indval; + }; long m_lock; public: @@ -300,81 +310,149 @@ public: __forceinline bool Pop(u32& res) { - while (_InterlockedExchange(&m_lock, 1)); - _mm_lfence(); - if(!m_index) + if (max_count > 1 || x86) { - m_lock = 0; //release lock - return false; + while (_InterlockedExchange(&m_lock, 1)); + _mm_lfence(); + if(!m_index) + { + m_lock = 0; //release lock + return false; + } + res = m_value[--m_index]; + m_value[m_index] = 0; + _mm_sfence(); + m_lock = 0; + return true; + } + else + { //lock-free + if(!m_index) + return false; + else + { + res = (m_indval >> 32); + m_indval = 0; + return true; + } } - res = m_value[--m_index]; - _mm_sfence(); - m_lock = 0; - return true; } __forceinline bool Push(u32 value) { - while (_InterlockedExchange(&m_lock, 1)); - _mm_lfence(); - if(m_index >= max_count) + if (max_count > 1 || x86) { - m_lock = 0; //release lock - return false; + while (_InterlockedExchange(&m_lock, 1)); + _mm_lfence(); + if(m_index >= max_count) + { + m_lock = 0; //release lock + return false; + } + m_value[m_index++] = value; + _mm_sfence(); + m_lock = 0; + return true; + } + else + { //lock-free + if(m_index) + return false; + else + { + m_indval = ((u64)value << 32) | 1; + return true; + } } - m_value[m_index++] = value; - _mm_sfence(); - m_lock = 0; - return true; } __forceinline void PushUncond(u32 value) { - while (_InterlockedExchange(&m_lock, 1)); - _mm_lfence(); - if(m_index >= max_count) - m_value[max_count-1] = value; //last message is overwritten + if (max_count > 1 || x86) + { + while (_InterlockedExchange(&m_lock, 1)); + _mm_lfence(); + if(m_index >= max_count) + m_value[max_count-1] = value; //last message is overwritten + else + m_value[m_index++] = value; + _mm_sfence(); +
m_lock = 0; + } else - m_value[m_index++] = value; - _mm_sfence(); - m_lock = 0; + { //lock-free + m_indval = ((u64)value << 32) | 1; + } } __forceinline void PushUncond_OR(u32 value) { - while (_InterlockedExchange(&m_lock, 1)); - _mm_lfence(); - if(m_index >= max_count) - m_value[max_count-1] |= value; //last message is logically ORed + if (max_count > 1 || x86) + { + while (_InterlockedExchange(&m_lock, 1)); + _mm_lfence(); + if(m_index >= max_count) + m_value[max_count-1] |= value; //last message is logically ORed + else + m_value[m_index++] = value; + _mm_sfence(); + m_lock = 0; + } else - m_value[m_index++] = value; - _mm_sfence(); - m_lock = 0; + { /* lock-free: atomically OR into the packed index/value word, so the intrinsic must receive the ADDRESS of m_indval, not its value */ +#ifdef _M_X64 + _InterlockedOr64((volatile __int64*)&m_indval, ((u64)value << 32) | 1); +#else + ConLog.Error("PushUncond_OR(): no code compiled"); +#endif + } } __forceinline void PopUncond(u32& res) { - while (_InterlockedExchange(&m_lock, 1)); - _mm_lfence(); - if(!m_index) - res = 0; //result is undefined + if (max_count > 1 || x86) + { + while (_InterlockedExchange(&m_lock, 1)); + _mm_lfence(); + if(!m_index) + res = 0; //result is undefined + else + { + res = m_value[--m_index]; + m_value[m_index] = 0; + } + _mm_sfence(); + m_lock = 0; + } else - res = m_value[--m_index]; - _mm_sfence(); - m_lock = 0; + { //lock-free + if(!m_index) + res = 0; + else + { + res = (m_indval >> 32); + m_indval = 0; + } + } } u32 GetCount() const { - while (m_lock); - _mm_lfence(); + if (max_count > 1 || x86) + { + while (m_lock); + _mm_lfence(); + } return m_index; } u32 GetFreeCount() const { - while (m_lock); - _mm_lfence(); + if (max_count > 1 || x86) + { + while (m_lock); + _mm_lfence(); + } return max_count - m_index; } @@ -471,6 +549,7 @@ public: return SPU.In_MBox.GetCount(); case SPU_WrOutIntrMbox: + ConLog.Warning("GetChannelCount(%s) = 0", spu_ch_name[ch]); return 0;//return SPU.OutIntr_Mbox.GetFreeCount(); case MFC_RdTagStat: @@ -498,12 +577,12 @@ public: { case SPU_WrOutIntrMbox: ConLog.Warning("%s: %s = 0x%x",
__FUNCTION__, spu_ch_name[ch], v); - while (!SPU.OutIntr_Mbox.Push(v) && !Emu.IsStopped()) _mm_pause(); + if (!SPU.OutIntr_Mbox.Push(v)) do _mm_pause(); while (!SPU.OutIntr_Mbox.Push(v) && !Emu.IsStopped()); break; case SPU_WrOutMbox: ConLog.Warning("%s: %s = 0x%x", __FUNCTION__, spu_ch_name[ch], v); - while (!SPU.Out_MBox.Push(v) && !Emu.IsStopped()) _mm_pause(); + if (!SPU.Out_MBox.Push(v)) do _mm_pause(); while (!SPU.Out_MBox.Push(v) && !Emu.IsStopped()); break; case MFC_WrTagMask: @@ -555,23 +634,22 @@ public: switch(ch) { case SPU_RdInMbox: - while (!SPU.In_MBox.Pop(v) && !Emu.IsStopped()) _mm_pause(); + if (!SPU.In_MBox.Pop(v)) do _mm_pause(); while (!SPU.In_MBox.Pop(v) && !Emu.IsStopped()); ConLog.Warning("%s: 0x%x = %s", __FUNCTION__, v, spu_ch_name[ch]); break; case MFC_RdTagStat: - while (dmac.proxy_pos) dmac.DoCmd(); //probably incompatible with MFC lists - while (!Prxy.TagStatus.Pop(v) && !Emu.IsStopped()) _mm_pause(); + if (!Prxy.TagStatus.Pop(v)) do _mm_pause(); while (!Prxy.TagStatus.Pop(v) && !Emu.IsStopped()); //ConLog.Warning("%s: 0x%x = %s", __FUNCTION__, v, spu_ch_name[ch]); break; case SPU_RdSigNotify1: - while (!SPU.SNR[0].Pop(v) && !Emu.IsStopped()) _mm_pause(); - ConLog.Warning("%s: 0x%x = %s", __FUNCTION__, v, spu_ch_name[ch]); + if (!SPU.SNR[0].Pop(v)) do _mm_pause(); while (!SPU.SNR[0].Pop(v) && !Emu.IsStopped()); + //ConLog.Warning("%s: 0x%x = %s", __FUNCTION__, v, spu_ch_name[ch]); break; case SPU_RdSigNotify2: - while (!SPU.SNR[1].Pop(v) && !Emu.IsStopped()) _mm_pause(); + if (!SPU.SNR[1].Pop(v)) do _mm_pause(); while (!SPU.SNR[1].Pop(v) && !Emu.IsStopped()); //ConLog.Warning("%s: 0x%x = %s", __FUNCTION__, v, spu_ch_name[ch]); break; diff --git a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp index 52a1d66755..e81f99e51b 100644 --- a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp +++ b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp @@ -1,6 +1,9 @@ #include "stdafx.h" #include
"Emu/SysCalls/SysCalls.h" #include "Emu/SysCalls/SC_FUNC.h" +#include "Emu/SysCalls/lv2/SC_SPU_Thread.h" +#include "Loader/ELF.h" +#include "Emu/Cell/RawSPUThread.h" void sysPrxForUser_init(); Module sysPrxForUser("sysPrxForUser", sysPrxForUser_init); @@ -59,7 +62,7 @@ int sys_spu_elf_get_segments(u32 elf_img, mem_ptr_t segments, i return CELL_OK; } -int sys_spu_image_import(mem_ptr_t img, u64 src, u32 type) +int sys_spu_image_import(mem_ptr_t img, u32 src, u32 type) { sysPrxForUser.Warning("sys_spu_image_import(img=0x%x, src=0x%x, type=0x%x)", img.GetAddr(), src, type); @@ -79,6 +82,41 @@ int sys_spu_image_import(mem_ptr_t img, u64 src, u32 type) return CELL_OK; } +int sys_raw_spu_load(int id, u32 path_addr, mem32_t entry) +{ + const wxString path = Memory.ReadString(path_addr).mb_str(); + sysPrxForUser.Warning("sys_raw_spu_load(id=0x%x, path=0x%x [%s], entry_addr=0x%x)", + id, path_addr, path, entry.GetAddr()); + + vfsFile f(path.c_str()); + if(!f.IsOpened()) + { + sysPrxForUser.Error("sys_raw_spu_load error: '%s' not found!", path); + return CELL_ENOENT; + } + + ELFLoader l(f); + l.LoadInfo(); + l.LoadData(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id); + + entry = l.GetEntry(); + + return CELL_OK; +} + +extern u64 g_last_spu_offset; + +int sys_raw_spu_image_load(int id, mem_ptr_t img) +{ + sysPrxForUser.Warning("sys_raw_spu_image_load(id=0x%x, img_addr=0x%x)", id, img.GetAddr()); + + memcpy(Memory + RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id, Memory + g_last_spu_offset, 256 * 1024); + Memory.Write32(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id + RAW_SPU_PROB_OFFSET + SPU_NPC_offs, + img->entry_point - g_last_spu_offset); + + return CELL_OK; +} + void sysPrxForUser_init() { sysPrxForUser.AddFunc(0x744680a2, sys_initialize_tls); @@ -118,4 +156,7 @@ void sysPrxForUser_init() sysPrxForUser.AddFunc(0x1ed454ce, sys_spu_elf_get_information); sysPrxForUser.AddFunc(0xdb6b3250, sys_spu_elf_get_segments); sysPrxForUser.AddFunc(0xebe5f72f, sys_spu_image_import); + +
sysPrxForUser.AddFunc(0x893305fa, sys_raw_spu_load); + sysPrxForUser.AddFunc(0xb995662e, sys_raw_spu_image_load); } diff --git a/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp b/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp index bb5fe07953..a7fb066862 100644 --- a/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp +++ b/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp @@ -21,18 +21,17 @@ struct SpuGroupInfo } }; -u64 g_spu_offset = 0; -u64 g_spu_alloc_size = 0; +u64 g_last_spu_offset = 0; u32 LoadSpuImage(vfsStream& stream) { ELFLoader l(stream); l.LoadInfo(); - g_spu_alloc_size = 0xFFFFED - stream.GetSize(); - g_spu_offset = Memory.MainMem.Alloc(g_spu_alloc_size); - l.LoadData(g_spu_offset); + u32 alloc_size = 0xFFFFED - stream.GetSize(); + g_last_spu_offset = Memory.MainMem.Alloc(alloc_size); + l.LoadData(g_last_spu_offset); - return g_spu_offset + l.GetEntry(); + return g_last_spu_offset + l.GetEntry(); } //156 @@ -96,7 +95,7 @@ int sys_spu_thread_initialize(mem32_t thread, u32 group, u32 spu_num, mem_ptr_t< return CELL_EBUSY; } - u32 ls_entry = img->entry_point - g_spu_offset; + u32 ls_entry = img->entry_point - g_last_spu_offset; std::string name = Memory.ReadString(attr->name_addr, attr->name_len).mb_str(); u64 a1 = arg->arg1; u64 a2 = arg->arg2; @@ -106,7 +105,7 @@ int sys_spu_thread_initialize(mem32_t thread, u32 group, u32 spu_num, mem_ptr_t< CPUThread& new_thread = Emu.GetCPU().AddThread(CPU_THREAD_SPU); //copy SPU image: u32 spu_offset = Memory.MainMem.Alloc(256 * 1024); - memcpy(Memory + spu_offset, Memory + g_spu_offset, 256 * 1024); + memcpy(Memory + spu_offset, Memory + g_last_spu_offset, 256 * 1024); //initialize from new place: new_thread.SetOffset(spu_offset); new_thread.SetEntry(ls_entry); @@ -396,7 +395,7 @@ int sys_spu_thread_write_snr(u32 id, u32 number, u32 value) return CELL_EINVAL; } - if ((*(SPUThread*)thr).cfg.value & (1<