diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 8cf5e9a89c..17094fcf1e 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -227,16 +227,58 @@ union _CRT_ALIGN(16) u128 return ret; } + static u128 fromF(__m128 value) + { + u128 ret; + ret.vf = value; + return ret; + } + static __forceinline u128 add8(const u128& left, const u128& right) { return fromV(_mm_add_epi8(left.vi, right.vi)); } + static __forceinline u128 add16(const u128& left, const u128& right) + { + return fromV(_mm_add_epi16(left.vi, right.vi)); + } + + static __forceinline u128 add32(const u128& left, const u128& right) + { + return fromV(_mm_add_epi32(left.vi, right.vi)); + } + + static __forceinline u128 addfs(const u128& left, const u128& right) + { + return fromF(_mm_add_ps(left.vf, right.vf)); + } + static __forceinline u128 sub8(const u128& left, const u128& right) { return fromV(_mm_sub_epi8(left.vi, right.vi)); } + static __forceinline u128 sub16(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi16(left.vi, right.vi)); + } + + static __forceinline u128 sub32(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi32(left.vi, right.vi)); + } + + static __forceinline u128 subfs(const u128& left, const u128& right) + { + return fromF(_mm_sub_ps(left.vf, right.vf)); + } + + static __forceinline u128 maxu8(const u128& left, const u128& right) + { + return fromV(_mm_max_epu8(left.vi, right.vi)); + } + static __forceinline u128 minu8(const u128& left, const u128& right) { return fromV(_mm_min_epu8(left.vi, right.vi)); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 61215ad12e..393efddff1 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -667,14 +667,14 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) auto old_rtoc = GPR[2]; auto old_LR = LR; auto old_thread = GetCurrentNamedThread(); - auto old_task = custom_task; + auto old_task = decltype(custom_task)(); m_status = Running; PC = addr; GPR[2] 
= rtoc; LR = Emu.GetCPUThreadStop(); SetCurrentNamedThread(this); - custom_task = nullptr; + custom_task.swap(old_task); Task(); @@ -684,7 +684,7 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) GPR[2] = old_rtoc; LR = old_LR; SetCurrentNamedThread(old_thread); - custom_task = old_task; + custom_task.swap(old_task); } void PPUThread::FastStop() @@ -715,7 +715,12 @@ void PPUThread::Task() if (m_events) { // process events - if (m_events & CPU_EVENT_STOP && (Emu.IsStopped() || IsStopped() || IsPaused())) + if (Emu.IsStopped()) + { + return; + } + + if (m_events & CPU_EVENT_STOP && (IsStopped() || IsPaused())) { m_events &= ~CPU_EVENT_STOP; return; diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 282df3963d..0f2aa5382e 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -8,6 +8,16 @@ #include "SPUInterpreter.h" #include "SPUInterpreter2.h" +#ifdef _MSC_VER +#include <intrin.h> // _rotl, _rotl16 +#define rotl32 _rotl +#define rotl16 _rotl16 +#else +#include <x86intrin.h> +#define rotl16(x,r) (((u16)(x) << ((r) & 15)) | ((u16)(x) >> ((16 - ((r) & 15)) & 15))) // count taken modulo 16, as _rotl16 does +#define rotl32(x,r) (((u32)(x) << ((r) & 31)) | ((u32)(x) >> ((32 - ((r) & 31)) & 31))) // count taken modulo 32, as _rotl does +#endif + void spu_interpreter::DEFAULT(SPUThread& CPU, spu_opcode_t op) { SPUInterpreter inter(CPU); (*SPU_instr::rrr_list)(&inter, op.opcode); @@ -50,142 +60,211 @@ void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.ra] | CPU.GPR[op.rb]; } void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = CPU.GPR[op.ra]._u32[i] <= CPU.GPR[op.rb]._u32[i]; + } } void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::sub16(CPU.GPR[op.rb], 
CPU.GPR[op.ra]); } void spu_interpreter::NOR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] | CPU.GPR[op.rb]); } void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b)); } void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = rotl32(a._u32[i], b._s32[i]); + } } void spu_interpreter::ROTM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const u64 value = a._u32[i]; + CPU.GPR[op.rt]._u32[i] = static_cast<u32>(value >> ((0 - b._u32[i]) & 0x3f)); // 6-bit count keeps the u64 shift defined; 32..63 gives 0 + } } void spu_interpreter::ROTMA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const s64 value = a._s32[i]; + CPU.GPR[op.rt]._s32[i] = static_cast<s32>(value >> ((0 - b._u32[i]) & 0x3f)); // 6-bit count; 32..63 leaves only copies of the sign bit + } } void spu_interpreter::SHL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const u64 value = a._u32[i]; + CPU.GPR[op.rt]._u32[i] = static_cast<u32>(value << (b._u32[i] & 0x3f)); // 6-bit count; 32..63 gives 0 after truncation + } } void spu_interpreter::ROTH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + CPU.GPR[op.rt]._u16[i] = rotl16(a._u16[i], b._u8[i * 2]); + } } void spu_interpreter::ROTHM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const u32 value = a._u16[i]; + CPU.GPR[op.rt]._u16[i] = static_cast<u16>(value >> ((0 - 
b._u16[i]) & 0x1f)); // 5-bit count keeps the u32 shift defined; 16..31 gives 0 + } } void spu_interpreter::ROTMAH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const s32 value = a._s16[i]; + CPU.GPR[op.rt]._s16[i] = static_cast<s16>(value >> ((0 - b._u16[i]) & 0x1f)); // 5-bit count; 16..31 leaves only copies of the sign bit + } } void spu_interpreter::SHLH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const u32 value = a._u16[i]; + CPU.GPR[op.rt]._u16[i] = static_cast<u16>(value << (b._u16[i] & 0x1f)); // 5-bit count; 16..31 gives 0 after truncation + } } void spu_interpreter::ROTI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.si7 & 0x1f; + + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(a, n), _mm_srli_epi32(a, 32 - n)); } void spu_interpreter::ROTMI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srli_epi32(CPU.GPR[op.ra].vi, -op.si7 & 0x3f); } void spu_interpreter::ROTMAI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi32(CPU.GPR[op.ra].vi, -op.si7 & 0x3f); } void spu_interpreter::SHLI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_slli_epi32(CPU.GPR[op.ra].vi, op.si7 & 0x3f); } void spu_interpreter::ROTHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.si7 & 0xf; + + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi16(a, n), _mm_srli_epi16(a, 16 - n)); } void spu_interpreter::ROTHMI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srli_epi16(CPU.GPR[op.ra].vi, -op.si7 & 0x1f); } void spu_interpreter::ROTMAHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi16(CPU.GPR[op.ra].vi, -op.si7 & 0x1f); } void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_slli_epi16(CPU.GPR[op.ra].vi, 
op.si7 & 0x1f); } void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.ra] & CPU.GPR[op.rb]; } void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = ~CPU.GPR[op.ra]._u32[i] < CPU.GPR[op.rb]._u32[i]; + } } void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] & CPU.GPR[op.rb]); } void spu_interpreter::AVGB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_avg_epu8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::MTSPR(SPUThread& CPU, spu_opcode_t op) @@ -199,137 +278,200 @@ void spu_interpreter::WRCH(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BIZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u32[3] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::BINZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u32[3] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::BIHZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u16[6] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::BIHNZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u16[6] != 0) 
+ { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::STOPD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::STQX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.write128((CPU.GPR[op.ra]._u32[3] + CPU.GPR[op.rb]._u32[3]) & 0x3fff0, CPU.GPR[op.rt]); } void spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); } void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + const u32 target = SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); } void spu_interpreter::IRET(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::BISLED(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + u32 result = 0; + for (u32 i = 0; i < 4; i++) + { + result |= (CPU.GPR[op.ra]._u32[i] & 1) << i; + } + + CPU.GPR[op.rt] = u128::from32r(result); } void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + u32 result = 0; + for (u32 i = 0; i < 8; i++) + { + result |= (CPU.GPR[op.ra]._u16[i] & 1) << i; + } + + CPU.GPR[op.rt] = u128::from32r(result); } void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7))); } void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsm_table[CPU.GPR[op.ra]._u32[3] & 0xf]; } void spu_interpreter::FSMH(SPUThread& CPU, spu_opcode_t op) { 
- DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsmh_table[CPU.GPR[op.ra]._u32[3] & 0xff]; } void spu_interpreter::FSMB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsmb_table[CPU.GPR[op.ra]._u32[3] & 0xffff]; } void spu_interpreter::FREST(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = _mm_rcp_ps(CPU.GPR[op.ra].vf); } void spu_interpreter::FRSQEST(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[op.ra].vf, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); } void spu_interpreter::LQX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.read128((CPU.GPR[op.ra]._u32[3] + CPU.GPR[op.rb]._u32[3]) & 0x3fff0); } void spu_interpreter::ROTQBYBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[CPU.GPR[op.rb]._u32[3] >> 3 & 0xf]); } void spu_interpreter::ROTQMBYBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-(CPU.GPR[op.rb]._s32[3] >> 3) & 0x1f]); } void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[CPU.GPR[op.rb]._u32[3] >> 3 & 0x1f]); } void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = ~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xf; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u8[t] = 0x03; } void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u16[t] = 0x0203; } void spu_interpreter::CWX(SPUThread& CPU, 
spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u32[t] = 0x00010203; } void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; } void spu_interpreter::ROTQBI(SPUThread& CPU, spu_opcode_t op) @@ -349,42 +491,50 @@ void spu_interpreter::SHLQBI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ROTQBY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[CPU.GPR[op.rb]._u32[3] & 0xf]); } void spu_interpreter::ROTQMBY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-CPU.GPR[op.rb]._s32[3] & 0x1f]); } void spu_interpreter::SHLQBY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[CPU.GPR[op.rb]._u32[3] & 0x1f]); } void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]); } void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = ~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xf; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u8[t] = 0x03; } void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u16[t] 
= 0x0203; } void spu_interpreter::CWD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u32[t] = 0x00010203; } void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; } void spu_interpreter::ROTQBII(SPUThread& CPU, spu_opcode_t op) @@ -404,22 +554,21 @@ void spu_interpreter::SHLQBII(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ROTQBYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[op.i7 & 0xf]); } void spu_interpreter::ROTQMBYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-op.si7 & 0x1f]); } void spu_interpreter::SHLQBYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[op.i7 & 0x1f]); } void spu_interpreter::NOP(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::CGT(SPUThread& CPU, spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 2361d0c376..97d4934b3c 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -315,11 +315,9 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -336,11 +334,9 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable 
interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -357,11 +353,9 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -378,11 +372,9 @@ private: } void BIHNZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -409,11 +401,9 @@ private: } void BI(u32 intr, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -423,11 +413,9 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } diff --git a/rpcs3/Emu/Cell/SPUInterpreter2.h b/rpcs3/Emu/Cell/SPUInterpreter2.h index 68dcb860e2..d4de2559c9 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter2.h +++ b/rpcs3/Emu/Cell/SPUInterpreter2.h @@ -73,6 +73,13 @@ union spu_opcode_t s32 : 7; // 25..31 s32 si18 : 18; // 7..24 }; + + struct + { + u32 : 18; // 14..31 + u32 e : 1; // 13 + u32 d : 1; // 12 + }; }; using spu_inter_func_t = void(*)(SPUThread& CPU, spu_opcode_t opcode); diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 85ba680249..0391532151 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -1073,11 +1073,9 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1094,11 +1092,9 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - 
switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1115,11 +1111,9 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1136,11 +1130,9 @@ private: } void BIHNZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1188,11 +1180,9 @@ private: } void BI(u32 intr, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1206,11 +1196,9 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index c68ead7b02..625fbd2c74 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -97,7 +97,12 @@ void SPUThread::Task() if (m_events) { // process events - if (m_events & CPU_EVENT_STOP && (Emu.IsStopped() || IsStopped() || IsPaused())) + if (Emu.IsStopped()) + { + return; + } + + if (m_events & CPU_EVENT_STOP && (IsStopped() || IsPaused())) { m_events &= ~CPU_EVENT_STOP; return; @@ -225,16 +230,19 @@ void SPUThread::FastCall(u32 ls_addr) auto old_PC = PC; auto old_LR = GPR[0]._u32[3]; auto old_stack = GPR[1]._u32[3]; // only saved and restored (may be wrong) + auto old_task = decltype(m_custom_task)(); m_status = Running; PC = ls_addr; GPR[0]._u32[3] = 0x0; + m_custom_task.swap(old_task); // park the custom task in old_task while the nested Task() runs - CPUThread::Task(); + SPUThread::Task(); 
PC = old_PC; GPR[0]._u32[3] = old_LR; GPR[1]._u32[3] = old_stack; + m_custom_task.swap(old_task); // put the parked custom task back } void SPUThread::FastStop()