From 445b7c075856e3c1547c9ddfa471f3a1e2db6b08 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 27 Feb 2018 19:03:00 +0300 Subject: [PATCH] Optimize SPU interpreter Made SPU decoder similar to PPU decoder --- rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp | 8 +- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 7 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 757 +++++++++++++++--------- rpcs3/Emu/Cell/SPUInterpreter.h | 478 +++++++-------- rpcs3/Emu/Cell/SPUThread.cpp | 75 ++- 5 files changed, 797 insertions(+), 528 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp index a188a93fae..be89e251d6 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp @@ -117,7 +117,7 @@ u32 cellSpursModulePollStatus(SPUThread& spu, u32* status) void cellSpursModuleExit(SPUThread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x100); - spu.pc = ctxt->exitToKernelAddr - 4; + spu.pc = ctxt->exitToKernelAddr; throw SpursModuleExit(); } @@ -659,7 +659,7 @@ void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus) spu.gpr[3]._u32[3] = 0x100; spu.gpr[4]._u64[1] = wklInfo->arg; spu.gpr[5]._u32[3] = pollStatus; - spu.pc = 0xA00 - 4; + spu.pc = 0xA00; } // SPURS kernel workload exit @@ -1404,7 +1404,7 @@ void spursTasksetResumeTask(SPUThread& spu) spu.gpr[80 + i] = ctxt->savedContextR80ToR127[i]; } - spu.pc = spu.gpr[0]._u32[3] - 4; + spu.pc = spu.gpr[0]._u32[3]; } // Start a task @@ -1422,7 +1422,7 @@ void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs) spu.gpr[i].clear(); } - spu.pc = ctxt->savedContextLr.value()._u32[3] - 4; + spu.pc = ctxt->savedContextLr.value()._u32[3]; } // Process a request and update the state of the taskset diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 3400adf07c..cb47718ec7 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -340,18 +340,13 @@ void spu_recompiler::InterpreterCall(spu_opcode_t op) { // TODO: check correctness - const u32 old_pc = _spu->pc; - if (test(_spu->state) && _spu->check_state()) { return 0x2000000 | _spu->pc; } - _func(*_spu, { opcode }); - - if (old_pc != _spu->pc) + if (UNLIKELY(!_func(*_spu, {opcode}))) { - _spu->pc += 4; return 0x2000000 | _spu->pc; } diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 8a0fe644f1..ebe73b837c 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -32,7 +32,7 @@ inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B) return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } -void spu_interpreter::UNK(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::UNK(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op.opcode); } @@ -57,92 +57,99 @@ void spu_interpreter::set_interrupt_status(SPUThread& spu, spu_opcode_t op) if (spu.interrupts_enabled && (spu.ch_event_mask & spu.ch_event_stat & SPU_EVENT_INTR_IMPLEMENTED) > 0) { spu.interrupts_enabled = false; - spu.srr0 = std::exchange(spu.pc, -4) + 4; + spu.srr0 = std::exchange(spu.pc, 0); } } -void spu_interpreter::STOP(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STOP(SPUThread& spu, spu_opcode_t op) { - if (!spu.stop_and_signal(op.opcode & 0x3fff)) - { - spu.pc -= 4; - } + return spu.stop_and_signal(op.opcode & 0x3fff); } -void spu_interpreter::LNOP(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LNOP(SPUThread& spu, spu_opcode_t op) { + return true; } // This instruction must be used following a store instruction that modifies the instruction stream. -void spu_interpreter::SYNC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SYNC(SPUThread& spu, spu_opcode_t op) { _mm_mfence(); + return true; } // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. -void spu_interpreter::DSYNC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DSYNC(SPUThread& spu, spu_opcode_t op) { _mm_mfence(); + return true; } -void spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].clear(); // All SPRs read as zero. TODO: check it. + return true; } -void spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op) { u32 result; if (!spu.get_ch_value(op.ra, result)) { - spu.pc -= 4; - } - else - { - spu.gpr[op.rt] = v128::from32r(result); + return false; } + + spu.gpr[op.rt] = v128::from32r(result); + return true; } -void spu_interpreter::RCHCNT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::RCHCNT(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(spu.get_ch_count(op.ra)); + return true; } -void spu_interpreter::SF(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SF(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::sub32(spu.gpr[op.rb], spu.gpr[op.ra]); + return true; } -void spu_interpreter::OR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::OR(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] | spu.gpr[op.rb]; + return true; } -void spu_interpreter::BG(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BG(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi32(sse_cmpgt_epu32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi), _mm_set1_epi32(1)); + return true; } -void spu_interpreter::SFH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::sub16(spu.gpr[op.rb], spu.gpr[op.ra]); + return true; } -void spu_interpreter::NOR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::NOR(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = ~(spu.gpr[op.ra] | spu.gpr[op.rb]); + return true; } -void spu_interpreter::ABSDB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ABSDB(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; spu.gpr[op.rt] = v128::sub8(v128::maxu8(a, b), v128::minu8(a, b)); + return true; } -void spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -151,9 +158,10 @@ void spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt]._u32[i] = rol32(a._u32[i], b._u32[i]); } + return true; } -void spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -163,9 +171,10 @@ void spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op) const u64 value = a._u32[i]; spu.gpr[op.rt]._u32[i] = static_cast(value >> ((0 - b._u32[i]) & 0x3f)); } + return true; } -void spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -175,9 +184,10 @@ void spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op) const s64 value = a._s32[i]; spu.gpr[op.rt]._s32[i] = static_cast(value >> ((0 - b._u32[i]) & 0x3f)); } + return true; } -void spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -187,9 +197,10 @@ void spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op) const u64 value = a._u32[i]; spu.gpr[op.rt]._u32[i] = static_cast(value << (b._u32[i] & 0x3f)); } + return true; } -void spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -198,9 +209,10 @@ void spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt]._u16[i] = rol16(a._u16[i], b._u16[i]); } + return true; } -void spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -210,9 +222,10 @@ void spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op) const u32 value = a._u16[i]; spu.gpr[op.rt]._u16[i] = static_cast(value >> ((0 - b._u16[i]) & 0x1f)); } + return true; } -void spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -222,9 +235,10 @@ void spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op) const s32 value = a._s16[i]; spu.gpr[op.rt]._s16[i] = static_cast(value >> ((0 - b._u16[i]) & 0x1f)); } + return true; } -void spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -234,255 +248,298 @@ void spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op) const u32 value = a._u16[i]; spu.gpr[op.rt]._u16[i] = static_cast(value << (b._u16[i] & 0x1f)); } + return true; } -void spu_interpreter::ROTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0x1f; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi32(a, n), _mm_srli_epi32(a, 32 - n)); + return true; } -void spu_interpreter::ROTMI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srli_epi32(spu.gpr[op.ra].vi, 0-op.i7 & 0x3f); + return true; } -void spu_interpreter::ROTMAI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMAI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi32(spu.gpr[op.ra].vi, 0-op.i7 & 0x3f); + return true; } -void spu_interpreter::SHLI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_slli_epi32(spu.gpr[op.ra].vi, op.i7 & 0x3f); + return true; } -void spu_interpreter::ROTHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTHI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0xf; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi16(a, n), _mm_srli_epi16(a, 16 - n)); + return true; } -void spu_interpreter::ROTHMI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTHMI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srli_epi16(spu.gpr[op.ra].vi, 0-op.i7 & 0x1f); + return true; } -void spu_interpreter::ROTMAHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMAHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi16(spu.gpr[op.ra].vi, 0-op.i7 & 0x1f); + return true; } -void spu_interpreter::SHLHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_slli_epi16(spu.gpr[op.ra].vi, op.i7 & 0x1f); + return true; } -void spu_interpreter::A(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::A(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::add32(spu.gpr[op.ra], spu.gpr[op.rb]); + return true; } -void spu_interpreter::AND(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AND(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] & spu.gpr[op.rb]; + return true; } -void spu_interpreter::CG(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CG(SPUThread& spu, spu_opcode_t op) { const auto a = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x7fffffff)); const auto b = _mm_xor_si128(spu.gpr[op.rb].vi, _mm_set1_epi32(0x80000000)); spu.gpr[op.rt].vi = _mm_srli_epi32(_mm_cmpgt_epi32(b, a), 31); + return true; } -void spu_interpreter::AH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::add16(spu.gpr[op.ra], spu.gpr[op.rb]); + return true; } -void spu_interpreter::NAND(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::NAND(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = ~(spu.gpr[op.ra] & spu.gpr[op.rb]); + return true; } -void spu_interpreter::AVGB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AVGB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_avg_epu8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::MTSPR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MTSPR(SPUThread& spu, spu_opcode_t op) { // SPR writes are ignored. TODO: check it. + return true; } -void spu_interpreter::WRCH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::WRCH(SPUThread& spu, spu_opcode_t op) { - if (!spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3])) - { - spu.pc -= 4; - } + return spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3]); } -void spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] == 0) { - spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4; + spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]); set_interrupt_status(spu, op); + return false; } + return true; } -void spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] != 0) { - spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4; + spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]); set_interrupt_status(spu, op); + return false; } + return true; } -void spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] == 0) { - spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4; + spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]); set_interrupt_status(spu, op); + return false; } + return true; } -void spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] != 0) { - spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4; + spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]); set_interrupt_status(spu, op); + return false; } + return true; } -void spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unimplemented instruction" HERE); + return true; } -void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op) { spu._ref((spu.gpr[op.ra]._u32[3] + spu.gpr[op.rb]._u32[3]) & 0x3fff0) = spu.gpr[op.rt]; + return true; } -void spu_interpreter::BI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BI(SPUThread& spu, spu_opcode_t op) { - spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4; + spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]); set_interrupt_status(spu, op); + return false; } -void spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op) { const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3]); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4)); - spu.pc = target - 4; + spu.pc = target; set_interrupt_status(spu, op); + return false; } -void spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op) { - spu.pc = spu_branch_target(spu.srr0) - 4; + spu.pc = spu_branch_target(spu.srr0); set_interrupt_status(spu, op); + return false; } -void spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unimplemented instruction" HERE); + return true; } -void spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op) { + return true; } -void spu_interpreter::GB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::GB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(_mm_movemask_ps(_mm_castsi128_ps(_mm_slli_epi32(spu.gpr[op.ra].vi, 31)))); + return true; } -void spu_interpreter::GBH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::GBH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_packs_epi16(_mm_slli_epi16(spu.gpr[op.ra].vi, 15), _mm_setzero_si128()))); + return true; } -void spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(spu.gpr[op.ra].vi, 7))); + return true; } -void spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = g_spu_imm.fsm[spu.gpr[op.ra]._u32[3] & 0xf]; + return true; } -void spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = g_spu_imm.fsmh[spu.gpr[op.ra]._u32[3] & 0xff]; + return true; } -void spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = g_spu_imm.fsmb[spu.gpr[op.ra]._u32[3] & 0xffff]; + return true; } -void spu_interpreter_fast::FREST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FREST(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vf = _mm_rcp_ps(spu.gpr[op.ra].vf); + return true; } -void spu_interpreter_fast::FRSQEST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FRSQEST(SPUThread& spu, spu_opcode_t op) { const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); spu.gpr[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(spu.gpr[op.ra].vf, mask)); + return true; } -void spu_interpreter::LQX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQX(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref((spu.gpr[op.ra]._u32[3] + spu.gpr[op.rb]._u32[3]) & 0x3fff0); + return true; } -void spu_interpreter_precise::ROTQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::ROTQBYBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (16 - (spu.gpr[op.rb]._u32[3] >> 3 & 0xf)))); + return true; } -void spu_interpreter_fast::ROTQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::ROTQBYBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.rldq_pshufb[spu.gpr[op.rb]._u32[3] >> 3 & 0xf].vi); + return true; } -void spu_interpreter_precise::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + ((0 - (spu.gpr[op.rb]._u32[3] >> 3)) & 0x1f))); + return true; } -void spu_interpreter_fast::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.srdq_pshufb[spu.gpr[op.rb]._s32[3] >> 3 & 0x1f].vi); + return true; } -void spu_interpreter_precise::SHLQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::SHLQBYBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (32 - (spu.gpr[op.rb]._u32[3] >> 3 & 0x1f)))); + return true; } -void spu_interpreter_fast::SHLQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::SHLQBYBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.sldq_pshufb[spu.gpr[op.rb]._u32[3] >> 3 & 0x1f].vi); + return true; } -void spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -492,9 +549,10 @@ void spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op) const s32 t = ~(spu.gpr[op.rb]._u32[3] + spu.gpr[op.ra]._u32[3]) & 0xf; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u8[t] = 0x03; + return true; } -void spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -504,9 +562,10 @@ void spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op) const s32 t = (~(spu.gpr[op.rb]._u32[3] + spu.gpr[op.ra]._u32[3]) & 0xe) >> 1; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u16[t] = 0x0203; + return true; } -void spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -516,9 +575,10 @@ void spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op) const s32 t = (~(spu.gpr[op.rb]._u32[3] + spu.gpr[op.ra]._u32[3]) & 0xc) >> 2; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u32[t] = 0x00010203; + return true; } -void spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -528,71 +588,82 @@ void spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op) const s32 t = (~(spu.gpr[op.rb]._u32[3] + spu.gpr[op.ra]._u32[3]) & 0x8) >> 3; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u64[t] = 0x0001020304050607ull; + return true; } -void spu_interpreter::ROTQBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = spu.gpr[op.rb]._s32[3] & 0x7; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_shuffle_epi32(a, 0x4E), 64 - n)); + return true; } -void spu_interpreter::ROTQMBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = -spu.gpr[op.rb]._s32[3] & 0x7; spu.gpr[op.rt].vi = _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(_mm_srli_si128(a, 8), 64 - n)); + return true; } -void spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = spu.gpr[op.rb]._u32[3] & 0x7; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - n)); + return true; } -void spu_interpreter_precise::ROTQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::ROTQBY(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (16 - (spu.gpr[op.rb]._u32[3] & 0xf)))); + return true; } -void spu_interpreter_fast::ROTQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::ROTQBY(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.rldq_pshufb[spu.gpr[op.rb]._u32[3] & 0xf].vi); + return true; } -void spu_interpreter_precise::ROTQMBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::ROTQMBY(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (0 - spu.gpr[op.rb]._u32[3] & 0x1f))); + return true; } -void spu_interpreter_fast::ROTQMBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::ROTQMBY(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.srdq_pshufb[spu.gpr[op.rb]._s32[3] & 0x1f].vi); + return true; } -void spu_interpreter_precise::SHLQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::SHLQBY(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (32 - (spu.gpr[op.rb]._u32[3] & 0x1f)))); + return true; } -void spu_interpreter_fast::SHLQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::SHLQBY(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.sldq_pshufb[spu.gpr[op.rb]._u32[3] & 0x1f].vi); + return true; } -void spu_interpreter::ORX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORX(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(spu.gpr[op.ra]._u32[0] | spu.gpr[op.ra]._u32[1] | spu.gpr[op.ra]._u32[2] | spu.gpr[op.ra]._u32[3]); + return true; } -void spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -602,9 +673,10 @@ void spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op) const s32 t = ~(op.i7 + spu.gpr[op.ra]._u32[3]) & 0xf; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u8[t] = 0x03; + return true; } -void spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -614,9 +686,10 @@ void spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op) const s32 t = (~(op.i7 + spu.gpr[op.ra]._u32[3]) & 0xe) >> 1; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u16[t] = 0x0203; + return true; } -void spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -626,9 +699,10 @@ void spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op) const s32 t = (~(op.i7 + spu.gpr[op.ra]._u32[3]) & 0xc) >> 2; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u32[t] = 0x00010203; + return true; } -void spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -638,95 +712,111 @@ void spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op) const s32 t = (~(op.i7 + spu.gpr[op.ra]._u32[3]) & 0x8) >> 3; spu.gpr[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); spu.gpr[op.rt]._u64[t] = 0x0001020304050607ull; + return true; } -void spu_interpreter::ROTQBII(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBII(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0x7; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_shuffle_epi32(a, 0x4E), 64 - n)); + return true; } -void spu_interpreter::ROTQMBII(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBII(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = 0-op.i7 & 0x7; spu.gpr[op.rt].vi = _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(_mm_srli_si128(a, 8), 64 - n)); + return true; } -void spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0x7; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - n)); + return true; } -void spu_interpreter_precise::ROTQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::ROTQBYI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (16 - (op.i7 & 0xf)))); + return true; } -void spu_interpreter_fast::ROTQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::ROTQBYI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.rldq_pshufb[op.i7 & 0xf].vi); + return true; } -void spu_interpreter_precise::ROTQMBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::ROTQMBYI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (0 - op.i7 & 0x1f))); + return true; } -void spu_interpreter_fast::ROTQMBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::ROTQMBYI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.srdq_pshufb[op.i7 & 0x1f].vi); + return true; } -void spu_interpreter_precise::SHLQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::SHLQBYI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; spu.gpr[op.rt].vi = _mm_loadu_si128((__m128i*)((u8*)buf + (32 - (op.i7 & 0x1f)))); + return true; } -void spu_interpreter_fast::SHLQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::SHLQBYI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.sldq_pshufb[op.i7 & 0x1f].vi); + return true; } -void spu_interpreter::NOP(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::NOP(SPUThread& spu, spu_opcode_t op) { + return true; } -void spu_interpreter::CGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGT(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::XOR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XOR(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] ^ spu.gpr[op.rb]; + return true; } -void spu_interpreter::CGTH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::EQV(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::EQV(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = ~(spu.gpr[op.ra] ^ spu.gpr[op.rb]); + return true; } -void spu_interpreter::CGTB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op) { const auto m1 = _mm_set1_epi16(0xff); const auto m2 = _mm_set1_epi32(0xffff); @@ -743,36 +833,41 @@ void spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op) const auto s4 = _mm_andnot_si128(m2, sb); const auto s3 = _mm_slli_epi32(sb, 16); spu.gpr[op.rt].vi = _mm_or_si128(_mm_add_epi16(s1, s2), _mm_add_epi16(s3, s4)); + return true; } -void spu_interpreter::HGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HGT(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] > spu.gpr[op.rb]._s32[3]) { spu.halt(); } + return true; } -void spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op) { for (u32 i = 0; i < 4; i++) { spu.gpr[op.rt]._u32[i] = cntlz32(spu.gpr[op.ra]._u32[i]); } + return true; } -void spu_interpreter::XSWD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XSWD(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt]._s64[0] = spu.gpr[op.ra]._s32[0]; spu.gpr[op.rt]._s64[1] = spu.gpr[op.ra]._s32[2]; + return true; } -void spu_interpreter::XSHW(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XSHW(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(spu.gpr[op.ra].vi, 16), 16); + return true; } -void spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto mask1 = _mm_set1_epi8(0x55); @@ -782,24 +877,28 @@ void spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op) const auto mask3 = _mm_set1_epi8(0x0f); const auto sum3 = _mm_add_epi8(_mm_and_si128(_mm_srli_epi64(sum2, 4), mask3), _mm_and_si128(sum2, mask3)); spu.gpr[op.rt].vi = sum3; + return true; } -void spu_interpreter::XSBH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XSBH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi16(_mm_slli_epi16(spu.gpr[op.ra].vi, 8), 8); + return true; } -void spu_interpreter::CLGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGT(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = sse_cmpgt_epu32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::ANDC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDC(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::andnot(spu.gpr[op.rb], spu.gpr[op.ra]); + return true; } -void spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op) { // IMPL NOTES: // if (v is inf) v = (inf - 1) i.e nearest normal value to inf with mantissa bits left intact @@ -835,24 +934,28 @@ void spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op) const auto final_b = _mm_andnot_ps(denorm_check_b, b_final1); spu.gpr[op.rt].vf = _mm_cmplt_ps(final_b, final_a); + return true; } -void spu_interpreter::DFCGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCGT(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); + return true; } -void spu_interpreter_fast::FA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::addfs(spu.gpr[op.ra], spu.gpr[op.rb]); + return true; } -void spu_interpreter_fast::FS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FS(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::subfs(spu.gpr[op.ra], spu.gpr[op.rb]); + return true; } -void spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op) { const auto zero = _mm_set1_ps(0.f); const auto sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); @@ -879,19 +982,22 @@ void spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op) const auto set2 = _mm_and_ps(nan_check, final_extended); spu.gpr[op.rt].vf = _mm_or_ps(set1, set2); + return true; } -void spu_interpreter::CLGTH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = sse_cmpgt_epu16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::ORC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORC(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] | ~spu.gpr[op.rb]; + return true; } -void spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op) { //IMPL NOTES: See FCGT @@ -915,202 +1021,237 @@ void spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op) const auto comparison = _mm_cmplt_ps(_mm_and_ps(final_b, sign_mask), _mm_and_ps(final_a, sign_mask)); spu.gpr[op.rt].vf = _mm_or_ps(comparison, nan_mask); + return true; } -void spu_interpreter::DFCMGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCMGT(SPUThread& spu, spu_opcode_t op) { const auto mask = _mm_castsi128_pd(_mm_set1_epi64x(0x7fffffffffffffff)); const auto ra = _mm_and_pd(spu.gpr[op.ra].vd, mask); const auto rb = _mm_and_pd(spu.gpr[op.rb].vd, mask); spu.gpr[op.rt].vd = _mm_cmpgt_pd(ra, rb); + return true; } -void spu_interpreter_fast::DFA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::addfd(spu.gpr[op.ra], spu.gpr[op.rb]); + return true; } -void spu_interpreter_fast::DFS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFS(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::subfd(spu.gpr[op.ra], spu.gpr[op.rb]); + return true; } -void spu_interpreter_fast::DFM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFM(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd); + return true; } -void spu_interpreter::CLGTB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = sse_cmpgt_epu8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::HLGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HLGT(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._u32[3] > spu.gpr[op.rb]._u32[3]) { spu.halt(); } + return true; } -void spu_interpreter_fast::DFMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFMA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_add_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd); + return true; } -void spu_interpreter_fast::DFMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFMS(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_sub_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd); + return true; } -void spu_interpreter_fast::DFNMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFNMS(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_sub_pd(spu.gpr[op.rt].vd, _mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd)); + return true; } -void spu_interpreter_fast::DFNMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFNMA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_sub_pd(_mm_set1_pd(0.0), _mm_add_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd)); + return true; } -void spu_interpreter::CEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQ(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter::MPYHHU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHHU(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto b = spu.gpr[op.rb].vi; spu.gpr[op.rt].vi = _mm_or_si128(_mm_srli_epi32(_mm_mullo_epi16(a, b), 16), _mm_and_si128(_mm_mulhi_epu16(a, b), _mm_set1_epi32(0xffff0000))); + return true; } -void spu_interpreter::ADDX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ADDX(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::add32(v128::add32(spu.gpr[op.ra], spu.gpr[op.rb]), spu.gpr[op.rt] & v128::from32p(1)); + return true; } -void spu_interpreter::SFX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFX(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::sub32(v128::sub32(spu.gpr[op.rb], spu.gpr[op.ra]), v128::andnot(spu.gpr[op.rt], v128::from32p(1))); + return true; } -void spu_interpreter::CGX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGX(SPUThread& spu, spu_opcode_t op) { for (s32 i = 0; i < 4; i++) { const u64 carry = spu.gpr[op.rt]._u32[i] & 1; spu.gpr[op.rt]._u32[i] = (carry + spu.gpr[op.ra]._u32[i] + spu.gpr[op.rb]._u32[i]) >> 32; } + return true; } -void spu_interpreter::BGX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BGX(SPUThread& spu, spu_opcode_t op) { for (s32 i = 0; i < 4; i++) { const s64 result = (u64)spu.gpr[op.rb]._u32[i] - (u64)spu.gpr[op.ra]._u32[i] - (u64)(1 - (spu.gpr[op.rt]._u32[i] & 1)); spu.gpr[op.rt]._u32[i] = result >= 0; } + return true; } -void spu_interpreter::MPYHHA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHHA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi32(spu.gpr[op.rt].vi, _mm_madd_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), _mm_srli_epi32(spu.gpr[op.rb].vi, 16))); + return true; } -void spu_interpreter::MPYHHAU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHHAU(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto b = spu.gpr[op.rb].vi; spu.gpr[op.rt].vi = _mm_add_epi32(spu.gpr[op.rt].vi, _mm_or_si128(_mm_srli_epi32(_mm_mullo_epi16(a, b), 16), _mm_and_si128(_mm_mulhi_epu16(a, b), _mm_set1_epi32(0xffff0000)))); + return true; } -void spu_interpreter_fast::FSCRRD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FSCRRD(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].clear(); + return true; } -void spu_interpreter_fast::FESD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FESD(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vf; spu.gpr[op.rt].vd = _mm_cvtps_pd(_mm_shuffle_ps(a, a, 0x8d)); + return true; } -void spu_interpreter_fast::FRDS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FRDS(SPUThread& spu, spu_opcode_t op) { const auto t = _mm_cvtpd_ps(spu.gpr[op.ra].vd); spu.gpr[op.rt].vf = _mm_shuffle_ps(t, t, 0x72); + return true; } -void spu_interpreter_fast::FSCRWR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FSCRWR(SPUThread& spu, spu_opcode_t op) { + return true; } -void spu_interpreter::DFTSV(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFTSV(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); + return true; } -void spu_interpreter_fast::FCEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCEQ(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vf = _mm_cmpeq_ps(spu.gpr[op.rb].vf, spu.gpr[op.ra].vf); + return true; } -void spu_interpreter::DFCEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCEQ(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); + return true; } -void spu_interpreter::MPY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPY(SPUThread& spu, spu_opcode_t op) { const auto mask = _mm_set1_epi32(0xffff); spu.gpr[op.rt].vi = _mm_madd_epi16(_mm_and_si128(spu.gpr[op.ra].vi, mask), _mm_and_si128(spu.gpr[op.rb].vi, mask)); + return true; } -void spu_interpreter::MPYH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), spu.gpr[op.rb].vi), 16); + return true; } -void spu_interpreter::MPYHH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), _mm_srli_epi32(spu.gpr[op.rb].vi, 16)); + return true; } -void spu_interpreter::MPYS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYS(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(_mm_mulhi_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi), 16), 16); + return true; } -void spu_interpreter::CEQH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter_fast::FCMEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCMEQ(SPUThread& spu, spu_opcode_t op) { const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); spu.gpr[op.rt].vf = _mm_cmpeq_ps(_mm_and_ps(spu.gpr[op.rb].vf, mask), _mm_and_ps(spu.gpr[op.ra].vf, mask)); + return true; } -void spu_interpreter::DFCMEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCMEQ(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); + return true; } -void spu_interpreter::MPYU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYU(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto b = spu.gpr[op.rb].vi; spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_and_si128(_mm_mullo_epi16(a, b), _mm_set1_epi32(0xffff))); + return true; } -void spu_interpreter::CEQB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); + return true; } -void spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op) { // TODO const auto mask_se = _mm_castsi128_ps(_mm_set1_epi32(0xff800000)); // sign and exponent mask @@ -1121,322 +1262,382 @@ void spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op) const auto step = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(spu.gpr[op.rb].vi, mask_sf)), _mm_set1_ps(std::exp2(-13.f))); const auto y = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(spu.gpr[op.ra].vi, mask_yf)), _mm_set1_ps(std::exp2(-19.f))); spu.gpr[op.rt].vf = _mm_or_ps(_mm_and_ps(mask_se, spu.gpr[op.rb].vf), _mm_andnot_ps(mask_se, _mm_sub_ps(base, _mm_mul_ps(step, y)))); + return true; } -void spu_interpreter::HEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HEQ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] == spu.gpr[op.rb]._s32[3]) { spu.halt(); } + return true; } -void spu_interpreter_fast::CFLTS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CFLTS(SPUThread& spu, spu_opcode_t op) { const auto scaled = _mm_mul_ps(spu.gpr[op.ra].vf, g_spu_imm.scale[173 - op.i8]); spu.gpr[op.rt].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000)))); + return true; } -void spu_interpreter_fast::CFLTU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CFLTU(SPUThread& spu, spu_opcode_t op) { const auto scaled1 = _mm_max_ps(_mm_mul_ps(spu.gpr[op.ra].vf, g_spu_imm.scale[173 - op.i8]), _mm_set1_ps(0.0f)); const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, _mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000))); spu.gpr[op.rt].vi = _mm_or_si128(_mm_or_si128(_mm_cvttps_epi32(scaled1), _mm_cvttps_epi32(scaled2)), _mm_castps_si128(_mm_cmpge_ps(scaled1, _mm_set1_ps(0x100000000)))); + return true; } -void spu_interpreter_fast::CSFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CSFLT(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vf = _mm_mul_ps(_mm_cvtepi32_ps(spu.gpr[op.ra].vi), g_spu_imm.scale[op.i8 - 155]); + return true; } -void spu_interpreter_fast::CUFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CUFLT(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(a, 31)), _mm_set1_ps(0x80000000)); spu.gpr[op.rt].vf = _mm_mul_ps(_mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(a, _mm_set1_epi32(0x7fffffff))), fix), g_spu_imm.scale[op.i8 - 155]); + return true; } -void spu_interpreter::BRZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] == 0) { - spu.pc = spu_branch_target(spu.pc, op.i16) - 4; + spu.pc = spu_branch_target(spu.pc, op.i16); + return false; } + return true; } -void spu_interpreter::STQA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQA(SPUThread& spu, spu_opcode_t op) { spu._ref(spu_ls_target(0, op.i16)) = spu.gpr[op.rt]; + return true; } -void spu_interpreter::BRNZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRNZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] != 0) { - spu.pc = spu_branch_target(spu.pc, op.i16) - 4; + spu.pc = spu_branch_target(spu.pc, op.i16); + return false; } + return true; } -void spu_interpreter::BRHZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRHZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] == 0) { - spu.pc = spu_branch_target(spu.pc, op.i16) - 4; + spu.pc = spu_branch_target(spu.pc, op.i16); + return false; } + return true; } -void spu_interpreter::BRHNZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRHNZ(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] != 0) { - spu.pc = spu_branch_target(spu.pc, op.i16) - 4; + spu.pc = spu_branch_target(spu.pc, op.i16); + return false; } + return true; } -void spu_interpreter::STQR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQR(SPUThread& spu, spu_opcode_t op) { spu._ref(spu_ls_target(spu.pc, op.i16)) = spu.gpr[op.rt]; + return true; } -void spu_interpreter::BRA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRA(SPUThread& spu, spu_opcode_t op) { - spu.pc = spu_branch_target(0, op.i16) - 4; + spu.pc = spu_branch_target(0, op.i16); + return false; } -void spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref(spu_ls_target(0, op.i16)); + return true; } -void spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op) { const u32 target = spu_branch_target(0, op.i16); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4)); - spu.pc = target - 4; + spu.pc = target; + return false; } -void spu_interpreter::BR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BR(SPUThread& spu, spu_opcode_t op) { - spu.pc = spu_branch_target(spu.pc, op.i16) - 4; + spu.pc = spu_branch_target(spu.pc, op.i16); + return false; } -void spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = g_spu_imm.fsmb[op.i16]; + return true; } -void spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op) { const u32 target = spu_branch_target(spu.pc, op.i16); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4)); - spu.pc = target - 4; + spu.pc = target; + return false; } -void spu_interpreter::LQR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQR(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref(spu_ls_target(spu.pc, op.i16)); + return true; } -void spu_interpreter::IL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::IL(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi32(op.si16); + return true; } -void spu_interpreter::ILHU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ILHU(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi32(op.i16 << 16); + return true; } -void spu_interpreter::ILH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ILH(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi16(op.i16); + return true; } -void spu_interpreter::IOHL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::IOHL(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.rt].vi, _mm_set1_epi32(op.i16)); + return true; } -void spu_interpreter::ORI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); + return true; } -void spu_interpreter::ORHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); + return true; } -void spu_interpreter::ORBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); + return true; } -void spu_interpreter::SFI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_sub_epi32(_mm_set1_epi32(op.si10), spu.gpr[op.ra].vi); + return true; } -void spu_interpreter::SFHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_sub_epi16(_mm_set1_epi16(op.si10), spu.gpr[op.ra].vi); + return true; } -void spu_interpreter::ANDI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); + return true; } -void spu_interpreter::ANDHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); + return true; } -void spu_interpreter::ANDBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); + return true; } -void spu_interpreter::AI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi32(_mm_set1_epi32(op.si10), spu.gpr[op.ra].vi); + return true; } -void spu_interpreter::AHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi16(_mm_set1_epi16(op.si10), spu.gpr[op.ra].vi); + return true; } -void spu_interpreter::STQD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQD(SPUThread& spu, spu_opcode_t op) { spu._ref((spu.gpr[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0) = spu.gpr[op.rt]; + return true; } -void spu_interpreter::LQD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQD(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref((spu.gpr[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0); + return true; } -void spu_interpreter::XORI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XORI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); + return true; } -void spu_interpreter::XORHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XORHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); + return true; } -void spu_interpreter::XORBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XORBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); + return true; } -void spu_interpreter::CGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi32(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); + return true; } -void spu_interpreter::CGTHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi16(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); + return true; } -void spu_interpreter::CGTBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi8(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); + return true; } -void spu_interpreter::HGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HGTI(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] > op.si10) { spu.halt(); } + return true; } -void spu_interpreter::CLGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80000000)), _mm_set1_epi32(op.si10 ^ 0x80000000)); + return true; } -void spu_interpreter::CLGTHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80008000)), _mm_set1_epi16(op.si10 ^ 0x8000)); + return true; } -void spu_interpreter::CLGTBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80808080)), _mm_set1_epi8(op.i8 ^ 0x80)); + return true; } -void spu_interpreter::HLGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HLGTI(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._u32[3] > static_cast(op.si10)) { spu.halt(); } + return true; } -void spu_interpreter::MPYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_madd_epi16(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10 & 0xffff)); + return true; } -void spu_interpreter::MPYUI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYUI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto i = _mm_set1_epi32(op.si10 & 0xffff); spu.gpr[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, i), 16), _mm_mullo_epi16(a, i)); + return true; } -void spu_interpreter::CEQI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi32(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); + return true; } -void spu_interpreter::CEQHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQHI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi16(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); + return true; } -void spu_interpreter::CEQBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQBI(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi8(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); + return true; } -void spu_interpreter::HEQI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HEQI(SPUThread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] == op.si10) { spu.halt(); } + return true; } -void spu_interpreter::HBRA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HBRA(SPUThread& spu, spu_opcode_t op) { + return true; } -void spu_interpreter::HBRR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HBRR(SPUThread& spu, spu_opcode_t op) { + return true; } -void spu_interpreter::ILA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ILA(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi32(op.i18); + return true; } -void spu_interpreter::SELB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SELB(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt4] = (spu.gpr[op.rc] & spu.gpr[op.rb]) | v128::andnot(spu.gpr[op.rc], spu.gpr[op.ra]); + return true; } -void spu_interpreter_precise::SHUFB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::SHUFB(SPUThread& spu, spu_opcode_t op) { alignas(16) static thread_local u8 s_lut[256] { @@ -1476,9 +1677,10 @@ void spu_interpreter_precise::SHUFB(SPUThread& spu, spu_opcode_t op) { t._u8[i] = s_lut[mask._u8[i]]; } + return true; } -void spu_interpreter_fast::SHUFB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::SHUFB(SPUThread& spu, spu_opcode_t op) { const auto index = _mm_xor_si128(spu.gpr[op.rc].vi, _mm_set1_epi32(0x0f0f0f0f)); const auto res1 = _mm_shuffle_epi8(spu.gpr[op.ra].vi, index); @@ -1491,15 +1693,17 @@ void spu_interpreter_fast::SHUFB(SPUThread& spu, spu_opcode_t op) const auto bit567 = _mm_set1_epi32(0xe0e0e0e0); const auto k3 = _mm_cmpeq_epi8(_mm_and_si128(index, bit567), bit567); spu.gpr[op.rt4].vi = _mm_sub_epi8(res3, _mm_and_si128(k3, _mm_set1_epi32(0x7f7f7f7f))); + return true; } -void spu_interpreter::MPYA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYA(SPUThread& spu, spu_opcode_t op) { const auto mask = _mm_set1_epi32(0xffff); spu.gpr[op.rt4].vi = _mm_add_epi32(spu.gpr[op.rc].vi, _mm_madd_epi16(_mm_and_si128(spu.gpr[op.ra].vi, mask), _mm_and_si128(spu.gpr[op.rb].vi, mask))); + return true; } -void spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op) { const u32 test_bits = 0x7f800000; auto mask = _mm_set1_ps((f32&)test_bits); @@ -1513,9 +1717,10 @@ void spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op) auto b = _mm_and_ps(spu.gpr[op.rb].vf, mask_b); spu.gpr[op.rt4].vf = _mm_sub_ps(spu.gpr[op.rc].vf, _mm_mul_ps(a, b)); + return true; } -void spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op) { const u32 test_bits = 0x7f800000; auto mask = _mm_set1_ps((f32&)test_bits); @@ -1529,9 +1734,10 @@ void spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op) auto b = _mm_and_ps(spu.gpr[op.rb].vf, mask_b); spu.gpr[op.rt4].vf = _mm_add_ps(_mm_mul_ps(a, b), spu.gpr[op.rc].vf); + return true; } -void spu_interpreter_fast::FMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FMS(SPUThread& spu, spu_opcode_t op) { const u32 test_bits = 0x7f800000; auto mask = _mm_set1_ps((f32&)test_bits); @@ -1545,6 +1751,7 @@ void spu_interpreter_fast::FMS(SPUThread& spu, spu_opcode_t op) auto b = _mm_and_ps(spu.gpr[op.rb].vf, mask_b); spu.gpr[op.rt4].vf = _mm_sub_ps(_mm_mul_ps(a, b), spu.gpr[op.rc].vf); + return true; } static void SetHostRoundingMode(u32 rn) @@ -1612,7 +1819,7 @@ inline bool isdenormal(double x) return std::fpclassify(x) == FP_SUBNORMAL; } -void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); for (int i = 0; i < 4; i++) @@ -1630,9 +1837,10 @@ void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op) result = 1 / a; spu.gpr[op.rt]._f[i] = result; } + return true; } -void spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); for (int i = 0; i < 4; i++) @@ -1650,9 +1858,10 @@ void spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op) result = 1 / std::sqrt(std::fabs(a)); spu.gpr[op.rt]._f[i] = result; } + return true; } -void spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -1673,6 +1882,7 @@ void spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op) pass = (b >= 0x80000000 || a > b); spu.gpr[op.rt]._u32[i] = pass ? 0xFFFFFFFF : 0; } + return true; } static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub) @@ -1760,11 +1970,11 @@ static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub) } } -void spu_interpreter_precise::FA(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, false); } +bool spu_interpreter_precise::FA(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, false); return true; } -void spu_interpreter_precise::FS(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, true); } +bool spu_interpreter_precise::FS(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, true); return true; } -void spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); for (int w = 0; w < 4; w++) @@ -1835,9 +2045,10 @@ void spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op) } spu.gpr[op.rt]._f[w] = result; } + return true; } -void spu_interpreter_precise::FCMGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCMGT(SPUThread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -1856,6 +2067,7 @@ void spu_interpreter_precise::FCMGT(SPUThread& spu, spu_opcode_t op) pass = abs_a > abs_b; spu.gpr[op.rt]._u32[i] = pass ? 0xFFFFFFFF : 0; } + return true; } enum DoubleOp @@ -1919,11 +2131,11 @@ static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation) } } -void spu_interpreter_precise::DFA(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_A); } +bool spu_interpreter_precise::DFA(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_A); return true; } -void spu_interpreter_precise::DFS(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_S); } +bool spu_interpreter_precise::DFS(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_S); return true; } -void spu_interpreter_precise::DFM(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_M); } +bool spu_interpreter_precise::DFM(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_M); return true; } static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) { @@ -1981,20 +2193,21 @@ static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) } } -void spu_interpreter_precise::DFMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, false); } +bool spu_interpreter_precise::DFMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, false); return true; } -void spu_interpreter_precise::DFMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, true); } +bool spu_interpreter_precise::DFMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, true); return true; } -void spu_interpreter_precise::DFNMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, true); } +bool spu_interpreter_precise::DFNMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, true); return true; } -void spu_interpreter_precise::DFNMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, false); } +bool spu_interpreter_precise::DFNMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, false); return true; } -void spu_interpreter_precise::FSCRRD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FSCRRD(SPUThread& spu, spu_opcode_t op) { spu.fpscr.Read(spu.gpr[op.rt]); + return true; } -void spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op) { for (int i = 0; i < 2; i++) { @@ -2016,9 +2229,10 @@ void spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op) spu.gpr[op.rt]._d[i] = (double)a; } } + return true; } -void spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op) { for (int i = 0; i < 2; i++) { @@ -2045,14 +2259,16 @@ void spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op) } spu.gpr[op.rt]._u32[i * 2] = 0; } + return true; } -void spu_interpreter_precise::FSCRWR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FSCRWR(SPUThread& spu, spu_opcode_t op) { spu.fpscr.Write(spu.gpr[op.ra]); + return true; } -void spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -2065,9 +2281,10 @@ void spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op) const bool pass = a == b || (a_zero && b_zero); spu.gpr[op.rt]._u32[i] = pass ? 0xFFFFFFFF : 0; } + return true; } -void spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -2080,15 +2297,17 @@ void spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op) const bool pass = abs_a == abs_b || (a_zero && b_zero); spu.gpr[op.rt]._u32[i] = pass ? 0xFFFFFFFF : 0; } + return true; } -void spu_interpreter_precise::FI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FI(SPUThread& spu, spu_opcode_t op) { // TODO spu.gpr[op.rt] = spu.gpr[op.rb]; + return true; } -void spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op) { const int scale = 173 - (op.i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) @@ -2108,9 +2327,10 @@ void spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op) result = (s32)scaled; spu.gpr[op.rt]._s32[i] = result; } + return true; } -void spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op) { const int scale = 173 - (op.i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) @@ -2130,9 +2350,10 @@ void spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op) result = (u32)scaled; spu.gpr[op.rt]._u32[i] = result; } + return true; } -void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); const int scale = 155 - (op.i8 & 0xff); //unsigned immediate @@ -2153,9 +2374,10 @@ void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op) spu.gpr[op.rt]._f[i] = 0.0f; } } + return true; } -void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); const int scale = 155 - (op.i8 & 0xff); //unsigned immediate @@ -2176,6 +2398,7 @@ void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op) spu.gpr[op.rt]._f[i] = 0.0f; } } + return true; } static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) @@ -2323,8 +2546,8 @@ static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) } } -void spu_interpreter_precise::FNMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, true, true); } +bool spu_interpreter_precise::FNMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, true, true); return true; } -void spu_interpreter_precise::FMA(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); } +bool spu_interpreter_precise::FMA(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); return true; } -void spu_interpreter_precise::FMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); } +bool spu_interpreter_precise::FMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); return true; } diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 96a6ce307b..06ab662d70 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -4,258 +4,258 @@ class SPUThread; -using spu_inter_func_t = void(*)(SPUThread& spu, spu_opcode_t op); +using spu_inter_func_t = bool(*)(SPUThread& spu, spu_opcode_t op); struct spu_interpreter { - static void UNK(SPUThread&, spu_opcode_t); + static bool UNK(SPUThread&, spu_opcode_t); static void set_interrupt_status(SPUThread&, spu_opcode_t); - static void STOP(SPUThread&, spu_opcode_t); - static void LNOP(SPUThread&, spu_opcode_t); - static void SYNC(SPUThread&, spu_opcode_t); - static void DSYNC(SPUThread&, spu_opcode_t); - static void MFSPR(SPUThread&, spu_opcode_t); - static void RDCH(SPUThread&, spu_opcode_t); - static void RCHCNT(SPUThread&, spu_opcode_t); - static void SF(SPUThread&, spu_opcode_t); - static void OR(SPUThread&, spu_opcode_t); - static void BG(SPUThread&, spu_opcode_t); - static void SFH(SPUThread&, spu_opcode_t); - static void NOR(SPUThread&, spu_opcode_t); - static void ABSDB(SPUThread&, spu_opcode_t); - static void ROT(SPUThread&, spu_opcode_t); - static void ROTM(SPUThread&, spu_opcode_t); - static void ROTMA(SPUThread&, spu_opcode_t); - static void SHL(SPUThread&, spu_opcode_t); - static void ROTH(SPUThread&, spu_opcode_t); - static void ROTHM(SPUThread&, spu_opcode_t); - static void ROTMAH(SPUThread&, spu_opcode_t); - static void SHLH(SPUThread&, spu_opcode_t); - static void ROTI(SPUThread&, spu_opcode_t); - static void ROTMI(SPUThread&, spu_opcode_t); - static void ROTMAI(SPUThread&, spu_opcode_t); - static void SHLI(SPUThread&, spu_opcode_t); - static void ROTHI(SPUThread&, spu_opcode_t); - static void ROTHMI(SPUThread&, spu_opcode_t); - static void ROTMAHI(SPUThread&, spu_opcode_t); - static void SHLHI(SPUThread&, spu_opcode_t); - static void A(SPUThread&, spu_opcode_t); - static void AND(SPUThread&, spu_opcode_t); - static void CG(SPUThread&, spu_opcode_t); - static void AH(SPUThread&, spu_opcode_t); - static void NAND(SPUThread&, spu_opcode_t); - static void AVGB(SPUThread&, spu_opcode_t); - static void MTSPR(SPUThread&, spu_opcode_t); - static void WRCH(SPUThread&, spu_opcode_t); - static void BIZ(SPUThread&, spu_opcode_t); - static void BINZ(SPUThread&, spu_opcode_t); - static void BIHZ(SPUThread&, spu_opcode_t); - static void BIHNZ(SPUThread&, spu_opcode_t); - static void STOPD(SPUThread&, spu_opcode_t); - static void STQX(SPUThread&, spu_opcode_t); - static void BI(SPUThread&, spu_opcode_t); - static void BISL(SPUThread&, spu_opcode_t); - static void IRET(SPUThread&, spu_opcode_t); - static void BISLED(SPUThread&, spu_opcode_t); - static void HBR(SPUThread&, spu_opcode_t); - static void GB(SPUThread&, spu_opcode_t); - static void GBH(SPUThread&, spu_opcode_t); - static void GBB(SPUThread&, spu_opcode_t); - static void FSM(SPUThread&, spu_opcode_t); - static void FSMH(SPUThread&, spu_opcode_t); - static void FSMB(SPUThread&, spu_opcode_t); - static void LQX(SPUThread&, spu_opcode_t); - static void CBX(SPUThread&, spu_opcode_t); - static void CHX(SPUThread&, spu_opcode_t); - static void CWX(SPUThread&, spu_opcode_t); - static void CDX(SPUThread&, spu_opcode_t); - static void ROTQBI(SPUThread&, spu_opcode_t); - static void ROTQMBI(SPUThread&, spu_opcode_t); - static void SHLQBI(SPUThread&, spu_opcode_t); - static void ORX(SPUThread&, spu_opcode_t); - static void CBD(SPUThread&, spu_opcode_t); - static void CHD(SPUThread&, spu_opcode_t); - static void CWD(SPUThread&, spu_opcode_t); - static void CDD(SPUThread&, spu_opcode_t); - static void ROTQBII(SPUThread&, spu_opcode_t); - static void ROTQMBII(SPUThread&, spu_opcode_t); - static void SHLQBII(SPUThread&, spu_opcode_t); - static void NOP(SPUThread&, spu_opcode_t); - static void CGT(SPUThread&, spu_opcode_t); - static void XOR(SPUThread&, spu_opcode_t); - static void CGTH(SPUThread&, spu_opcode_t); - static void EQV(SPUThread&, spu_opcode_t); - static void CGTB(SPUThread&, spu_opcode_t); - static void SUMB(SPUThread&, spu_opcode_t); - static void HGT(SPUThread&, spu_opcode_t); - static void CLZ(SPUThread&, spu_opcode_t); - static void XSWD(SPUThread&, spu_opcode_t); - static void XSHW(SPUThread&, spu_opcode_t); - static void CNTB(SPUThread&, spu_opcode_t); - static void XSBH(SPUThread&, spu_opcode_t); - static void CLGT(SPUThread&, spu_opcode_t); - static void ANDC(SPUThread&, spu_opcode_t); - static void CLGTH(SPUThread&, spu_opcode_t); - static void ORC(SPUThread&, spu_opcode_t); - static void CLGTB(SPUThread&, spu_opcode_t); - static void HLGT(SPUThread&, spu_opcode_t); - static void CEQ(SPUThread&, spu_opcode_t); - static void MPYHHU(SPUThread&, spu_opcode_t); - static void ADDX(SPUThread&, spu_opcode_t); - static void SFX(SPUThread&, spu_opcode_t); - static void CGX(SPUThread&, spu_opcode_t); - static void BGX(SPUThread&, spu_opcode_t); - static void MPYHHA(SPUThread&, spu_opcode_t); - static void MPYHHAU(SPUThread&, spu_opcode_t); - static void MPY(SPUThread&, spu_opcode_t); - static void MPYH(SPUThread&, spu_opcode_t); - static void MPYHH(SPUThread&, spu_opcode_t); - static void MPYS(SPUThread&, spu_opcode_t); - static void CEQH(SPUThread&, spu_opcode_t); - static void MPYU(SPUThread&, spu_opcode_t); - static void CEQB(SPUThread&, spu_opcode_t); - static void HEQ(SPUThread&, spu_opcode_t); - static void BRZ(SPUThread&, spu_opcode_t); - static void STQA(SPUThread&, spu_opcode_t); - static void BRNZ(SPUThread&, spu_opcode_t); - static void BRHZ(SPUThread&, spu_opcode_t); - static void BRHNZ(SPUThread&, spu_opcode_t); - static void STQR(SPUThread&, spu_opcode_t); - static void BRA(SPUThread&, spu_opcode_t); - static void LQA(SPUThread&, spu_opcode_t); - static void BRASL(SPUThread&, spu_opcode_t); - static void BR(SPUThread&, spu_opcode_t); - static void FSMBI(SPUThread&, spu_opcode_t); - static void BRSL(SPUThread&, spu_opcode_t); - static void LQR(SPUThread&, spu_opcode_t); - static void IL(SPUThread&, spu_opcode_t); - static void ILHU(SPUThread&, spu_opcode_t); - static void ILH(SPUThread&, spu_opcode_t); - static void IOHL(SPUThread&, spu_opcode_t); - static void ORI(SPUThread&, spu_opcode_t); - static void ORHI(SPUThread&, spu_opcode_t); - static void ORBI(SPUThread&, spu_opcode_t); - static void SFI(SPUThread&, spu_opcode_t); - static void SFHI(SPUThread&, spu_opcode_t); - static void ANDI(SPUThread&, spu_opcode_t); - static void ANDHI(SPUThread&, spu_opcode_t); - static void ANDBI(SPUThread&, spu_opcode_t); - static void AI(SPUThread&, spu_opcode_t); - static void AHI(SPUThread&, spu_opcode_t); - static void STQD(SPUThread&, spu_opcode_t); - static void LQD(SPUThread&, spu_opcode_t); - static void XORI(SPUThread&, spu_opcode_t); - static void XORHI(SPUThread&, spu_opcode_t); - static void XORBI(SPUThread&, spu_opcode_t); - static void CGTI(SPUThread&, spu_opcode_t); - static void CGTHI(SPUThread&, spu_opcode_t); - static void CGTBI(SPUThread&, spu_opcode_t); - static void HGTI(SPUThread&, spu_opcode_t); - static void CLGTI(SPUThread&, spu_opcode_t); - static void CLGTHI(SPUThread&, spu_opcode_t); - static void CLGTBI(SPUThread&, spu_opcode_t); - static void HLGTI(SPUThread&, spu_opcode_t); - static void MPYI(SPUThread&, spu_opcode_t); - static void MPYUI(SPUThread&, spu_opcode_t); - static void CEQI(SPUThread&, spu_opcode_t); - static void CEQHI(SPUThread&, spu_opcode_t); - static void CEQBI(SPUThread&, spu_opcode_t); - static void HEQI(SPUThread&, spu_opcode_t); - static void HBRA(SPUThread&, spu_opcode_t); - static void HBRR(SPUThread&, spu_opcode_t); - static void ILA(SPUThread&, spu_opcode_t); - static void SELB(SPUThread&, spu_opcode_t); - static void MPYA(SPUThread&, spu_opcode_t); - static void DFCGT(SPUThread&, spu_opcode_t); - static void DFCMGT(SPUThread&, spu_opcode_t); - static void DFTSV(SPUThread&, spu_opcode_t); - static void DFCEQ(SPUThread&, spu_opcode_t); - static void DFCMEQ(SPUThread&, spu_opcode_t); + static bool STOP(SPUThread&, spu_opcode_t); + static bool LNOP(SPUThread&, spu_opcode_t); + static bool SYNC(SPUThread&, spu_opcode_t); + static bool DSYNC(SPUThread&, spu_opcode_t); + static bool MFSPR(SPUThread&, spu_opcode_t); + static bool RDCH(SPUThread&, spu_opcode_t); + static bool RCHCNT(SPUThread&, spu_opcode_t); + static bool SF(SPUThread&, spu_opcode_t); + static bool OR(SPUThread&, spu_opcode_t); + static bool BG(SPUThread&, spu_opcode_t); + static bool SFH(SPUThread&, spu_opcode_t); + static bool NOR(SPUThread&, spu_opcode_t); + static bool ABSDB(SPUThread&, spu_opcode_t); + static bool ROT(SPUThread&, spu_opcode_t); + static bool ROTM(SPUThread&, spu_opcode_t); + static bool ROTMA(SPUThread&, spu_opcode_t); + static bool SHL(SPUThread&, spu_opcode_t); + static bool ROTH(SPUThread&, spu_opcode_t); + static bool ROTHM(SPUThread&, spu_opcode_t); + static bool ROTMAH(SPUThread&, spu_opcode_t); + static bool SHLH(SPUThread&, spu_opcode_t); + static bool ROTI(SPUThread&, spu_opcode_t); + static bool ROTMI(SPUThread&, spu_opcode_t); + static bool ROTMAI(SPUThread&, spu_opcode_t); + static bool SHLI(SPUThread&, spu_opcode_t); + static bool ROTHI(SPUThread&, spu_opcode_t); + static bool ROTHMI(SPUThread&, spu_opcode_t); + static bool ROTMAHI(SPUThread&, spu_opcode_t); + static bool SHLHI(SPUThread&, spu_opcode_t); + static bool A(SPUThread&, spu_opcode_t); + static bool AND(SPUThread&, spu_opcode_t); + static bool CG(SPUThread&, spu_opcode_t); + static bool AH(SPUThread&, spu_opcode_t); + static bool NAND(SPUThread&, spu_opcode_t); + static bool AVGB(SPUThread&, spu_opcode_t); + static bool MTSPR(SPUThread&, spu_opcode_t); + static bool WRCH(SPUThread&, spu_opcode_t); + static bool BIZ(SPUThread&, spu_opcode_t); + static bool BINZ(SPUThread&, spu_opcode_t); + static bool BIHZ(SPUThread&, spu_opcode_t); + static bool BIHNZ(SPUThread&, spu_opcode_t); + static bool STOPD(SPUThread&, spu_opcode_t); + static bool STQX(SPUThread&, spu_opcode_t); + static bool BI(SPUThread&, spu_opcode_t); + static bool BISL(SPUThread&, spu_opcode_t); + static bool IRET(SPUThread&, spu_opcode_t); + static bool BISLED(SPUThread&, spu_opcode_t); + static bool HBR(SPUThread&, spu_opcode_t); + static bool GB(SPUThread&, spu_opcode_t); + static bool GBH(SPUThread&, spu_opcode_t); + static bool GBB(SPUThread&, spu_opcode_t); + static bool FSM(SPUThread&, spu_opcode_t); + static bool FSMH(SPUThread&, spu_opcode_t); + static bool FSMB(SPUThread&, spu_opcode_t); + static bool LQX(SPUThread&, spu_opcode_t); + static bool CBX(SPUThread&, spu_opcode_t); + static bool CHX(SPUThread&, spu_opcode_t); + static bool CWX(SPUThread&, spu_opcode_t); + static bool CDX(SPUThread&, spu_opcode_t); + static bool ROTQBI(SPUThread&, spu_opcode_t); + static bool ROTQMBI(SPUThread&, spu_opcode_t); + static bool SHLQBI(SPUThread&, spu_opcode_t); + static bool ORX(SPUThread&, spu_opcode_t); + static bool CBD(SPUThread&, spu_opcode_t); + static bool CHD(SPUThread&, spu_opcode_t); + static bool CWD(SPUThread&, spu_opcode_t); + static bool CDD(SPUThread&, spu_opcode_t); + static bool ROTQBII(SPUThread&, spu_opcode_t); + static bool ROTQMBII(SPUThread&, spu_opcode_t); + static bool SHLQBII(SPUThread&, spu_opcode_t); + static bool NOP(SPUThread&, spu_opcode_t); + static bool CGT(SPUThread&, spu_opcode_t); + static bool XOR(SPUThread&, spu_opcode_t); + static bool CGTH(SPUThread&, spu_opcode_t); + static bool EQV(SPUThread&, spu_opcode_t); + static bool CGTB(SPUThread&, spu_opcode_t); + static bool SUMB(SPUThread&, spu_opcode_t); + static bool HGT(SPUThread&, spu_opcode_t); + static bool CLZ(SPUThread&, spu_opcode_t); + static bool XSWD(SPUThread&, spu_opcode_t); + static bool XSHW(SPUThread&, spu_opcode_t); + static bool CNTB(SPUThread&, spu_opcode_t); + static bool XSBH(SPUThread&, spu_opcode_t); + static bool CLGT(SPUThread&, spu_opcode_t); + static bool ANDC(SPUThread&, spu_opcode_t); + static bool CLGTH(SPUThread&, spu_opcode_t); + static bool ORC(SPUThread&, spu_opcode_t); + static bool CLGTB(SPUThread&, spu_opcode_t); + static bool HLGT(SPUThread&, spu_opcode_t); + static bool CEQ(SPUThread&, spu_opcode_t); + static bool MPYHHU(SPUThread&, spu_opcode_t); + static bool ADDX(SPUThread&, spu_opcode_t); + static bool SFX(SPUThread&, spu_opcode_t); + static bool CGX(SPUThread&, spu_opcode_t); + static bool BGX(SPUThread&, spu_opcode_t); + static bool MPYHHA(SPUThread&, spu_opcode_t); + static bool MPYHHAU(SPUThread&, spu_opcode_t); + static bool MPY(SPUThread&, spu_opcode_t); + static bool MPYH(SPUThread&, spu_opcode_t); + static bool MPYHH(SPUThread&, spu_opcode_t); + static bool MPYS(SPUThread&, spu_opcode_t); + static bool CEQH(SPUThread&, spu_opcode_t); + static bool MPYU(SPUThread&, spu_opcode_t); + static bool CEQB(SPUThread&, spu_opcode_t); + static bool HEQ(SPUThread&, spu_opcode_t); + static bool BRZ(SPUThread&, spu_opcode_t); + static bool STQA(SPUThread&, spu_opcode_t); + static bool BRNZ(SPUThread&, spu_opcode_t); + static bool BRHZ(SPUThread&, spu_opcode_t); + static bool BRHNZ(SPUThread&, spu_opcode_t); + static bool STQR(SPUThread&, spu_opcode_t); + static bool BRA(SPUThread&, spu_opcode_t); + static bool LQA(SPUThread&, spu_opcode_t); + static bool BRASL(SPUThread&, spu_opcode_t); + static bool BR(SPUThread&, spu_opcode_t); + static bool FSMBI(SPUThread&, spu_opcode_t); + static bool BRSL(SPUThread&, spu_opcode_t); + static bool LQR(SPUThread&, spu_opcode_t); + static bool IL(SPUThread&, spu_opcode_t); + static bool ILHU(SPUThread&, spu_opcode_t); + static bool ILH(SPUThread&, spu_opcode_t); + static bool IOHL(SPUThread&, spu_opcode_t); + static bool ORI(SPUThread&, spu_opcode_t); + static bool ORHI(SPUThread&, spu_opcode_t); + static bool ORBI(SPUThread&, spu_opcode_t); + static bool SFI(SPUThread&, spu_opcode_t); + static bool SFHI(SPUThread&, spu_opcode_t); + static bool ANDI(SPUThread&, spu_opcode_t); + static bool ANDHI(SPUThread&, spu_opcode_t); + static bool ANDBI(SPUThread&, spu_opcode_t); + static bool AI(SPUThread&, spu_opcode_t); + static bool AHI(SPUThread&, spu_opcode_t); + static bool STQD(SPUThread&, spu_opcode_t); + static bool LQD(SPUThread&, spu_opcode_t); + static bool XORI(SPUThread&, spu_opcode_t); + static bool XORHI(SPUThread&, spu_opcode_t); + static bool XORBI(SPUThread&, spu_opcode_t); + static bool CGTI(SPUThread&, spu_opcode_t); + static bool CGTHI(SPUThread&, spu_opcode_t); + static bool CGTBI(SPUThread&, spu_opcode_t); + static bool HGTI(SPUThread&, spu_opcode_t); + static bool CLGTI(SPUThread&, spu_opcode_t); + static bool CLGTHI(SPUThread&, spu_opcode_t); + static bool CLGTBI(SPUThread&, spu_opcode_t); + static bool HLGTI(SPUThread&, spu_opcode_t); + static bool MPYI(SPUThread&, spu_opcode_t); + static bool MPYUI(SPUThread&, spu_opcode_t); + static bool CEQI(SPUThread&, spu_opcode_t); + static bool CEQHI(SPUThread&, spu_opcode_t); + static bool CEQBI(SPUThread&, spu_opcode_t); + static bool HEQI(SPUThread&, spu_opcode_t); + static bool HBRA(SPUThread&, spu_opcode_t); + static bool HBRR(SPUThread&, spu_opcode_t); + static bool ILA(SPUThread&, spu_opcode_t); + static bool SELB(SPUThread&, spu_opcode_t); + static bool MPYA(SPUThread&, spu_opcode_t); + static bool DFCGT(SPUThread&, spu_opcode_t); + static bool DFCMGT(SPUThread&, spu_opcode_t); + static bool DFTSV(SPUThread&, spu_opcode_t); + static bool DFCEQ(SPUThread&, spu_opcode_t); + static bool DFCMEQ(SPUThread&, spu_opcode_t); }; struct spu_interpreter_fast final : spu_interpreter { - static void ROTQBYBI(SPUThread&, spu_opcode_t); - static void ROTQMBYBI(SPUThread&, spu_opcode_t); - static void SHLQBYBI(SPUThread&, spu_opcode_t); - static void ROTQBY(SPUThread&, spu_opcode_t); - static void ROTQMBY(SPUThread&, spu_opcode_t); - static void SHLQBY(SPUThread&, spu_opcode_t); - static void ROTQBYI(SPUThread&, spu_opcode_t); - static void ROTQMBYI(SPUThread&, spu_opcode_t); - static void SHLQBYI(SPUThread&, spu_opcode_t); - static void SHUFB(SPUThread&, spu_opcode_t); + static bool ROTQBYBI(SPUThread&, spu_opcode_t); + static bool ROTQMBYBI(SPUThread&, spu_opcode_t); + static bool SHLQBYBI(SPUThread&, spu_opcode_t); + static bool ROTQBY(SPUThread&, spu_opcode_t); + static bool ROTQMBY(SPUThread&, spu_opcode_t); + static bool SHLQBY(SPUThread&, spu_opcode_t); + static bool ROTQBYI(SPUThread&, spu_opcode_t); + static bool ROTQMBYI(SPUThread&, spu_opcode_t); + static bool SHLQBYI(SPUThread&, spu_opcode_t); + static bool SHUFB(SPUThread&, spu_opcode_t); - static void FREST(SPUThread&, spu_opcode_t); - static void FRSQEST(SPUThread&, spu_opcode_t); - static void FCGT(SPUThread&, spu_opcode_t); - static void FA(SPUThread&, spu_opcode_t); - static void FS(SPUThread&, spu_opcode_t); - static void FM(SPUThread&, spu_opcode_t); - static void FCMGT(SPUThread&, spu_opcode_t); - static void DFA(SPUThread&, spu_opcode_t); - static void DFS(SPUThread&, spu_opcode_t); - static void DFM(SPUThread&, spu_opcode_t); - static void DFMA(SPUThread&, spu_opcode_t); - static void DFMS(SPUThread&, spu_opcode_t); - static void DFNMS(SPUThread&, spu_opcode_t); - static void DFNMA(SPUThread&, spu_opcode_t); - static void FSCRRD(SPUThread&, spu_opcode_t); - static void FESD(SPUThread&, spu_opcode_t); - static void FRDS(SPUThread&, spu_opcode_t); - static void FSCRWR(SPUThread&, spu_opcode_t); - static void FCEQ(SPUThread&, spu_opcode_t); - static void FCMEQ(SPUThread&, spu_opcode_t); - static void FI(SPUThread&, spu_opcode_t); - static void CFLTS(SPUThread&, spu_opcode_t); - static void CFLTU(SPUThread&, spu_opcode_t); - static void CSFLT(SPUThread&, spu_opcode_t); - static void CUFLT(SPUThread&, spu_opcode_t); - static void FNMS(SPUThread&, spu_opcode_t); - static void FMA(SPUThread&, spu_opcode_t); - static void FMS(SPUThread&, spu_opcode_t); + static bool FREST(SPUThread&, spu_opcode_t); + static bool FRSQEST(SPUThread&, spu_opcode_t); + static bool FCGT(SPUThread&, spu_opcode_t); + static bool FA(SPUThread&, spu_opcode_t); + static bool FS(SPUThread&, spu_opcode_t); + static bool FM(SPUThread&, spu_opcode_t); + static bool FCMGT(SPUThread&, spu_opcode_t); + static bool DFA(SPUThread&, spu_opcode_t); + static bool DFS(SPUThread&, spu_opcode_t); + static bool DFM(SPUThread&, spu_opcode_t); + static bool DFMA(SPUThread&, spu_opcode_t); + static bool DFMS(SPUThread&, spu_opcode_t); + static bool DFNMS(SPUThread&, spu_opcode_t); + static bool DFNMA(SPUThread&, spu_opcode_t); + static bool FSCRRD(SPUThread&, spu_opcode_t); + static bool FESD(SPUThread&, spu_opcode_t); + static bool FRDS(SPUThread&, spu_opcode_t); + static bool FSCRWR(SPUThread&, spu_opcode_t); + static bool FCEQ(SPUThread&, spu_opcode_t); + static bool FCMEQ(SPUThread&, spu_opcode_t); + static bool FI(SPUThread&, spu_opcode_t); + static bool CFLTS(SPUThread&, spu_opcode_t); + static bool CFLTU(SPUThread&, spu_opcode_t); + static bool CSFLT(SPUThread&, spu_opcode_t); + static bool CUFLT(SPUThread&, spu_opcode_t); + static bool FNMS(SPUThread&, spu_opcode_t); + static bool FMA(SPUThread&, spu_opcode_t); + static bool FMS(SPUThread&, spu_opcode_t); }; struct spu_interpreter_precise final : spu_interpreter { - static void ROTQBYBI(SPUThread&, spu_opcode_t); - static void ROTQMBYBI(SPUThread&, spu_opcode_t); - static void SHLQBYBI(SPUThread&, spu_opcode_t); - static void ROTQBY(SPUThread&, spu_opcode_t); - static void ROTQMBY(SPUThread&, spu_opcode_t); - static void SHLQBY(SPUThread&, spu_opcode_t); - static void ROTQBYI(SPUThread&, spu_opcode_t); - static void ROTQMBYI(SPUThread&, spu_opcode_t); - static void SHLQBYI(SPUThread&, spu_opcode_t); - static void SHUFB(SPUThread&, spu_opcode_t); + static bool ROTQBYBI(SPUThread&, spu_opcode_t); + static bool ROTQMBYBI(SPUThread&, spu_opcode_t); + static bool SHLQBYBI(SPUThread&, spu_opcode_t); + static bool ROTQBY(SPUThread&, spu_opcode_t); + static bool ROTQMBY(SPUThread&, spu_opcode_t); + static bool SHLQBY(SPUThread&, spu_opcode_t); + static bool ROTQBYI(SPUThread&, spu_opcode_t); + static bool ROTQMBYI(SPUThread&, spu_opcode_t); + static bool SHLQBYI(SPUThread&, spu_opcode_t); + static bool SHUFB(SPUThread&, spu_opcode_t); - static void FREST(SPUThread&, spu_opcode_t); - static void FRSQEST(SPUThread&, spu_opcode_t); - static void FCGT(SPUThread&, spu_opcode_t); - static void FA(SPUThread&, spu_opcode_t); - static void FS(SPUThread&, spu_opcode_t); - static void FM(SPUThread&, spu_opcode_t); - static void FCMGT(SPUThread&, spu_opcode_t); - static void DFA(SPUThread&, spu_opcode_t); - static void DFS(SPUThread&, spu_opcode_t); - static void DFM(SPUThread&, spu_opcode_t); - static void DFMA(SPUThread&, spu_opcode_t); - static void DFMS(SPUThread&, spu_opcode_t); - static void DFNMS(SPUThread&, spu_opcode_t); - static void DFNMA(SPUThread&, spu_opcode_t); - static void FSCRRD(SPUThread&, spu_opcode_t); - static void FESD(SPUThread&, spu_opcode_t); - static void FRDS(SPUThread&, spu_opcode_t); - static void FSCRWR(SPUThread&, spu_opcode_t); - static void FCEQ(SPUThread&, spu_opcode_t); - static void FCMEQ(SPUThread&, spu_opcode_t); - static void FI(SPUThread&, spu_opcode_t); - static void CFLTS(SPUThread&, spu_opcode_t); - static void CFLTU(SPUThread&, spu_opcode_t); - static void CSFLT(SPUThread&, spu_opcode_t); - static void CUFLT(SPUThread&, spu_opcode_t); - static void FNMS(SPUThread&, spu_opcode_t); - static void FMA(SPUThread&, spu_opcode_t); - static void FMS(SPUThread&, spu_opcode_t); + static bool FREST(SPUThread&, spu_opcode_t); + static bool FRSQEST(SPUThread&, spu_opcode_t); + static bool FCGT(SPUThread&, spu_opcode_t); + static bool FA(SPUThread&, spu_opcode_t); + static bool FS(SPUThread&, spu_opcode_t); + static bool FM(SPUThread&, spu_opcode_t); + static bool FCMGT(SPUThread&, spu_opcode_t); + static bool DFA(SPUThread&, spu_opcode_t); + static bool DFS(SPUThread&, spu_opcode_t); + static bool DFM(SPUThread&, spu_opcode_t); + static bool DFMA(SPUThread&, spu_opcode_t); + static bool DFMS(SPUThread&, spu_opcode_t); + static bool DFNMS(SPUThread&, spu_opcode_t); + static bool DFNMA(SPUThread&, spu_opcode_t); + static bool FSCRRD(SPUThread&, spu_opcode_t); + static bool FESD(SPUThread&, spu_opcode_t); + static bool FRDS(SPUThread&, spu_opcode_t); + static bool FSCRWR(SPUThread&, spu_opcode_t); + static bool FCEQ(SPUThread&, spu_opcode_t); + static bool FCMEQ(SPUThread&, spu_opcode_t); + static bool FI(SPUThread&, spu_opcode_t); + static bool CFLTS(SPUThread&, spu_opcode_t); + static bool CFLTU(SPUThread&, spu_opcode_t); + static bool CSFLT(SPUThread&, spu_opcode_t); + static bool CUFLT(SPUThread&, spu_opcode_t); + static bool FNMS(SPUThread&, spu_opcode_t); + static bool FMA(SPUThread&, spu_opcode_t); + static bool FMS(SPUThread&, spu_opcode_t); }; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index a23077dfb7..b308078c08 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -32,6 +32,7 @@ const bool s_use_ssse3 = true; #else false; +#define _mm_shuffle_epi8 #endif #ifdef _MSC_VER @@ -388,25 +389,75 @@ void SPUThread::cpu_task() g_cfg.core.spu_decoder == spu_decoder_type::fast ? &g_spu_interpreter_fast.get_table() : (fmt::throw_exception("Invalid SPU decoder"), nullptr)); - // LS base address - const auto base = vm::_ptr(offset); + // LS pointer + const auto base = vm::_ptr(offset); + const auto bswap4 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); + + v128 _op; + using func_t = decltype(&spu_interpreter::UNK); + func_t func0, func1, func2, func3, func4, func5; while (true) { - if (!test(state) || !check_state()) + if (UNLIKELY(test(state))) { - // Read opcode - const u32 op = base[pc / 4]; - - // Call interpreter function - table[spu_decode(op)](*this, { op }); - - // Next instruction - pc += 4; + if (check_state()) return; + // Decode single instruction (may be step) + const u32 op = *reinterpret_cast*>(base + pc); + if (table[spu_decode(op)](*this, {op})) { pc += 4; } continue; } - return; + + if (pc % 16 || !s_use_ssse3) + { + // Unaligned + const u32 op = *reinterpret_cast*>(base + pc); + if (table[spu_decode(op)](*this, {op})) { pc += 4; } + continue; + } + + // Reinitialize + _op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast(base + pc)), bswap4); + func0 = table[spu_decode(_op._u32[0])]; + func1 = table[spu_decode(_op._u32[1])]; + func2 = table[spu_decode(_op._u32[2])]; + func3 = table[spu_decode(_op._u32[3])]; + + while (LIKELY(func0(*this, {_op._u32[0]}))) + { + pc += 4; + if (LIKELY(func1(*this, {_op._u32[1]}))) + { + pc += 4; + u32 op2 = _op._u32[2]; + u32 op3 = _op._u32[3]; + _op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast(base + pc + 8)), bswap4); + func0 = table[spu_decode(_op._u32[0])]; + func1 = table[spu_decode(_op._u32[1])]; + func4 = table[spu_decode(_op._u32[2])]; + func5 = table[spu_decode(_op._u32[3])]; + if (LIKELY(func2(*this, {op2}))) + { + pc += 4; + if (LIKELY(func3(*this, {op3}))) + { + pc += 4; + func2 = func4; + func3 = func5; + + if (UNLIKELY(test(state))) + { + break; + } + continue; + } + break; + } + break; + } + break; + } } }