diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 2d928d88b5..ff08fb916e 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -2,6 +2,7 @@ #include "Emu/System.h" #include "Emu/IdManager.h" #include "Emu/Memory/Memory.h" +#include "Crypto/sha1.h" #include "SPUThread.h" #include "SPUAnalyser.h" @@ -262,11 +263,8 @@ std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset if (type != spu_itype::BISL || g_cfg.core.spu_block_size == spu_block_size_type::giga) { - // TODO - if (g_cfg.core.spu_block_size != spu_block_size_type::safe) - { - add_block(target); - } + LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x", lsa, pos, target); + add_block(target); } if (type == spu_itype::BISL && target < lsa) @@ -545,3 +543,2293 @@ std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset return result; } + +#include "Emu/CPU/CPUTranslator.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Vectorize.h" +#include "Utilities/JIT.h" + +class spu_llvm_runtime +{ + shared_mutex m_mutex; + + // All functions + std::map, spu_function_t> m_map; + + // All dispatchers + std::array, 0x10000> m_dispatcher; + + // JIT instance (TODO: use small code model) + jit_compiler m_jit{{}, jit_compiler::cpu(g_cfg.core.llvm_cpu), true}; + + friend class spu_llvm_recompiler; + +public: + spu_llvm_runtime() + { + LOG_SUCCESS(SPU, "SPU Recompiler Runtime (LLVM) initialized..."); + + // Initialize lookup table + for (auto& v : m_dispatcher) + { + v.raw() = &spu_recompiler_base::dispatch; + } + + // Initialize "empty" block + m_map[std::vector()] = &spu_recompiler_base::dispatch; + } +}; + +class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator +{ + std::shared_ptr m_spurt; + + llvm::Function* m_function; + + using m_module = void; + + llvm::Value* m_thread; + llvm::Value* m_lsptr; + + llvm::BasicBlock* m_stop; + llvm::GlobalVariable* m_jt; + + std::array, 128> m_gpr; + std::array m_flush_gpr; + + std::map m_instr_map; + + template + llvm::Value* _ptr(llvm::Value* base, u32 offset, std::string name = "") + { + const auto off = m_ir->CreateAdd(base, m_ir->getInt64(offset)); + const auto ptr = m_ir->CreateIntToPtr(off, get_type()->getPointerTo(), name); + return ptr; + } + + template + llvm::Value* spu_ptr(Args... 
offset_args) + { + return _ptr(m_thread, ::offset32(offset_args...)); + } + + template + auto& init_vr(u32 index) + { + auto& gpr = m_gpr.at(index); + + if (!gpr.first) + { + // Save and restore current insert point if necessary + const auto block_cur = m_ir->GetInsertBlock(); + + // Emit register pointer at the beginning of function + m_ir->SetInsertPoint(&*m_function->begin()->getFirstInsertionPt()); + gpr.first = _ptr(m_thread, ::offset32(&SPUThread::gpr, index), fmt::format("Reg$%u", index)); + m_ir->SetInsertPoint(block_cur); + } + + return gpr; + } + + template + value_t get_vr(u32 index) + { + auto& gpr = init_vr(index); + + if (!gpr.second) + { + gpr.second = m_ir->CreateLoad(gpr.first, fmt::format("Load$%u", index)); + } + + value_t r; + r.value = m_ir->CreateBitCast(gpr.second, get_type()); + return r; + } + + template + void set_vr(u32 index, T expr) + { + auto& gpr = init_vr(index); + + gpr.second = expr.eval(m_ir); + + // Remember last insertion point for flush + if (m_ir->GetInsertBlock()->empty()) + { + // Insert dummy instruction if empty + m_flush_gpr.at(index) = llvm::cast(m_ir->CreateAdd(m_thread, m_ir->getInt64(8))); + } + else + { + m_flush_gpr.at(index) = m_ir->GetInsertBlock()->end()->getPrevNode(); + } + } + + void flush(std::pair& reg, llvm::Instruction*& flush_reg) + { + if (reg.first && reg.second && flush_reg) + { + // Save and restore current insert point if necessary + const auto block_cur = m_ir->GetInsertBlock(); + + // Try to emit store immediately after its last use + if (const auto next = flush_reg->getNextNode()) + { + m_ir->SetInsertPoint(next); + } + + m_ir->CreateStore(m_ir->CreateBitCast(reg.second, reg.first->getType()->getPointerElementType()), reg.first); + m_ir->SetInsertPoint(block_cur); + } + + // Unregister store + flush_reg = nullptr; + + // Invalidate current value (TODO) + reg.second = nullptr; + } + + void flush() + { + for (u32 i = 0; i < 128; i++) + { + flush(m_gpr[i], m_flush_gpr[i]); + } + } + + void update_pc() + { + m_ir->CreateStore(m_ir->getInt32(m_pos), spu_ptr(&SPUThread::pc)); + } + + // Perform external call + template + llvm::CallInst* call(RT(*_func)(FArgs...), Args... args) + { + static_assert(sizeof...(FArgs) == sizeof...(Args), "spu_llvm_recompiler::call(): unexpected arg number"); + const auto iptr = reinterpret_cast(_func); + const auto type = llvm::FunctionType::get(get_type(), {args->getType()...}, false)->getPointerTo(); + return m_ir->CreateCall(m_ir->CreateIntToPtr(m_ir->getInt64(iptr), type), {args...}); + } + + // Perform external call and return + template + void tail(RT(*_func)(FArgs...), Args... 
args) + { + const auto inst = call(_func, args...); + inst->setTailCall(); + + if (inst->getType() == get_type()) + { + m_ir->CreateRetVoid(); + } + else + { + m_ir->CreateRet(inst); + } + } + + void tail(llvm::Value* func_ptr) + { + m_ir->CreateCall(func_ptr, {m_thread, m_lsptr, m_ir->getInt32(0)})->setTailCall(); + m_ir->CreateRetVoid(); + } + + template + value_t pshufb(T1 a, T2 b) + { + value_t result; + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_ssse3_pshuf_b_128), {a.eval(m_ir), b.eval(m_ir)}); + return result; + } + +public: + spu_llvm_recompiler(class SPUThread& spu) + : spu_recompiler_base(spu) + , cpu_translator(nullptr, false) + { + if (g_cfg.core.spu_shared_runtime) + { + // TODO (local context is unsupported) + //m_spurt = std::make_shared(); + } + } + + virtual spu_function_t get(u32 lsa) override + { + // Initialize if necessary + if (!m_spurt) + { + m_spurt = fxm::get_always(); + m_context = m_spurt->m_jit.get_context(); + } + + // Simple atomic read + return m_spurt->m_dispatcher[lsa / 4]; + } + + virtual spu_function_t compile(const std::vector& func) override + { + // Initialize if necessary + if (!m_spurt) + { + m_spurt = fxm::get_always(); + m_context = m_spurt->m_jit.get_context(); + } + + // Don't lock without shared runtime + std::unique_lock lock(m_spurt->m_mutex, std::defer_lock); + + if (g_cfg.core.spu_shared_runtime) + { + lock.lock(); + } + + // Try to find existing function, register new + auto& fn_location = m_spurt->m_map[func]; + + if (fn_location) + { + return fn_location; + } + + std::string hash; + { + sha1_context ctx; + u8 output[20]; + + sha1_starts(&ctx); + sha1_update(&ctx, reinterpret_cast(func.data() + 1), func.size() * 4 - 4); + sha1_finish(&ctx, output); + + fmt::append(hash, "spu-0x%05x-%s", func[0], fmt::base57(output)); + } + + LOG_NOTICE(SPU, "Building function 0x%x... 
(size %u, %s)", func[0], func.size() - 1, hash); + + using namespace llvm; + + SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode); + dis_asm.offset = reinterpret_cast(func.data() + 1) - func[0]; + std::string log; + + if (g_cfg.core.spu_debug) + { + fmt::append(log, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", func[0], func.size() - 1, hash); + } + + // Create LLVM module + std::unique_ptr module = std::make_unique(hash, m_context); + + // Initialize target + module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); + + // Initialize pass manager + legacy::FunctionPassManager pm(module.get()); + + // Basic optimizations + pm.add(createEarlyCSEPass()); + pm.add(createDeadStoreEliminationPass()); + pm.add(createLintPass()); // Check + + // Add function + const auto main_func = cast(module->getOrInsertFunction(hash, get_type(), get_type(), get_type())); + m_function = main_func; + m_thread = &*m_function->arg_begin(); + m_lsptr = &*(m_function->arg_begin() + 1); + + // Initialize IR Builder + IRBuilder<> irb(BasicBlock::Create(m_context, "", m_function)); + m_ir = &irb; + + // Start compilation + m_pos = func[0]; + m_size = (func.size() - 1) * 4; + const u32 start = m_pos; + const u32 end = m_pos + m_size; + + m_stop = BasicBlock::Create(m_context, "", m_function); + + const auto jtt = ArrayType::get(GetType(), m_size / 4); + std::vector jt; + jt.reserve(m_size / 4); + + // Create instruction blocks + for (u32 i = 1, pos = start; i < func.size(); i++, pos += 4) + { + if (func[i] && m_block_info[pos / 4]) + { + const auto b = BasicBlock::Create(m_context, "", m_function); + jt.push_back(llvm::BlockAddress::get(b)); + m_instr_map.emplace(pos, b); + } + else + { + jt.push_back(llvm::BlockAddress::get(m_stop)); + } + } + + m_jt = new GlobalVariable(*module, jtt, true, GlobalValue::PrivateLinkage, llvm::ConstantArray::get(jtt, jt), "jt"); + + update_pc(); + + const auto label_test = BasicBlock::Create(m_context, "", m_function); + const auto label_diff = BasicBlock::Create(m_context, "", m_function); + const auto label_body = BasicBlock::Create(m_context, "", m_function); + + // Emit state check + const auto pstate = spu_ptr(&SPUThread::state); + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(pstate), m_ir->getInt32(0)), m_stop, label_test); + + // Emit code check + m_ir->SetInsertPoint(label_test); + + if (false) + { + // Disable check (not available) + } + else if (func.size() - 1 == 1) + { + const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(_ptr(m_lsptr, m_pos)), m_ir->getInt32(func[1])); + m_ir->CreateCondBr(cond, label_diff, label_body); + } + else if (func.size() - 1 == 2) + { + const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(_ptr(m_lsptr, m_pos)), m_ir->getInt64(static_cast(func[2]) << 32 | func[1])); + m_ir->CreateCondBr(cond, label_diff, label_body); + } + else + { + const u32 starta = m_pos & -32; + const u32 enda = ::align(end, 32); + const u32 sizea = (enda - starta) / 32; + verify(HERE), sizea; + + llvm::Value* acc = nullptr; + + for (u32 j = starta; j < enda; j += 32) + { + u32 indices[8]; + bool holes = false; + bool data = false; + + for (u32 i = 0; i < 8; i++) + { + const u32 k = j + i * 4; + + if (k < m_pos || k >= end || !func[(k - m_pos) / 4 + 1]) + { + indices[i] = 8; + holes = true; + } + else + { + indices[i] = i; + data = true; + } + } + + if (!data) + { + // Skip aligned holes + continue; + } + + // Load aligned code block from LS + llvm::Value* vls = m_ir->CreateLoad(_ptr(m_lsptr, j)); + + // Mask if necessary + if (holes) + { + vls = 
m_ir->CreateShuffleVector(vls, ConstantVector::getSplat(8, m_ir->getInt32(0)), indices); + } + + // Perform bitwise comparison and accumulate + u32 words[8]; + + for (u32 i = 0; i < 8; i++) + { + const u32 k = j + i * 4; + words[i] = k >= m_pos && k < end ? func[(k - m_pos) / 4 + 1] : 0; + } + + vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, words)); + acc = acc ? m_ir->CreateOr(acc, vls) : vls; + } + + // Pattern for PTEST + acc = m_ir->CreateBitCast(acc, get_type()); + llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0}); + elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, 1)); + elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, 2)); + elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, 3)); + + // Compare result with zero + const auto cond = m_ir->CreateICmpNE(elem, m_ir->getInt64(0)); + m_ir->CreateCondBr(cond, label_diff, label_body); + } + + // Increase block counter + m_ir->SetInsertPoint(label_body); + const auto pbcount = spu_ptr(&SPUThread::block_counter); + m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbcount), m_ir->getInt64(1)), pbcount); + + // Emit instructions + for (u32 i = 1; i < func.size(); i++) + { + const u32 pos = start + (i - 1) * 4; + + if (g_cfg.core.spu_debug) + { + // Disasm + dis_asm.dump_pc = pos; + dis_asm.disasm(pos); + log += dis_asm.last_opcode; + log += '\n'; + } + + // Get opcode + const u32 op = se_storage::swap(func[i]); + + if (!op) + { + // Ignore hole + if (!m_ir->GetInsertBlock()->getTerminator()) + { + flush(); + branch_fixed(spu_branch_target(pos)); + LOG_ERROR(SPU, "Unexpected fallthrough to 0x%x", pos); + } + + continue; + } + + // Bind instruction label if necessary (TODO) + const auto found = m_instr_map.find(pos); + + if (found != m_instr_map.end()) + { + if (!m_ir->GetInsertBlock()->getTerminator()) + { + flush(); + m_ir->CreateBr(found->second); + } + + m_ir->SetInsertPoint(found->second); + } + + if (!m_ir->GetInsertBlock()->getTerminator()) + { + // Update position + m_pos = pos; + + // Execute recompiler function (TODO) + (this->*g_decoder.decode(op))({op}); + } + } + + if (g_cfg.core.spu_debug) + { + log += '\n'; + + for (u32 i = 0; i < 128; i++) + { + if (m_gpr[i].first) + { + fmt::append(log, "$% -3u = %s\n", i, m_spu.gpr[i]); + } + } + + log += '\n'; + } + + // Make fallthrough if necessary + if (!m_ir->GetInsertBlock()->getTerminator()) + { + flush(); + branch_fixed(spu_branch_target(end)); + } + + // + m_ir->SetInsertPoint(m_stop); + m_ir->CreateRetVoid(); + + m_ir->SetInsertPoint(label_diff); + const auto pbfail = spu_ptr(&SPUThread::block_failure); + m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbfail), m_ir->getInt64(1)), pbfail); + tail(&spu_recompiler_base::dispatch, m_thread, m_ir->getInt32(0), m_ir->getInt32(0)); + + // Clear context + m_gpr.fill({}); + m_flush_gpr.fill(0); + m_instr_map.clear(); + + // Generate a dispatcher (übertrampoline) + std::vector addrv{start}; + const auto beg = m_spurt->m_map.lower_bound(addrv); + addrv[0] += 4; + const auto _end = m_spurt->m_map.lower_bound(addrv); + const u32 size0 = std::distance(beg, _end); + + if (size0 > 1) + { + const auto trampoline = cast(module->getOrInsertFunction(fmt::format("tr_0x%05x_%03u", start, size0), get_type(), get_type(), get_type())); + m_function = trampoline; + m_thread = &*m_function->arg_begin(); + m_lsptr = &*(m_function->arg_begin() + 1); + + struct work + { + u32 size; + u32 level; + BasicBlock* label; + std::map, spu_function_t>::iterator beg; + std::map, spu_function_t>::iterator end; + }; 
+ + std::vector workload; + workload.reserve(size0); + workload.emplace_back(); + workload.back().size = size0; + workload.back().level = 1; + workload.back().beg = beg; + workload.back().end = _end; + workload.back().label = llvm::BasicBlock::Create(m_context, "", m_function); + + for (std::size_t i = 0; i < workload.size(); i++) + { + // Get copy of the workload info + work w = workload[i]; + + // Switch targets + std::vector> targets; + + llvm::BasicBlock* def{}; + + while (true) + { + const u32 x1 = w.beg->first.at(w.level); + auto it = w.beg; + auto it2 = it; + u32 x = x1; + bool split = false; + + while (it2 != w.end) + { + it2++; + + const u32 x2 = it2 != w.end ? it2->first.at(w.level) : x1; + + if (x2 != x) + { + const u32 dist = std::distance(it, it2); + + const auto b = llvm::BasicBlock::Create(m_context, "", m_function); + + if (dist == 1 && x != 0) + { + m_ir->SetInsertPoint(b); + + if (const u64 fval = reinterpret_cast(it->second)) + { + const auto ptr = m_ir->CreateIntToPtr(m_ir->getInt64(fval), main_func->getType()); + m_ir->CreateCall(ptr, {m_thread, m_lsptr})->setTailCall(); + } + else + { + verify(HERE, &it->second == &fn_location); + m_ir->CreateCall(main_func, {m_thread, m_lsptr})->setTailCall(); + } + + m_ir->CreateRetVoid(); + } + else + { + workload.emplace_back(w); + workload.back().beg = it; + workload.back().end = it2; + workload.back().label = b; + workload.back().size = dist; + } + + if (x == 0) + { + def = b; + } + else + { + targets.emplace_back(std::make_pair(x, b)); + } + + x = x2; + it = it2; + split = true; + } + } + + if (!split) + { + // Cannot split: words are identical within the range at this level + w.level++; + } + else + { + break; + } + } + + if (!def) + { + def = llvm::BasicBlock::Create(m_context, "", m_function); + + m_ir->SetInsertPoint(def); + tail(&spu_recompiler_base::dispatch, m_thread, m_ir->getInt32(0), m_ir->getInt32(0)); + } + + m_ir->SetInsertPoint(w.label); + const auto add = m_ir->CreateAdd(m_lsptr, m_ir->getInt64(start + w.level * 4 - 4)); + const auto ptr = m_ir->CreateIntToPtr(add, get_type()); + const auto val = m_ir->CreateLoad(ptr); + const auto sw = m_ir->CreateSwitch(val, def, ::size32(targets)); + + for (auto& pair : targets) + { + sw->addCase(m_ir->getInt32(pair.first), pair.second); + } + } + } + + // Run some optimizations + //pm.run(*main_func); + + spu_function_t fn{}, tr{}; + + raw_string_ostream out(log); + + if (g_cfg.core.spu_debug) + { + fmt::append(log, "LLVM IR at 0x%x:\n", start); + out << *module; // print IR + out << "\n\n"; + } + + if (verifyModule(*module, &out)) + { + out.flush(); + LOG_ERROR(SPU, "LLVM: Verification failed at 0x%x:\n%s", start, log); + fmt::raw_error("Compilation failed"); + } + + if (g_cfg.core.spu_debug) + { + // Testing only + m_spurt->m_jit.add(std::move(module), fmt::format("%sSPU/%s.obj", Emu.GetCachePath(), hash)); + } + else + { + m_spurt->m_jit.add(std::move(module)); + } + + m_spurt->m_jit.fin(); + fn = reinterpret_cast(m_spurt->m_jit.get_engine().getPointerToFunction(main_func)); + tr = fn; + + if (size0 > 1) + { + tr = reinterpret_cast(m_spurt->m_jit.get_engine().getPointerToFunction(m_function)); + } + + // Register function pointer + fn_location = fn; + + // Trampoline + m_spurt->m_dispatcher[start / 4] = tr; + + LOG_NOTICE(SPU, "[0x%x] Compiled: %p", start, fn); + + if (tr != fn) + LOG_NOTICE(SPU, "[0x%x] T: %p", start, tr); + + if (g_cfg.core.spu_debug) + { + out.flush(); + fs::file(Emu.GetCachePath() + "SPU.log", fs::write + fs::append).write(log); + } + + return fn; + } 
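For reference, the check emitted at label_test above boils down to a word-by-word comparison between local storage and the stored block contents; the compiled prologue does it with 256-bit XOR/OR accumulation and a PTEST-style reduction, but a minimal scalar sketch captures the same idea (illustrative only, not RPCS3 code; spu_block_matches is an assumed name):

// Hedged scalar sketch (illustrative, not RPCS3 code) of the check emitted
// at label_test: func[0] is the entry point, func[1..] are the stored
// instruction words, holes are 0 and excluded. Both sides are raw storage
// (same byte order), so no swap is needed for the comparison itself.
#include <cstdint>
#include <cstring>
#include <vector>

using u32 = std::uint32_t;

bool spu_block_matches(const std::uint8_t* ls, const std::vector<u32>& func)
{
	const u32 start = func[0];

	for (std::size_t i = 1; i < func.size(); i++)
	{
		if (!func[i])
		{
			continue; // Hole: not part of the compiled block
		}

		u32 word;
		std::memcpy(&word, ls + start + (i - 1) * 4, sizeof(word));

		if (word != func[i])
		{
			return false; // Code changed: take the label_diff path (dispatch)
		}
	}

	return true; // Safe to execute the compiled body
}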
+ + template + static void exec_fall(SPUThread* _spu, spu_opcode_t op) + { + if (F(*_spu, op)) + { + _spu->pc += 4; + } + } + + template + void fall(spu_opcode_t op) + { + flush(); + update_pc(); + call(&exec_fall, m_thread, m_ir->getInt32(op.opcode)); + } + + static void exec_unk(SPUThread* _spu, u32 op) + { + fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op); + } + + void UNK(spu_opcode_t op_unk) + { + flush(); + update_pc(); + tail(&exec_unk, m_thread, m_ir->getInt32(op_unk.opcode)); + } + + static void exec_stop(SPUThread* _spu, u32 code) + { + if (_spu->stop_and_signal(code)) + { + _spu->pc += 4; + } + } + + void STOP(spu_opcode_t op) // + { + flush(); + update_pc(); + tail(&exec_stop, m_thread, m_ir->getInt32(op.opcode)); + } + + void STOPD(spu_opcode_t op) // + { + flush(); + update_pc(); + tail(&exec_stop, m_thread, m_ir->getInt32(0x3fff)); + } + + static s64 exec_rdch(SPUThread* _spu, u32 ch) + { + return _spu->get_ch_value(ch); + } + + void RDCH(spu_opcode_t op) // + { + flush(); + update_pc(); + value_t res; + res.value = call(&exec_rdch, m_thread, m_ir->getInt32(op.ra)); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), m_stop, next); + m_ir->SetInsertPoint(next); + set_vr(op.rt, insert(splat(0), 3, trunc(res))); + } + + static u32 exec_rchcnt(SPUThread* _spu, u32 ch) + { + return _spu->get_ch_count(ch); + } + + void RCHCNT(spu_opcode_t op) // + { + value_t res; + res.value = call(&exec_rchcnt, m_thread, m_ir->getInt32(op.ra)); + set_vr(op.rt, insert(splat(0), 3, res)); + } + + static bool exec_wrch(SPUThread* _spu, u32 ch, u32 value) + { + return _spu->set_ch_value(ch, value); + } + + void WRCH(spu_opcode_t op) // + { + flush(); + update_pc(); + const auto succ = call(&exec_wrch, m_thread, m_ir->getInt32(op.ra), extract(get_vr(op.rt), 3).value); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(succ, next, m_stop); + m_ir->SetInsertPoint(next); + } + + void LNOP(spu_opcode_t op) // + { + update_pc(); + } + + void NOP(spu_opcode_t op) // + { + update_pc(); + } + + void SYNC(spu_opcode_t op) // + { + // This instruction must be used following a store instruction that modifies the instruction stream. + m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); + } + + void DSYNC(spu_opcode_t op) // + { + // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. + m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); + } + + void MFSPR(spu_opcode_t op) // + { + // Check SPUInterpreter for notes. + set_vr(op.rt, splat(0)); + } + + void MTSPR(spu_opcode_t op) // + { + // Check SPUInterpreter for notes. 
+ } + + void SF(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.rb) - get_vr(op.ra)); + } + + void OR(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) | get_vr(op.rb)); + } + + void BG(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + set_vr(op.rt, ~ucarry(a, eval(b - a), b) >> 31); + } + + void SFH(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.rb) - get_vr(op.ra)); + } + + void NOR(spu_opcode_t op) // + { + set_vr(op.rt, ~(get_vr(op.ra) | get_vr(op.rb))); + } + + void ABSDB(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + set_vr(op.rt, max(a, b) - min(a, b)); + } + + void ROT(spu_opcode_t op) + { + set_vr(op.rt, rol(get_vr(op.ra), get_vr(op.rb))); + } + + void ROTM(spu_opcode_t op) + { + set_vr(op.rt, trunc(zext(get_vr(op.ra)) >> zext(-get_vr(op.rb) & 0x3f))); + } + + void ROTMA(spu_opcode_t op) + { + set_vr(op.rt, trunc(sext(get_vr(op.ra)) >> zext(-get_vr(op.rb) & 0x3f))); + } + + void SHL(spu_opcode_t op) + { + set_vr(op.rt, trunc(zext(get_vr(op.ra)) << zext(get_vr(op.rb) & 0x3f))); + } + + void ROTH(spu_opcode_t op) + { + set_vr(op.rt, rol(get_vr(op.ra), get_vr(op.rb))); + } + + void ROTHM(spu_opcode_t op) + { + set_vr(op.rt, trunc(zext(get_vr(op.ra)) >> zext(-get_vr(op.rb) & 0x1f))); + } + + void ROTMAH(spu_opcode_t op) + { + set_vr(op.rt, trunc(sext(get_vr(op.ra)) >> zext(-get_vr(op.rb) & 0x1f))); + } + + void SHLH(spu_opcode_t op) + { + set_vr(op.rt, trunc(zext(get_vr(op.ra)) << zext(get_vr(op.rb) & 0x1f))); + } + + void ROTI(spu_opcode_t op) + { + set_vr(op.rt, rol(get_vr(op.ra), op.i7)); + } + + void ROTMI(spu_opcode_t op) + { + if (-op.i7 & 0x20) + { + return set_vr(op.rt, splat(0)); + } + + set_vr(op.rt, get_vr(op.ra) >> (-op.i7 & 0x1f)); + } + + void ROTMAI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) >> (-op.i7 & 0x20 ? 0x1f : -op.i7 & 0x1f)); + } + + void SHLI(spu_opcode_t op) + { + if (op.i7 & 0x20) + { + return set_vr(op.rt, splat(0)); + } + + set_vr(op.rt, get_vr(op.ra) << (op.i7 & 0x1f)); + } + + void ROTHI(spu_opcode_t op) + { + set_vr(op.rt, rol(get_vr(op.ra), op.i7)); + } + + void ROTHMI(spu_opcode_t op) + { + if (-op.i7 & 0x10) + { + return set_vr(op.rt, splat(0)); + } + + set_vr(op.rt, get_vr(op.ra) >> (-op.i7 & 0xf)); + } + + void ROTMAHI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) >> (-op.i7 & 0x10 ? 
0xf : -op.i7 & 0xf)); + } + + void SHLHI(spu_opcode_t op) + { + if (op.i7 & 0x10) + { + return set_vr(op.rt, splat(0)); + } + + set_vr(op.rt, get_vr(op.ra) << (op.i7 & 0xf)); + } + + void A(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb)); + } + + void AND(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) & get_vr(op.rb)); + } + + void CG(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + set_vr(op.rt, ucarry(a, b, eval(a + b)) >> 31); + } + + void AH(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb)); + } + + void NAND(spu_opcode_t op) // + { + set_vr(op.rt, ~(get_vr(op.ra) & get_vr(op.rb))); + } + + void AVGB(spu_opcode_t op) + { + set_vr(op.rt, avg(get_vr(op.ra), get_vr(op.rb))); + } + + void GB(spu_opcode_t op) + { + // TODO + value_t m; + m.value = eval((get_vr(op.ra) << 31) < 0).value; + m.value = m_ir->CreateBitCast(m.value, m_ir->getIntNTy(4)); + m.value = m_ir->CreateZExt(m.value, get_type()); + set_vr(op.rt, insert(splat(0), 3, m)); + } + + void GBH(spu_opcode_t op) + { + const auto m = zext(bitcast((get_vr(op.ra) << 15) < 0)); + set_vr(op.rt, insert(splat(0), 3, m)); + } + + void GBB(spu_opcode_t op) + { + const auto m = zext(bitcast((get_vr(op.ra) << 7) < 0)); + set_vr(op.rt, insert(splat(0), 3, m)); + } + + void FSM(spu_opcode_t op) + { + // TODO + value_t m; + m.value = extract(get_vr(op.ra), 3).value; + m.value = m_ir->CreateTrunc(m.value, m_ir->getIntNTy(4)); + m.value = m_ir->CreateBitCast(m.value, get_type()); + set_vr(op.rt, sext(m)); + } + + void FSMH(spu_opcode_t op) + { + const auto m = bitcast(trunc(extract(get_vr(op.ra), 3))); + set_vr(op.rt, sext(m)); + } + + void FSMB(spu_opcode_t op) + { + const auto m = bitcast(trunc(extract(get_vr(op.ra), 3))); + set_vr(op.rt, sext(m)); + } + + void ROTQBYBI(spu_opcode_t op) + { + value_t sh; + u8 initial[16]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + sh.value = llvm::ConstantDataVector::get(m_context, initial); + sh = eval((sh - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf); + set_vr(op.rt, pshufb(get_vr(op.ra), sh)); + } + + void ROTQMBYBI(spu_opcode_t op) + { + value_t sh; + u8 initial[16]{112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127}; + sh.value = llvm::ConstantDataVector::get(m_context, initial); + sh = eval(sh + (-(zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3) & 0x1f)); + set_vr(op.rt, pshufb(get_vr(op.ra), sh)); + } + + void SHLQBYBI(spu_opcode_t op) + { + value_t sh; + u8 initial[16]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + sh.value = llvm::ConstantDataVector::get(m_context, initial); + sh = eval(sh - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)); + set_vr(op.rt, pshufb(get_vr(op.ra), sh)); + } + + void CBX(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0xf); + value_t r; + u8 initial[16]{0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt8(0x3), i.value); + set_vr(op.rt, r); + } + + void CHX(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) >> 1 & 0x7); + value_t r; + u16 initial[8]{0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011}; + r.value 
= llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt16(0x0203), i.value); + set_vr(op.rt, r); + } + + void CWX(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) >> 2 & 0x3); + value_t r; + u32 initial[4]{0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt32(0x010203), i.value); + set_vr(op.rt, r); + } + + void CDX(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) >> 3 & 0x1); + value_t r; + u64 initial[2]{0x18191a1b1c1d1e1f, 0x1011121314151617}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt64(0x01020304050607), i.value); + set_vr(op.rt, r); + } + + void ROTQBI(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, a << b | zshuffle(a, 3, 0, 1, 2) >> (32 - b)); + } + + void ROTQMBI(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = zshuffle(-get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, a >> b | zshuffle(a, 1, 2, 3, 4) << (32 - b)); + } + + void SHLQBI(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, a << b | zshuffle(a, 4, 0, 1, 2) >> (32 - b)); + } + + void ROTQBY(spu_opcode_t op) + { + value_t sh; + u8 initial[16]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + sh.value = llvm::ConstantDataVector::get(m_context, initial); + sh = eval((sh - zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf); + set_vr(op.rt, pshufb(get_vr(op.ra), sh)); + } + + void ROTQMBY(spu_opcode_t op) + { + value_t sh; + u8 initial[16]{112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127}; + sh.value = llvm::ConstantDataVector::get(m_context, initial); + sh = eval(sh + (-zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f)); + set_vr(op.rt, pshufb(get_vr(op.ra), sh)); + } + + void SHLQBY(spu_opcode_t op) + { + value_t sh; + u8 initial[16]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + sh.value = llvm::ConstantDataVector::get(m_context, initial); + sh = eval(sh - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f)); + set_vr(op.rt, pshufb(get_vr(op.ra), sh)); + } + + void ORX(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto x = zshuffle(a, 2, 3, 0, 1) | a; + const auto y = zshuffle(x, 1, 0, 3, 2) | x; + set_vr(op.rt, zshuffle(y, 4, 4, 4, 3)); + } + + void CBD(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + op.i7) & 0xf); + value_t r; + u8 initial[16]{0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt8(0x3), i.value); + set_vr(op.rt, r); + } + + void CHD(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + op.i7) >> 1 & 0x7); + value_t r; + u16 initial[8]{0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt16(0x0203), i.value); + set_vr(op.rt, r); + } + + void 
CWD(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + op.i7) >> 2 & 0x3); + value_t r; + u32 initial[4]{0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt32(0x010203), i.value); + set_vr(op.rt, r); + } + + void CDD(spu_opcode_t op) + { + const auto i = eval(~(extract(get_vr(op.ra), 3) + op.i7) >> 3 & 0x1); + value_t r; + u64 initial[2]{0x18191a1b1c1d1e1f, 0x1011121314151617}; + r.value = llvm::ConstantDataVector::get(m_context, initial); + r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt64(0x01020304050607), i.value); + set_vr(op.rt, r); + } + + void ROTQBII(spu_opcode_t op) + { + const auto s = op.i7 & 0x7; + const auto a = get_vr(op.ra); + + if (s == 0) + { + return set_vr(op.rt, a); + } + + set_vr(op.rt, a << s | zshuffle(a, 3, 0, 1, 2) >> (32 - s)); + } + + void ROTQMBII(spu_opcode_t op) + { + const auto s = -op.i7 & 0x7; + const auto a = get_vr(op.ra); + + if (s == 0) + { + return set_vr(op.rt, a); + } + + set_vr(op.rt, a >> s | zshuffle(a, 1, 2, 3, 4) << (32 - s)); + } + + void SHLQBII(spu_opcode_t op) + { + const auto s = op.i7 & 0x7; + const auto a = get_vr(op.ra); + + if (s == 0) + { + return set_vr(op.rt, a); + } + + set_vr(op.rt, a << s | zshuffle(a, 4, 0, 1, 2) >> (32 - s)); + } + + void ROTQBYI(spu_opcode_t op) + { + const u32 s = -op.i7 & 0xf; + set_vr(op.rt, zshuffle(get_vr(op.ra), + s & 15, (s + 1) & 15, (s + 2) & 15, (s + 3) & 15, + (s + 4) & 15, (s + 5) & 15, (s + 6) & 15, (s + 7) & 15, + (s + 8) & 15, (s + 9) & 15, (s + 10) & 15, (s + 11) & 15, + (s + 12) & 15, (s + 13) & 15, (s + 14) & 15, (s + 15) & 15)); + } + + void ROTQMBYI(spu_opcode_t op) + { + const u32 s = -op.i7 & 0x1f; + + if (s >= 16) + { + return set_vr(op.rt, splat(0)); + } + + set_vr(op.rt, zshuffle(get_vr(op.ra), + s, s + 1, s + 2, s + 3, + s + 4, s + 5, s + 6, s + 7, + s + 8, s + 9, s + 10, s + 11, + s + 12, s + 13, s + 14, s + 15)); + } + + void SHLQBYI(spu_opcode_t op) + { + const u32 s = op.i7 & 0x1f; + + if (s >= 16) + { + return set_vr(op.rt, splat(0)); + } + + const u32 x = -s; + + set_vr(op.rt, zshuffle(get_vr(op.ra), + x & 31, (x + 1) & 31, (x + 2) & 31, (x + 3) & 31, + (x + 4) & 31, (x + 5) & 31, (x + 6) & 31, (x + 7) & 31, + (x + 8) & 31, (x + 9) & 31, (x + 10) & 31, (x + 11) & 31, + (x + 12) & 31, (x + 13) & 31, (x + 14) & 31, (x + 15) & 31)); + } + + void CGT(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + } + + void XOR(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) ^ get_vr(op.rb)); + } + + void CGTH(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + } + + void EQV(spu_opcode_t op) // + { + set_vr(op.rt, ~(get_vr(op.ra) ^ get_vr(op.rb))); + } + + void CGTB(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + } + + void SUMB(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + const auto ahs = eval((a >> 8) + (a & 0xff)); + const auto bhs = eval((b >> 8) + (b & 0xff)); + const auto lsh = shuffle2(ahs, bhs, 0, 9, 2, 11, 4, 13, 6, 15); + const auto hsh = shuffle2(ahs, bhs, 1, 8, 3, 10, 5, 12, 7, 14); + set_vr(op.rt, lsh + hsh); + } + + void CLZ(spu_opcode_t op) + { + set_vr(op.rt, ctlz(get_vr(op.ra))); + } + + void XSWD(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) << 32 >> 32); + } + + void XSHW(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) << 16 >> 16); + } + + void CNTB(spu_opcode_t op) + { + set_vr(op.rt, 
ctpop(get_vr(op.ra))); + } + + void XSBH(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) << 8 >> 8); + } + + void CLGT(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + } + + void ANDC(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) & ~get_vr(op.rb)); + } + + void CLGTH(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + } + + void ORC(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) | ~get_vr(op.rb)); + } + + void CLGTB(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + } + + void CEQ(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); + } + + void MPYHHU(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) >> 16) * (get_vr(op.rb) >> 16)); + } + + void ADDX(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb) + (get_vr(op.rt) & 1)); + } + + void SFX(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.rb) - get_vr(op.ra) - (~get_vr(op.rt) & 1)); + } + + void CGX(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + const auto c = eval(get_vr(op.rt) << 31); + const auto s = eval(a + b); + set_vr(op.rt, (ucarry(a, b, s) | (sext(s == 0xffffffffu) & c)) >> 31); + } + + void BGX(spu_opcode_t op) + { + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + const auto c = eval(get_vr(op.rt) << 31); + const auto d = eval(b - a); + set_vr(op.rt, (~ucarry(a, d, b) & ~(sext(d == 0) & ~c)) >> 31); + } + + void MPYHHA(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) >> 16) * (get_vr(op.rb) >> 16) + get_vr(op.rt)); + } + + void MPYHHAU(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) >> 16) * (get_vr(op.rb) >> 16) + get_vr(op.rt)); + } + + void MPY(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) << 16 >> 16) * (get_vr(op.rb) << 16 >> 16)); + } + + void MPYH(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) >> 16) * (get_vr(op.rb) << 16)); + } + + void MPYHH(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) >> 16) * (get_vr(op.rb) >> 16)); + } + + void MPYS(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) << 16 >> 16) * (get_vr(op.rb) << 16 >> 16) >> 16); + } + + void CEQH(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); + } + + void MPYU(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) << 16 >> 16) * (get_vr(op.rb) << 16 >> 16)); + } + + void CEQB(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); + } + + void FSMBI(spu_opcode_t op) + { + u8 data[16]; + for (u32 i = 0; i < 16; i++) + data[i] = op.i16 & (1u << i) ? 
0xff : 0; + value_t r; + r.value = llvm::ConstantDataVector::get(m_context, data); + set_vr(op.rt, r); + } + + void IL(spu_opcode_t op) + { + set_vr(op.rt, splat(op.si16)); + } + + void ILHU(spu_opcode_t op) + { + set_vr(op.rt, splat(op.i16 << 16)); + } + + void ILH(spu_opcode_t op) + { + set_vr(op.rt, splat(op.i16)); + } + + void IOHL(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.rt) | op.i16); + } + + void ORI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) | op.si10); + } + + void ORHI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) | op.si10); + } + + void ORBI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) | op.si10); + } + + void SFI(spu_opcode_t op) + { + set_vr(op.rt, op.si10 - get_vr(op.ra)); + } + + void SFHI(spu_opcode_t op) + { + set_vr(op.rt, op.si10 - get_vr(op.ra)); + } + + void ANDI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) & op.si10); + } + + void ANDHI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) & op.si10); + } + + void ANDBI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) & op.si10); + } + + void AI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) + op.si10); + } + + void AHI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) + op.si10); + } + + void XORI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) ^ op.si10); + } + + void XORHI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) ^ op.si10); + } + + void XORBI(spu_opcode_t op) + { + set_vr(op.rt, get_vr(op.ra) ^ op.si10); + } + + void CGTI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > op.si10)); + } + + void CGTHI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > op.si10)); + } + + void CGTBI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > op.si10)); + } + + void CLGTI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > op.si10)); + } + + void CLGTHI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > op.si10)); + } + + void CLGTBI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) > op.si10)); + } + + void MPYI(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) << 16 >> 16) * splat(op.si10)); + } + + void MPYUI(spu_opcode_t op) + { + set_vr(op.rt, (get_vr(op.ra) << 16 >> 16) * splat(op.si10 & 0xffff)); + } + + void CEQI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) == op.si10)); + } + + void CEQHI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) == op.si10)); + } + + void CEQBI(spu_opcode_t op) + { + set_vr(op.rt, sext(get_vr(op.ra) == op.si10)); + } + + void ILA(spu_opcode_t op) + { + set_vr(op.rt, splat(op.i18)); + } + + void SELB(spu_opcode_t op) + { + const auto c = get_vr(op.rc); + set_vr(op.rt4, (get_vr(op.ra) & ~c) | (get_vr(op.rb) & c)); + } + + void SHUFB(spu_opcode_t op) + { + const auto c = get_vr(op.rc); + const auto x = avg(sext((c & 0xc0) == 0xc0), sext((c & 0xe0) == 0xc0)); + const auto cr = c ^ 0xf; + const auto a = pshufb(get_vr(op.ra), cr); + const auto b = pshufb(get_vr(op.rb), cr); + set_vr(op.rt4, merge(sext((c & 0x10) == 0), a, b) | x); + } + + void MPYA(spu_opcode_t op) + { + set_vr(op.rt4, (get_vr(op.ra) << 16 >> 16) * (get_vr(op.rb) << 16 >> 16) + get_vr(op.rc)); + } + + void FSCRRD(spu_opcode_t op) // + { + // Hack + set_vr(op.rt, splat(0)); + } + + void FSCRWR(spu_opcode_t op) // + { + // Hack + } + + void DFCGT(spu_opcode_t op) // + { + return UNK(op); + } + + void DFCEQ(spu_opcode_t op) // + { + return UNK(op); + } + + void DFCMGT(spu_opcode_t op) // + { + set_vr(op.rt, sext(fcmp(fabs(get_vr(op.ra)), fabs(get_vr(op.rb))))); + } + + void DFCMEQ(spu_opcode_t op) // + { + return UNK(op); + } + 
+ void DFTSV(spu_opcode_t op) // + { + return UNK(op); + } + + void DFA(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb)); + } + + void DFS(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) - get_vr(op.rb)); + } + + void DFM(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb)); + } + + void DFMA(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb) + get_vr(op.rt)); + } + + void DFMS(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb) - get_vr(op.rt)); + } + + void DFNMS(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.rt) - get_vr(op.ra) * get_vr(op.rb)); + } + + void DFNMA(spu_opcode_t op) // + { + set_vr(op.rt, -get_vr(op.rt) - get_vr(op.ra) * get_vr(op.rb)); + } + + void FREST(spu_opcode_t op) // + { + set_vr(op.rt, fsplat(1.0) / get_vr(op.ra)); + } + + void FRSQEST(spu_opcode_t op) // + { + set_vr(op.rt, fsplat(1.0) / sqrt(fabs(get_vr(op.ra)))); + } + + void FCGT(spu_opcode_t op) // + { + set_vr(op.rt, sext(fcmp(get_vr(op.ra), get_vr(op.rb)))); + } + + void FCMGT(spu_opcode_t op) // + { + set_vr(op.rt, sext(fcmp(fabs(get_vr(op.ra)), fabs(get_vr(op.rb))))); + } + + void FA(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb)); + } + + void FS(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) - get_vr(op.rb)); + } + + void FM(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb)); + } + + void FESD(spu_opcode_t op) // + { + value_t r; + r.value = m_ir->CreateFPExt(shuffle2(get_vr(op.ra), fsplat(0.), 1, 3).value, get_type()); + set_vr(op.rt, r); + } + + void FRDS(spu_opcode_t op) // + { + value_t r; + r.value = m_ir->CreateFPTrunc(get_vr(op.ra).value, get_type()); + set_vr(op.rt, shuffle2(r, fsplat(0.), 2, 0, 3, 1)); + } + + void FCEQ(spu_opcode_t op) // + { + set_vr(op.rt, sext(fcmp(get_vr(op.ra), get_vr(op.rb)))); + } + + void FCMEQ(spu_opcode_t op) // + { + set_vr(op.rt, sext(fcmp(fabs(get_vr(op.ra)), fabs(get_vr(op.rb))))); + } + + void FNMS(spu_opcode_t op) // + { + set_vr(op.rt4, get_vr(op.rc) - get_vr(op.ra) * get_vr(op.rb)); + } + + void FMA(spu_opcode_t op) // + { + set_vr(op.rt4, get_vr(op.ra) * get_vr(op.rb) + get_vr(op.rc)); + } + + void FMS(spu_opcode_t op) // + { + set_vr(op.rt4, get_vr(op.ra) * get_vr(op.rb) - get_vr(op.rc)); + } + + void FI(spu_opcode_t op) // + { + set_vr(op.rt, get_vr(op.rb)); + } + + void CFLTS(spu_opcode_t op) // + { + value_t a = get_vr(op.ra); + if (op.i8 != 173) + a = eval(a * fsplat(std::exp2(static_cast(static_cast(173 - op.i8))))); + + value_t r; + r.value = m_ir->CreateFPToSI(a.value, get_type()); + set_vr(op.rt, r ^ sext(fcmp(a, fsplat(std::exp2(31.f))))); + } + + void CFLTU(spu_opcode_t op) // + { + value_t a = get_vr(op.ra); + if (op.i8 != 173) + a = eval(a * fsplat(std::exp2(static_cast(static_cast(173 - op.i8))))); + + value_t r; + r.value = m_ir->CreateFPToUI(a.value, get_type()); + set_vr(op.rt, r & ~(bitcast(a) >> 31)); + } + + void CSFLT(spu_opcode_t op) // + { + value_t r; + r.value = m_ir->CreateSIToFP(get_vr(op.ra).value, get_type()); + if (op.i8 != 155) + r = eval(r * fsplat(std::exp2(static_cast(static_cast(op.i8 - 155))))); + set_vr(op.rt, r); + } + + void CUFLT(spu_opcode_t op) // + { + value_t r; + r.value = m_ir->CreateUIToFP(get_vr(op.ra).value, get_type()); + if (op.i8 != 155) + r = eval(r * fsplat(std::exp2(static_cast(static_cast(op.i8 - 155))))); + set_vr(op.rt, r); + } + + void STQX(spu_opcode_t op) // + { + value_t addr = zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0); + addr.value 
= m_ir->CreateAdd(m_lsptr, addr.value); + value_t r = get_vr(op.rt); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + m_ir->CreateStore(r.value, m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + } + + void LQX(spu_opcode_t op) // + { + value_t addr = zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r; + r.value = m_ir->CreateLoad(m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + set_vr(op.rt, r); + } + + void STQA(spu_opcode_t op) // + { + value_t addr = splat(spu_ls_target(0, op.i16)); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r = get_vr(op.rt); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + m_ir->CreateStore(r.value, m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + } + + void LQA(spu_opcode_t op) // + { + value_t addr = splat(spu_ls_target(0, op.i16)); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r; + r.value = m_ir->CreateLoad(m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + set_vr(op.rt, r); + } + + void STQR(spu_opcode_t op) // + { + value_t addr = splat(spu_ls_target(m_pos, op.i16)); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r = get_vr(op.rt); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + m_ir->CreateStore(r.value, m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + } + + void LQR(spu_opcode_t op) // + { + value_t addr = splat(spu_ls_target(m_pos, op.i16)); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r; + r.value = m_ir->CreateLoad(m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + set_vr(op.rt, r); + } + + void STQD(spu_opcode_t op) // + { + value_t addr = zext((extract(get_vr(op.ra), 3) + (op.si10 << 4)) & 0x3fff0); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r = get_vr(op.rt); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + m_ir->CreateStore(r.value, m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + } + + void LQD(spu_opcode_t op) // + { + value_t addr = zext((extract(get_vr(op.ra), 3) + (op.si10 << 4)) & 0x3fff0); + addr.value = m_ir->CreateAdd(m_lsptr, addr.value); + value_t r; + r.value = m_ir->CreateLoad(m_ir->CreateIntToPtr(addr.value, get_type()->getPointerTo())); + r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); + set_vr(op.rt, r); + } + + void make_halt(llvm::BasicBlock* next) + { + const auto pstatus = spu_ptr(&SPUThread::status); + const auto chalt = m_ir->getInt32(SPU_STATUS_STOPPED_BY_HALT); + m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, pstatus, chalt, llvm::AtomicOrdering::Release)->setVolatile(true); + const auto ptr = m_ir->CreateIntToPtr(m_ir->getInt64(reinterpret_cast(vm::base(0xffdead00))), get_type()); + m_ir->CreateStore(m_ir->getInt32("HALT"_u32), ptr)->setVolatile(true); + m_ir->CreateBr(next); + } + + void 
HGT(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.ra), 3) > extract(get_vr(op.rb), 3)); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, halt, next); + m_ir->SetInsertPoint(halt); + make_halt(next); + m_ir->SetInsertPoint(next); + } + + void HEQ(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.ra), 3) == extract(get_vr(op.rb), 3)); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, halt, next); + m_ir->SetInsertPoint(halt); + make_halt(next); + m_ir->SetInsertPoint(next); + } + + void HLGT(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.ra), 3) > extract(get_vr(op.rb), 3)); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, halt, next); + m_ir->SetInsertPoint(halt); + make_halt(next); + m_ir->SetInsertPoint(next); + } + + void HGTI(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.ra), 3) > op.si10); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, halt, next); + m_ir->SetInsertPoint(halt); + make_halt(next); + m_ir->SetInsertPoint(next); + } + + void HEQI(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.ra), 3) == op.si10); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, halt, next); + m_ir->SetInsertPoint(halt); + make_halt(next); + m_ir->SetInsertPoint(next); + } + + void HLGTI(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.ra), 3) > op.si10); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, halt, next); + m_ir->SetInsertPoint(halt); + make_halt(next); + m_ir->SetInsertPoint(next); + } + + void HBR(spu_opcode_t op) // + { + // TODO: use the hint. + } + + void HBRA(spu_opcode_t op) // + { + // TODO: use the hint. + } + + void HBRR(spu_opcode_t op) // + { + // TODO: use the hint. 
+ } + + // TODO + static u32 exec_check_interrupts(SPUThread* _spu, u32 addr) + { + _spu->set_interrupt_status(true); + + if ((_spu->ch_event_mask & _spu->ch_event_stat & SPU_EVENT_INTR_IMPLEMENTED) > 0) + { + _spu->interrupts_enabled = false; + _spu->srr0 = addr; + return 0; + } + + return addr; + } + + void branch_indirect(spu_opcode_t op, value_t addr) + { + if (op.d) + { + m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&SPUThread::interrupts_enabled))->setVolatile(true); + } + else if (op.e) + { + addr.value = call(&exec_check_interrupts, m_thread, addr.value); + } + + if (llvm::isa(addr.value)) + { + return branch_fixed(llvm::cast(addr.value)->getZExtValue()); + } + + m_ir->CreateStore(addr.value, spu_ptr(&SPUThread::pc)); + + const u32 start = m_instr_map.begin()->first; + const auto local = llvm::BasicBlock::Create(m_context, "", m_function); + const auto exter = llvm::BasicBlock::Create(m_context, "", m_function); + const auto off = m_ir->CreateSub(addr.value, m_ir->getInt32(start)); + m_ir->CreateCondBr(m_ir->CreateICmpULT(off, m_ir->getInt32(m_size)), local, exter); + m_ir->SetInsertPoint(local); + const auto table = m_ir->CreateIndirectBr(m_ir->CreateLoad(m_ir->CreateGEP(m_jt, {(llvm::Value*)m_ir->getInt32(0), m_ir->CreateLShr(off, 2)})), m_instr_map.size() + 1); + for (const auto& pair : m_instr_map) + table->addDestination(pair.second); + table->addDestination(m_stop); + m_ir->SetInsertPoint(exter); + const auto disp = m_ir->CreateAdd(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher))); + const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo()->getPointerTo(); + tail(m_ir->CreateLoad(m_ir->CreateIntToPtr(m_ir->CreateAdd(disp, zext(addr << 1).value), type))); + } + + void branch_fixed(u32 target) + { + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&SPUThread::pc)); + + const auto found = m_instr_map.find(target); + + if (found != m_instr_map.end()) + { + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(spu_ptr(&SPUThread::state)), m_ir->getInt32(0)), m_stop, found->second); + return; + } + + const auto addr = m_ir->CreateAdd(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher) + target * 2)); + const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo()->getPointerTo(); + const auto func = m_ir->CreateLoad(m_ir->CreateIntToPtr(addr, type)); + tail(func); + } + + void BIZ(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.rt), 3) == 0); + const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_indirect(op, addr); + m_ir->SetInsertPoint(next); + } + + void BINZ(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.rt), 3) != 0); + const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_indirect(op, addr); + m_ir->SetInsertPoint(next); + } + + void BIHZ(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.rt), 6) == 0); + const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); + const auto next 
= llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_indirect(op, addr); + m_ir->SetInsertPoint(next); + } + + void BIHNZ(spu_opcode_t op) // + { + flush(); + const auto cond = eval(extract(get_vr(op.rt), 6) != 0); + const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_indirect(op, addr); + m_ir->SetInsertPoint(next); + } + + void BI(spu_opcode_t op) // + { + const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); + flush(); + branch_indirect(op, addr); + } + + void BISL(spu_opcode_t op) // + { + const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); + u32 values[4]{0, 0, 0, spu_branch_target(m_pos + 4)}; + value_t r; + r.value = llvm::ConstantDataVector::get(m_context, values); + set_vr(op.rt, r); + flush(); + branch_indirect(op, addr); + } + + void IRET(spu_opcode_t op) // + { + value_t srr0; + srr0.value = m_ir->CreateLoad(spu_ptr(&SPUThread::srr0)); + flush(); + branch_indirect(op, srr0); + } + + void BISLED(spu_opcode_t op) // + { + UNK(op); + } + + void BRZ(spu_opcode_t op) // + { + const u32 target = spu_branch_target(m_pos, op.i16); + + if (target == m_pos + 4) + { + return; + } + + flush(); + const auto cond = eval(extract(get_vr(op.rt), 3) == 0); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_fixed(target); + m_ir->SetInsertPoint(next); + } + + void BRNZ(spu_opcode_t op) // + { + const u32 target = spu_branch_target(m_pos, op.i16); + + if (target == m_pos + 4) + { + return; + } + + flush(); + const auto cond = eval(extract(get_vr(op.rt), 3) != 0); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_fixed(target); + m_ir->SetInsertPoint(next); + } + + void BRHZ(spu_opcode_t op) // + { + const u32 target = spu_branch_target(m_pos, op.i16); + + if (target == m_pos + 4) + { + return; + } + + flush(); + const auto cond = eval(extract(get_vr(op.rt), 6) == 0); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_fixed(target); + m_ir->SetInsertPoint(next); + } + + void BRHNZ(spu_opcode_t op) // + { + const u32 target = spu_branch_target(m_pos, op.i16); + + if (target == m_pos + 4) + { + return; + } + + flush(); + const auto cond = eval(extract(get_vr(op.rt), 6) != 0); + const auto next = llvm::BasicBlock::Create(m_context, "", m_function); + const auto jump = llvm::BasicBlock::Create(m_context, "", m_function); + m_ir->CreateCondBr(cond.value, jump, next); + m_ir->SetInsertPoint(jump); + branch_fixed(target); + m_ir->SetInsertPoint(next); + } + + void BRA(spu_opcode_t op) // + { + const u32 target = spu_branch_target(0, op.i16); + + if (target != m_pos + 4) + { + flush(); + branch_fixed(target); + } + } + + void BRASL(spu_opcode_t op) // + { + u32 
values[4]{0, 0, 0, spu_branch_target(m_pos + 4)}; + value_t r; + r.value = llvm::ConstantDataVector::get(m_context, values); + set_vr(op.rt, r); + BRA(op); + } + + void BR(spu_opcode_t op) // + { + const u32 target = spu_branch_target(m_pos, op.i16); + + if (target != m_pos + 4) + { + flush(); + branch_fixed(target); + } + } + + void BRSL(spu_opcode_t op) // + { + u32 values[4]{0, 0, 0, spu_branch_target(m_pos + 4)}; + value_t r; + r.value = llvm::ConstantDataVector::get(m_context, values); + set_vr(op.rt, r); + BR(op); + } + + static const spu_decoder g_decoder; +}; + +std::unique_ptr spu_recompiler_base::make_llvm_recompiler(SPUThread& spu) +{ + return std::make_unique(spu); +} + +DECLARE(spu_llvm_recompiler::g_decoder); diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 1aa83bd373..cd9a7b7dc4 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -10,6 +10,7 @@ protected: SPUThread& m_spu; u32 m_pos; + u32 m_size; std::bitset<0x10000> m_block_info; @@ -35,4 +36,7 @@ public: // Create recompiler instance (ASMJIT) static std::unique_ptr make_asmjit_recompiler(SPUThread& spu); + + // Create recompiler instance (LLVM) + static std::unique_ptr make_llvm_recompiler(SPUThread& spu); }; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 8909c41409..8c20b4e12e 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -536,6 +536,7 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group) if (g_cfg.core.spu_decoder == spu_decoder_type::llvm) { + jit = spu_recompiler_base::make_llvm_recompiler(*this); } } diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 57d2a7c4ba..95f2f54673 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -851,7 +851,18 @@ void Emulator::Load(bool add_only) if (g_cfg.core.spu_debug) { - fs::file log(Emu.GetCachePath() + "SPUJIT.log", fs::rewrite); + fs::file log; + + if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit) + { + log.open(Emu.GetCachePath() + "SPUJIT.log", fs::rewrite); + } + + if (g_cfg.core.spu_decoder == spu_decoder_type::llvm) + { + log.open(Emu.GetCachePath() + "SPU.log", fs::rewrite); + } + log.write(fmt::format("SPU JIT Log\n\nTitle: %s\nTitle ID: %s\n\n", Emu.GetTitle(), Emu.GetTitleID())); fs::create_dir(Emu.GetCachePath() + "SPU"); fs::remove_all(Emu.GetCachePath() + "SPU", false); diff --git a/rpcs3/Json/tooltips.json b/rpcs3/Json/tooltips.json index 388bc7e9fc..701c81ae95 100644 --- a/rpcs3/Json/tooltips.json +++ b/rpcs3/Json/tooltips.json @@ -16,7 +16,7 @@ "precise": "This is extremely slow but may fix broken graphics in some games.", "fast": "This is slower than the SPU Recompiler but significantly faster than the precise interpreter.\nGames rarely need this however.", "ASMJIT": "This is the fastest option with very good compatibility.\nIf unsure, use this option.", - "LLVM": "This doesn't exist (yet)" + "LLVM": "" }, "libraries": { "auto": "Automatically selects the LLE libraries to load.\nWhile this option works fine in most cases, liblv2 is the preferred option.", diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index 3dcff55a7d..2e34cb08ed 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -106,9 +106,6 @@ - - false - LLVM Recompiler
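For context on how the runtime introduced in SPURecompiler.cpp plugs in: compiled code is resolved through a per-address function-pointer table (m_dispatcher, mirrored by SPUThread::jit_dispatcher), whose slots start at spu_recompiler_base::dispatch and are later overwritten by compile() with either the function itself or the übertrampoline. A minimal sketch of that model follows, with assumed names and a reduced two-argument signature rather than the emulator's actual types:

// Hedged sketch (assumed names, simplified signature) of the dispatch model:
// one slot per 4-byte LS address, initialized to a fallback that would
// compile on demand; compile() later installs the real entry point.
#include <array>
#include <atomic>
#include <cstdint>

struct spu_thread_stub; // Stands in for SPUThread
using spu_func = void (*)(spu_thread_stub*, std::uint8_t*);

void fallback_dispatch(spu_thread_stub*, std::uint8_t*)
{
	// Real code would analyse the block, compile it, then retry
}

// 256 KiB of local storage / 4 bytes per instruction = 0x10000 slots
std::array<std::atomic<spu_func>, 0x10000> g_dispatcher;

void init_dispatcher()
{
	for (auto& slot : g_dispatcher)
	{
		slot.store(&fallback_dispatch, std::memory_order_relaxed);
	}
}

void enter(spu_thread_stub* spu, std::uint8_t* ls, std::uint32_t pc)
{
	// "Simple atomic read", as in get(lsa) above
	g_dispatcher[pc / 4].load(std::memory_order_acquire)(spu, ls);
}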