diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index 5204e9b82b..64cbed38af 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -173,7 +173,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value) { case MFC_LSA_offs: { - if (value >= 0x40000) + if (value >= SPU_LS_SIZE) { break; } @@ -321,7 +321,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value) void spu_load_exec(const spu_exec_object& elf) { - auto ls0 = vm::cast(vm::falloc(RAW_SPU_BASE_ADDR, 0x80000, vm::spu)); + auto ls0 = vm::cast(vm::falloc(RAW_SPU_BASE_ADDR, SPU_LS_SIZE, vm::spu)); auto spu = idm::make_ptr>("TEST_SPU", ls0, nullptr, 0, "", 0); spu_thread::g_raw_spu_ctr++; @@ -331,7 +331,7 @@ void spu_load_exec(const spu_exec_object& elf) { if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz) { - std::memcpy(vm::base(spu->offset + prog.p_vaddr), prog.bin.data(), prog.p_filesz); + std::memcpy(spu->_ptr(prog.p_vaddr), prog.bin.data(), prog.p_filesz); } } diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index f55e81d3e8..3593c37b60 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -272,8 +272,7 @@ DECLARE(spu_runtime::g_tail_escape) = build_function_asmget_runtime().find(static_cast(vm::base(spu.offset)), spu.pc); + const auto func = spu.jit->get_runtime().find(static_cast(spu._ptr(0)), spu.pc); if (!func) { @@ -7902,13 +7901,51 @@ public: void STQX(spu_opcode_t op) { - value_t addr = eval(zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + + for (auto pair : std::initializer_list, value_t>>{{a, b}, {b, a}}) + { + if (auto cv = llvm::dyn_cast(pair.first.value)) + { + v128 data = get_const_vector(cv, m_pos, 10000); + data._u32[3] %= SPU_LS_SIZE; + + if (data._u32[3] % 0x10 == 0) + { + value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); + make_store_ls(addr, get_vr(op.rt)); + return; + } + } + } + + value_t addr = eval(zext((extract(a, 3) + extract(b, 3)) & 0x3fff0)); make_store_ls(addr, get_vr(op.rt)); } void LQX(spu_opcode_t op) { - value_t addr = eval(zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); + const auto a = get_vr(op.ra); + const auto b = get_vr(op.rb); + + for (auto pair : std::initializer_list, value_t>>{{a, b}, {b, a}}) + { + if (auto cv = llvm::dyn_cast(pair.first.value)) + { + v128 data = get_const_vector(cv, m_pos, 10000); + data._u32[3] %= SPU_LS_SIZE; + + if (data._u32[3] % 0x10 == 0) + { + value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); + set_vr(op.rt, make_load_ls(addr)); + return; + } + } + } + + value_t addr = eval(zext((extract(a, 3) + extract(b, 3)) & 0x3fff0)); set_vr(op.rt, make_load_ls(addr)); } @@ -7928,7 +7965,7 @@ public: { value_t addr; addr.value = m_ir->CreateZExt(m_interp_magn ? m_interp_pc : get_pc(m_pos), get_type()); - addr = eval(((get_imm(op.i16, false) << 2) + addr) & 0x3fff0); + addr = eval(((get_imm(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf)); make_store_ls(addr, get_vr(op.rt)); } @@ -7936,7 +7973,7 @@ public: { value_t addr; addr.value = m_ir->CreateZExt(m_interp_magn ? m_interp_pc : get_pc(m_pos), get_type()); - addr = eval(((get_imm(op.i16, false) << 2) + addr) & 0x3fff0); + addr = eval(((get_imm(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf)); set_vr(op.rt, make_load_ls(addr)); } @@ -7953,13 +7990,13 @@ public: } } - value_t addr = eval(zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0)); + value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm(op.si10) << 4)); make_store_ls(addr, get_vr(op.rt)); } void LQD(spu_opcode_t op) { - value_t addr = eval(zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0)); + value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm(op.si10) << 4)); set_vr(op.rt, make_load_ls(addr)); } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 1d35231054..7b73ce3b09 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -2,6 +2,7 @@ #include "Utilities/JIT.h" #include "Utilities/asm.h" #include "Utilities/sysinfo.h" +#include "Emu/Memory/vm.h" #include "Emu/Memory/vm_ptr.h" #include "Emu/Memory/vm_reservation.h" @@ -1111,7 +1112,7 @@ void spu_thread::cpu_task() continue; } - spu_runtime::g_gateway(*this, vm::_ptr(offset), nullptr); + spu_runtime::g_gateway(*this, _ptr(0), nullptr); } // Print some stats @@ -1129,7 +1130,7 @@ void spu_thread::cpu_task() break; } - spu_runtime::g_interpreter(*this, vm::_ptr(offset), nullptr); + spu_runtime::g_interpreter(*this, _ptr(0), nullptr); } } @@ -1148,8 +1149,21 @@ void spu_thread::cpu_unmem() spu_thread::~spu_thread() { - // Deallocate Local Storage - vm::dealloc_verbose_nothrow(offset); + { + const auto [_, shm] = vm::get(vm::any, offset)->get(offset); + + for (s32 i = -1; i < 2; i++) + { + // Unmap LS mirrors + shm->unmap_critical(ls + (i * SPU_LS_SIZE)); + } + + // Deallocate Local Storage + vm::dealloc_verbose_nothrow(offset); + } + + // Release LS mirrors area + utils::memory_release(ls - SPU_LS_SIZE, SPU_LS_SIZE * 3); // Deallocate RawSPU ID if (!group && offset >= RAW_SPU_BASE_ADDR) @@ -1159,11 +1173,26 @@ spu_thread::~spu_thread() } } -spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::string_view name, u32 lv2_id, bool is_isolated) +spu_thread::spu_thread(vm::addr_t _ls, lv2_spu_group* group, u32 index, std::string_view name, u32 lv2_id, bool is_isolated) : cpu_thread(idm::last_id()) , is_isolated(is_isolated) , index(index) - , offset(ls) + , offset(_ls) + , ls([&]() + { + const auto [_, shm] = vm::get(vm::any, _ls)->get(_ls); + const auto addr = static_cast(utils::memory_reserve(SPU_LS_SIZE * 3)); + + for (u32 i = 0; i < 3; i++) + { + // Map LS mirrors + const auto ptr = addr + (i * SPU_LS_SIZE); + verify(HERE), shm->map_critical(ptr) == ptr; + } + + // Use the middle mirror + return addr + SPU_LS_SIZE; + }()) , group(group) , lv2_id(lv2_id) , spu_tname(stx::shared_cptr::make(name)) @@ -1233,7 +1262,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) } u32 value; - if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + args.size - 1 < 0x40000) // LS access + if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + args.size - 1 < SPU_LS_SIZE) // LS access { } else if (args.size == 4 && is_get && thread->read_reg(eal, value)) @@ -1258,7 +1287,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) { auto& spu = static_cast(*group->threads[group->threads_map[index]]); - if (offset + args.size - 1 < 0x40000) // LS access + if (offset + args.size - 1 < SPU_LS_SIZE) // LS access { eal = spu.offset + offset; // redirect access } @@ -1282,7 +1311,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) auto [dst, src] = [&]() -> std::pair { u8* dst = vm::_ptr(eal); - u8* src = vm::_ptr(offset + lsa); + u8* src = _ptr(lsa); if (is_get) { @@ -1638,6 +1667,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) transfer.cmd = MFC(args.cmd & ~MFC_LIST_MASK); args.lsa &= 0x3fff0; + args.eal &= 0x3fff8; u32 index = fetch_size; @@ -1650,7 +1680,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) // Reset to elements array head index = 0; - const auto src = _ptr(args.eal & 0x3fff8); + const auto src = _ptr(args.eal); const v128 data0 = v128::loadu(src, 0); const v128 data1 = v128::loadu(src, 1); const v128 data2 = v128::loadu(src, 2); @@ -2947,7 +2977,7 @@ bool spu_thread::stop_and_signal(u32 code) spu_log.warning("STOP 0x0"); // HACK: find an ILA instruction - for (u32 addr = pc; addr < 0x40000; addr += 4) + for (u32 addr = pc; addr < SPU_LS_SIZE; addr += 4) { const u32 instr = _ref(addr); diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index ac27d55a2a..a20a085bc8 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -118,6 +118,11 @@ enum : u32 SPU_STATUS_IS_ISOLATED = 0x80, }; +enum : s32 +{ + SPU_LS_SIZE = 0x40000, +}; + enum : u32 { SYS_SPU_THREAD_BASE_LOW = 0xf0000000, @@ -636,6 +641,7 @@ public: const u32 index; // SPU index const u32 offset; // SPU LS offset + const std::add_pointer_t ls; // SPU LS pointer private: lv2_spu_group* const group; // SPU Thread Group (only safe to access in the spu thread itself) public: @@ -682,7 +688,7 @@ public: template inline to_be_t* _ptr(u32 lsa) { - return static_cast*>(vm::base(offset + lsa)); + return reinterpret_cast*>(ls + lsa); } // Convert specified SPU LS address to a reference of specified (possibly converted to BE) type diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index e527a273f6..84dc6b2927 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -397,7 +397,7 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr thread, u32 g sys_spu.warning("Unimplemented SPU Thread options (0x%x)", option); } - const vm::addr_t ls_addr{verify("SPU LS" HERE, vm::alloc(0x80000, vm::main))}; + const vm::addr_t ls_addr{verify("SPU LS" HERE, vm::alloc(SPU_LS_SIZE, vm::main))}; const u32 inited = group->init; @@ -579,7 +579,7 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr id, u32 num if (type & SYS_SPU_THREAD_GROUP_TYPE_COOPERATE_WITH_SYSTEM) { // Constant size, unknown what it means but it's definitely not for each spu thread alone - mem_size = 0x40000; + mem_size = SPU_LS_SIZE; use_scheduler = false; } else if (type & SYS_SPU_THREAD_GROUP_TYPE_NON_CONTEXT) @@ -591,7 +591,7 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr id, u32 num else { // 256kb for each spu thread, probably for saving and restoring SPU LS (used by scheduler?) - mem_size = 0x40000 * num; + mem_size = SPU_LS_SIZE * num; } if (num < min_threads || num > max_threads || @@ -1225,7 +1225,7 @@ error_code sys_spu_thread_write_ls(ppu_thread& ppu, u32 id, u32 lsa, u64 value, sys_spu.trace("sys_spu_thread_write_ls(id=0x%x, lsa=0x%05x, value=0x%llx, type=%d)", id, lsa, value, type); - if (lsa >= 0x40000 || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment + if (lsa >= SPU_LS_SIZE || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment { return CELL_EINVAL; } @@ -1268,7 +1268,7 @@ error_code sys_spu_thread_read_ls(ppu_thread& ppu, u32 id, u32 lsa, vm::ptr sys_spu.trace("sys_spu_thread_read_ls(id=0x%x, lsa=0x%05x, value=*0x%x, type=%d)", id, lsa, value, type); - if (lsa >= 0x40000 || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment + if (lsa >= SPU_LS_SIZE || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment { return CELL_EINVAL; } @@ -1831,7 +1831,7 @@ error_code sys_raw_spu_create(ppu_thread& ppu, vm::ptr id, vm::ptr at index = 0; } - const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000, vm::spu))}; + const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, SPU_LS_SIZE, vm::spu))}; const u32 tid = idm::make>(fmt::format("RawSPU[0x%x] ", index), ls_addr, nullptr, index, "", index); @@ -1879,7 +1879,7 @@ error_code sys_isolated_spu_create(ppu_thread& ppu, vm::ptr id, vm::ptr>(fmt::format("IsoSPU[0x%x] ", index), ls_addr, nullptr, index, "", index, true);