diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 8908bd5ec1..2d928d88b5 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -1,11 +1,16 @@ #include "stdafx.h" +#include "Emu/System.h" #include "Emu/IdManager.h" #include "Emu/Memory/Memory.h" #include "SPUThread.h" #include "SPUAnalyser.h" +#include "SPUInterpreter.h" +#include "SPUDisAsm.h" #include "SPURecompiler.h" #include +#include +#include extern u64 get_system_time(); @@ -51,7 +56,7 @@ void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip) } // Compile - verify(HERE), spu.jit->compile(block(spu, spu.pc)); + verify(HERE), spu.jit->compile(block(spu, spu.pc, &spu.jit->m_block_info)); spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc); } @@ -109,89 +114,433 @@ void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip) #endif } -std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa) +std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset<0x10000>* out_info) { - u32 addr = lsa; + // Block info (local) + std::bitset<0x10000> block_info{}; - std::vector result; + // Select one to use + std::bitset<0x10000>& blocks = out_info ? *out_info : block_info; - while (addr < 0x40000) + if (out_info) { - const u32 data = spu._ref(addr); + out_info->reset(); + } - if (data == 0 && addr == lsa) + // Result: addr + raw instruction data + std::vector result; + result.reserve(256); + result.push_back(lsa); + blocks.set(lsa / 4); + + // Simple block entry workload list + std::vector wl; + wl.push_back(lsa); + + // Value flags (TODO) + enum class vf : u32 + { + is_const, + is_mask, + + __bitset_enum_max + }; + + // Weak constant propagation context (for guessing branch targets) + std::array, 128> vflags{}; + + // Associated constant values for 32-bit preferred slot + std::array values; + + if (spu.pc == lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga) + { + // TODO: use current register values for speculations + vflags[0] = +vf::is_const; + values[0] = spu.gpr[0]._u32[3]; + } + + for (u32 wi = 0; wi < wl.size();) + { + const auto next_block = [&] { - break; - } + // Reset value information + vflags.fill({}); + wi++; + }; - addr += 4; - - if (result.empty()) + const auto add_block = [&](u32 target) { - result.emplace_back(lsa); - } + // Verify validity of the new target (TODO) + if (target > lsa) + { + // Check for redundancy + if (!blocks[target / 4]) + { + blocks[target / 4] = true; + wl.push_back(target); + return; + } + } + }; - result.emplace_back(se_storage::swap(data)); + const u32 pos = wl[wi]; + const u32 data = spu._ref(pos); + const auto op = spu_opcode_t{data}; - const auto type = s_spu_itype.decode(data); + wl[wi] += 4; - switch (type) + // Analyse instruction + switch (const auto type = s_spu_itype.decode(data)) { case spu_itype::UNK: - case spu_itype::STOP: - case spu_itype::STOPD: - case spu_itype::SYNC: - case spu_itype::DSYNC: case spu_itype::DFCEQ: case spu_itype::DFCMEQ: case spu_itype::DFCGT: //case spu_itype::DFCMGT: case spu_itype::DFTSV: - case spu_itype::BI: - case spu_itype::IRET: - case spu_itype::BISL: { + // Stop on invalid instructions (TODO) + blocks[pos / 4] = true; + next_block(); + continue; + } + + case spu_itype::SYNC: + case spu_itype::DSYNC: + case spu_itype::STOP: + case spu_itype::STOPD: + { + if (data == 0) + { + // Stop before null data + blocks[pos / 4] = true; + next_block(); + continue; + } + + if (g_cfg.core.spu_block_size != spu_block_size_type::giga) + { + // Stop on special instructions (TODO) + next_block(); + break; + } + break; } - case spu_itype::BRA: + + case spu_itype::IRET: + { + next_block(); + break; + } + + case spu_itype::BI: + case spu_itype::BISL: + case spu_itype::BIZ: + case spu_itype::BINZ: + case spu_itype::BIHZ: + case spu_itype::BIHNZ: + { + const auto af = vflags[op.ra]; + const auto av = values[op.ra]; + + if (type == spu_itype::BISL) + { + vflags[op.rt] = +vf::is_const; + values[op.rt] = pos + 4; + } + + if (test(af, vf::is_const)) + { + const u32 target = spu_branch_target(av); + + if (target == pos + 4) + { + // Nop (unless BISL) + break; + } + + if (type != spu_itype::BISL || g_cfg.core.spu_block_size == spu_block_size_type::giga) + { + // TODO + if (g_cfg.core.spu_block_size != spu_block_size_type::safe) + { + add_block(target); + } + } + + if (type == spu_itype::BISL && target < lsa) + { + next_block(); + break; + } + } + else if (type == spu_itype::BI && !op.d && !op.e) + { + // Analyse jump table (TODO) + std::basic_string jt_abs; + std::basic_string jt_rel; + const u32 start = pos + 4; + const u32 limit = 0x40000; + + for (u32 i = start; i < limit; i += 4) + { + const u32 target = spu._ref(i); + + if (target % 4) + { + // Address cannot be misaligned: abort + break; + } + + if (target >= lsa && target < limit) + { + // Possible jump table entry (absolute) + jt_abs.push_back(target); + } + + if (target + start >= lsa && target + start < limit) + { + // Possible jump table entry (relative) + jt_rel.push_back(target + start); + } + + if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i) + { + // Neither type of jump table completes + break; + } + } + + // Add detected jump table blocks (TODO: avoid adding both) + if (jt_abs.size() >= 3 || jt_rel.size() >= 3) + { + if (jt_abs.size() >= jt_rel.size()) + { + for (u32 target : jt_abs) + { + add_block(target); + } + } + + if (jt_rel.size() >= jt_abs.size()) + { + for (u32 target : jt_rel) + { + add_block(target); + } + } + } + } + + if (type == spu_itype::BI || type == spu_itype::BISL || g_cfg.core.spu_block_size == spu_block_size_type::safe) + { + if (type == spu_itype::BI || g_cfg.core.spu_block_size != spu_block_size_type::giga) + { + next_block(); + break; + } + } + + break; + } + + case spu_itype::BRSL: case spu_itype::BRASL: { - if (spu_branch_target(0, spu_opcode_t{data}.i16) == addr) + const u32 target = spu_branch_target(type == spu_itype::BRASL ? 0 : pos, op.i16); + + vflags[op.rt] = +vf::is_const; + values[op.rt] = pos + 4; + + if (target == pos + 4) { - continue; + // Get next instruction address idiom + break; + } + + if (target < lsa || g_cfg.core.spu_block_size != spu_block_size_type::giga) + { + // Stop on direct calls + next_block(); + break; + } + + if (g_cfg.core.spu_block_size == spu_block_size_type::giga) + { + add_block(target); } break; } + case spu_itype::BR: - case spu_itype::BRSL: - { - if (spu_branch_target(addr - 4, spu_opcode_t{data}.i16) == addr) - { - continue; - } - - break; - } + case spu_itype::BRA: case spu_itype::BRZ: case spu_itype::BRNZ: case spu_itype::BRHZ: case spu_itype::BRHNZ: { - if (spu_branch_target(addr - 4, spu_opcode_t{data}.i16) >= addr) + const u32 target = spu_branch_target(type == spu_itype::BRA ? 0 : pos, op.i16); + + if (target == pos + 4) { - continue; + // Nop + break; + } + + add_block(target); + + if (type == spu_itype::BR || type == spu_itype::BRA) + { + // Stop on direct branches + next_block(); + break; } break; } + + case spu_itype::HEQ: + case spu_itype::HEQI: + case spu_itype::HGT: + case spu_itype::HGTI: + case spu_itype::HLGT: + case spu_itype::HLGTI: + case spu_itype::HBR: + case spu_itype::HBRA: + case spu_itype::HBRR: + case spu_itype::LNOP: + case spu_itype::NOP: + case spu_itype::MTSPR: + case spu_itype::WRCH: + case spu_itype::FSCRWR: + case spu_itype::STQA: + case spu_itype::STQD: + case spu_itype::STQR: + case spu_itype::STQX: + { + // Do nothing + break; + } + + case spu_itype::IL: + { + vflags[op.rt] = +vf::is_const; + values[op.rt] = op.si16; + break; + } + case spu_itype::ILA: + { + vflags[op.rt] = +vf::is_const; + values[op.rt] = op.i18; + break; + } + case spu_itype::ILH: + { + vflags[op.rt] = +vf::is_const; + values[op.rt] = op.i16 << 16 | op.i16; + break; + } + case spu_itype::ILHU: + { + vflags[op.rt] = +vf::is_const; + values[op.rt] = op.i16 << 16; + break; + } + case spu_itype::IOHL: + { + values[op.rt] = values[op.rt] | op.i16; + break; + } + case spu_itype::ORI: + { + vflags[op.rt] = vflags[op.ra] & vf::is_const; + values[op.rt] = values[op.ra] | op.si10; + break; + } + case spu_itype::OR: + { + vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const; + values[op.rt] = values[op.ra] | values[op.rb]; + break; + } + case spu_itype::AI: + { + vflags[op.rt] = vflags[op.ra] & vf::is_const; + values[op.rt] = values[op.ra] + op.si10; + break; + } + case spu_itype::A: + { + vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const; + values[op.rt] = values[op.ra] + values[op.rb]; + break; + } default: { - continue; + // Unconst + vflags[type & spu_itype::_quadrop ? +op.rt4 : +op.rt] = {}; + break; } } - break; + // Insert raw instruction value + if (result.size() - 1 <= (pos - lsa) / 4) + { + if (result.size() - 1 < (pos - lsa) / 4) + { + result.resize((pos - lsa) / 4 + 1); + } + + result.emplace_back(se_storage::swap(data)); + } + else if (u32& raw_val = result[(pos - lsa) / 4 + 1]) + { + verify(HERE), raw_val == se_storage::swap(data); + } + else + { + raw_val = se_storage::swap(data); + } + } + + if (g_cfg.core.spu_block_size == spu_block_size_type::safe) + { + // Check holes in safe mode (TODO) + u32 valid_size = 0; + + for (u32 i = 1; i < result.size(); i++) + { + if (result[i] == 0) + { + const u32 pos = lsa + (i - 1) * 4; + const u32 data = spu._ref(pos); + const auto type = s_spu_itype.decode(data); + + // Allow only NOP or LNOP instructions in holes + if (type == spu_itype::NOP || type == spu_itype::LNOP) + { + if (i + 1 < result.size()) + { + continue; + } + } + + result.resize(valid_size + 1); + break; + } + else + { + valid_size = i; + } + } + } + + if (result.size() == 1) + { + // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback + result.clear(); } return result; diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index f87fe92e6b..1aa83bd373 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -1,6 +1,7 @@ #pragma once #include "SPUThread.h" +#include // SPU Recompiler instance base class class spu_recompiler_base @@ -10,6 +11,8 @@ protected: u32 m_pos; + std::bitset<0x10000> m_block_info; + public: spu_recompiler_base(SPUThread& spu); @@ -28,7 +31,7 @@ public: static void branch(SPUThread&, void*, u8*); // Get the block at specified address - static std::vector block(SPUThread&, u32 lsa); + static std::vector block(SPUThread&, u32 lsa, std::bitset<0x10000>* = nullptr); // Create recompiler instance (ASMJIT) static std::unique_ptr make_asmjit_recompiler(SPUThread& spu); diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 61d55c4bae..57d2a7c4ba 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -853,6 +853,8 @@ void Emulator::Load(bool add_only) { fs::file log(Emu.GetCachePath() + "SPUJIT.log", fs::rewrite); log.write(fmt::format("SPU JIT Log\n\nTitle: %s\nTitle ID: %s\n\n", Emu.GetTitle(), Emu.GetTitleID())); + fs::create_dir(Emu.GetCachePath() + "SPU"); + fs::remove_all(Emu.GetCachePath() + "SPU", false); } ppu_load_exec(ppu_exec);