mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
SPU: Make recompilers lock-free.
This commit is contained in:
parent
9ac6ef6494
commit
8c28c4e8ec
4 changed files with 248 additions and 260 deletions
|
@ -45,24 +45,23 @@ void spu_recompiler::init()
|
|||
}
|
||||
}
|
||||
|
||||
spu_function_t spu_recompiler::compile(const std::vector<u32>& func, void* fn_location)
|
||||
spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
|
||||
{
|
||||
if (!fn_location)
|
||||
{
|
||||
fn_location = m_spurt->find(func);
|
||||
}
|
||||
const auto add_loc = m_spurt->add_empty(std::move(_func));
|
||||
|
||||
if (fn_location == spu_runtime::g_dispatcher)
|
||||
{
|
||||
return &dispatch;
|
||||
}
|
||||
|
||||
if (!fn_location)
|
||||
if (!add_loc)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (auto cache = g_fxo->get<spu_cache>(); cache && g_cfg.core.spu_cache)
|
||||
if (add_loc->compiled)
|
||||
{
|
||||
return add_loc->compiled;
|
||||
}
|
||||
|
||||
const std::vector<u32>& func = add_loc->data;
|
||||
|
||||
if (auto cache = g_fxo->get<spu_cache>(); cache && g_cfg.core.spu_cache && !add_loc->cached.exchange(1))
|
||||
{
|
||||
cache->add(func);
|
||||
}
|
||||
|
@ -94,10 +93,10 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func, void* fn_lo
|
|||
X86Assembler compiler(&code);
|
||||
this->c = &compiler;
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
|
||||
{
|
||||
// Dump analyser data
|
||||
this->dump(log);
|
||||
this->dump(func, log);
|
||||
fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log);
|
||||
|
||||
// Set logger
|
||||
|
@ -892,12 +891,21 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func, void* fn_lo
|
|||
LOG_FATAL(SPU, "Failed to build a function");
|
||||
}
|
||||
|
||||
if (!m_spurt->add(fn_location, fn))
|
||||
// Install compiled function pointer
|
||||
const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);
|
||||
|
||||
// Rebuild trampoline if necessary
|
||||
if (!m_spurt->rebuild_ubertrampoline(func[1]))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
if (added)
|
||||
{
|
||||
add_loc->compiled.notify_all();
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_debug && added)
|
||||
{
|
||||
// Add ASMJIT logs
|
||||
fmt::append(log, "Address: %p\n\n", fn);
|
||||
|
|
|
@ -13,7 +13,7 @@ public:
|
|||
|
||||
virtual void init() override;
|
||||
|
||||
virtual spu_function_t compile(const std::vector<u32>&, void*) override;
|
||||
virtual spu_function_t compile(std::vector<u32>&&) override;
|
||||
|
||||
private:
|
||||
// ASMJIT runtime
|
||||
|
|
|
@ -400,15 +400,19 @@ void spu_cache::initialize()
|
|||
{
|
||||
compiler->init();
|
||||
|
||||
if (compiler->compile({}, nullptr) && spu_runtime::g_interpreter)
|
||||
if (compiler->compile({}) && spu_runtime::g_interpreter)
|
||||
{
|
||||
LOG_SUCCESS(SPU, "SPU Runtime: built interpreter.");
|
||||
LOG_SUCCESS(SPU, "SPU Runtime: Built the interpreter.");
|
||||
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::llvm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(SPU, "SPU Runtime: Failed to build the interpreter.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -472,34 +476,21 @@ void spu_cache::initialize()
|
|||
}
|
||||
|
||||
// Call analyser
|
||||
const std::vector<u32>& func2 = compiler->analyse(ls.data(), func[0]);
|
||||
std::vector<u32> func2 = compiler->analyse(ls.data(), func[0]);
|
||||
|
||||
if (func2.size() != size0)
|
||||
{
|
||||
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, size0 - 1);
|
||||
}
|
||||
|
||||
if (!compiler->compile(func, nullptr))
|
||||
if (!compiler->compile(std::move(func2)))
|
||||
{
|
||||
// Likely, out of JIT memory. Signal to prevent further building.
|
||||
fail_flag |= 1;
|
||||
}
|
||||
|
||||
// Clear fake LS
|
||||
for (u32 i = 1, pos = start; i < func2.size(); i++, pos += 4)
|
||||
{
|
||||
if (se_storage<u32>::swap(func2[i]) != ls[pos / 4])
|
||||
{
|
||||
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed at 0x%x", func2[0], pos);
|
||||
}
|
||||
|
||||
ls[pos / 4] = 0;
|
||||
}
|
||||
|
||||
if (func2.size() != size0)
|
||||
{
|
||||
std::memset(ls.data(), 0, 0x40000);
|
||||
}
|
||||
std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));
|
||||
|
||||
g_progr_pdone++;
|
||||
}
|
||||
|
@ -519,7 +510,7 @@ void spu_cache::initialize()
|
|||
|
||||
if (fail_flag)
|
||||
{
|
||||
LOG_ERROR(SPU, "SPU Runtime: Cache building failed (too much data). SPU Cache will be disabled.");
|
||||
LOG_FATAL(SPU, "SPU Runtime: Cache building failed (out of memory).");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -581,9 +572,6 @@ bool spu_runtime::func_compare::operator()(const std::vector<u32>& lhs, const st
|
|||
|
||||
spu_runtime::spu_runtime()
|
||||
{
|
||||
// Initialize "empty" block
|
||||
m_map[std::vector<u32>()] = tr_dispatch;
|
||||
|
||||
// Clear LLVM output
|
||||
m_cache_path = Emu.PPUCache();
|
||||
|
||||
|
@ -602,60 +590,77 @@ spu_runtime::spu_runtime()
|
|||
}
|
||||
}
|
||||
|
||||
bool spu_runtime::add(void* _where, spu_function_t compiled)
|
||||
spu_item* spu_runtime::add_empty(std::vector<u32>&& data)
|
||||
{
|
||||
writer_lock lock(*this);
|
||||
|
||||
if (!_where)
|
||||
if (data.size() <= 1)
|
||||
{
|
||||
return false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Use opaque pointer
|
||||
auto& where = *static_cast<decltype(m_map)::value_type*>(_where);
|
||||
// Store previous item if already added
|
||||
spu_item* prev = nullptr;
|
||||
|
||||
// Function info
|
||||
const std::vector<u32>& func = get_func(_where);
|
||||
|
||||
//
|
||||
const u32 _off = 1 + (func[0] / 4) * (false);
|
||||
|
||||
// Set pointer to the compiled function
|
||||
where.second = compiled;
|
||||
|
||||
// Register function in PIC map
|
||||
m_pic_map[{func.data() + _off, func.size() - _off}] = compiled;
|
||||
|
||||
if (func.size() > 1)
|
||||
//Try to add item that doesn't exist yet
|
||||
const auto ret = m_stuff[data[1] >> 12].push_if([&](spu_item& _new, spu_item& _old)
|
||||
{
|
||||
// Rebuild trampolines if necessary
|
||||
if (const auto new_tr = rebuild_ubertrampoline(func[1]))
|
||||
{
|
||||
g_dispatcher->at(func[1] >> 12) = new_tr;
|
||||
}
|
||||
else
|
||||
std::basic_string_view<u32> lhs{_new.data.data() + 1, _new.data.size() - 1};
|
||||
std::basic_string_view<u32> rhs{_old.data.data() + 1, _old.data.size() - 1};
|
||||
|
||||
if (lhs == rhs)
|
||||
{
|
||||
prev = &_old;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}, std::move(data));
|
||||
|
||||
if (ret)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Notify in lock destructor
|
||||
lock.notify = true;
|
||||
return true;
|
||||
return prev;
|
||||
}
|
||||
|
||||
spu_function_t spu_runtime::rebuild_ubertrampoline(u32 id_inst)
|
||||
{
|
||||
// Prepare sorted list
|
||||
m_flat_list.clear();
|
||||
{
|
||||
// Select required subrange (fixed 20 bits for single pos in g_dispatcher table)
|
||||
const u32 id_lower = id_inst & ~0xfff;
|
||||
const u32 id_upper = id_inst | 0xfff;
|
||||
static thread_local std::vector<std::pair<std::basic_string_view<u32>, spu_function_t>> m_flat_list;
|
||||
|
||||
m_flat_list.assign(m_pic_map.lower_bound({&id_lower, 1}), m_pic_map.upper_bound({&id_upper, 1}));
|
||||
// Remember top position
|
||||
auto stuff_it = m_stuff.at(id_inst >> 12).begin();
|
||||
auto stuff_end = m_stuff.at(id_inst >> 12).end();
|
||||
{
|
||||
if (stuff_it->trampoline)
|
||||
{
|
||||
return stuff_it->trampoline;
|
||||
}
|
||||
|
||||
m_flat_list.clear();
|
||||
|
||||
for (auto it = stuff_it; it != stuff_end; ++it)
|
||||
{
|
||||
if (const auto ptr = it->compiled.load())
|
||||
{
|
||||
std::basic_string_view<u32> range{it->data.data() + 1, it->data.size() - 1};
|
||||
m_flat_list.emplace_back(range, ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pull oneself deeper (TODO)
|
||||
++stuff_it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(m_flat_list.begin(), m_flat_list.end(), [&](const auto& a, const auto& b)
|
||||
{
|
||||
std::basic_string_view<u32> lhs = a.first;
|
||||
std::basic_string_view<u32> rhs = b.first;
|
||||
return lhs < rhs;
|
||||
});
|
||||
|
||||
struct work
|
||||
{
|
||||
u32 size;
|
||||
|
@ -674,6 +679,8 @@ spu_function_t spu_runtime::rebuild_ubertrampoline(u32 id_inst)
|
|||
const auto _end = m_flat_list.end();
|
||||
const u32 size0 = ::size32(m_flat_list);
|
||||
|
||||
auto result = beg->second;
|
||||
|
||||
if (size0 != 1)
|
||||
{
|
||||
// Allocate some writable executable memory
|
||||
|
@ -944,77 +951,63 @@ spu_function_t spu_runtime::rebuild_ubertrampoline(u32 id_inst)
|
|||
}
|
||||
|
||||
workload.clear();
|
||||
return reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
|
||||
result = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
|
||||
}
|
||||
|
||||
// No trampoline required
|
||||
return beg->second;
|
||||
}
|
||||
|
||||
void* spu_runtime::find(const std::vector<u32>& func)
|
||||
{
|
||||
writer_lock lock(*this);
|
||||
|
||||
//
|
||||
const u32 _off = 1 + (func[0] / 4) * (false);
|
||||
|
||||
// Try to find PIC first
|
||||
const auto found = m_pic_map.find({func.data() + _off, func.size() - _off});
|
||||
|
||||
if (found != m_pic_map.end())
|
||||
if (auto _old = stuff_it->trampoline.compare_and_swap(nullptr, result))
|
||||
{
|
||||
// Wait if already in progress
|
||||
while (!found->second)
|
||||
return _old;
|
||||
}
|
||||
|
||||
// Install ubertrampoline
|
||||
auto& insert_to = spu_runtime::g_dispatcher->at(id_inst >> 12);
|
||||
|
||||
auto _old = insert_to.load();
|
||||
|
||||
do
|
||||
{
|
||||
// Make sure we are replacing an older ubertrampoline but not newer one
|
||||
if (_old != tr_dispatch)
|
||||
{
|
||||
m_cond.wait(m_mutex);
|
||||
bool ok = false;
|
||||
|
||||
for (auto it = stuff_it; it != stuff_end; ++it)
|
||||
{
|
||||
if (it->trampoline == _old)
|
||||
{
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Already compiled
|
||||
return g_dispatcher;
|
||||
}
|
||||
while (!insert_to.compare_exchange(_old, result));
|
||||
|
||||
// Try to find existing function, register new one if necessary
|
||||
const auto result = m_map.try_emplace(func, nullptr);
|
||||
|
||||
// Add PIC entry as well
|
||||
m_pic_map.try_emplace({result.first->first.data() + _off, result.first->first.size() - _off}, nullptr);
|
||||
|
||||
// Pointer to the value in the map (pair)
|
||||
const auto fn_location = &*result.first;
|
||||
|
||||
if (fn_location->second)
|
||||
{
|
||||
// Already compiled
|
||||
return g_dispatcher;
|
||||
}
|
||||
else if (!result.second)
|
||||
{
|
||||
// Wait if already in progress
|
||||
while (!fn_location->second)
|
||||
{
|
||||
m_cond.wait(m_mutex);
|
||||
}
|
||||
|
||||
return g_dispatcher;
|
||||
}
|
||||
|
||||
// Return location to compile and use in add()
|
||||
return fn_location;
|
||||
return result;
|
||||
}
|
||||
|
||||
spu_function_t spu_runtime::find(const u32* ls, u32 addr) const
|
||||
{
|
||||
reader_lock lock(this->m_mutex);
|
||||
|
||||
const auto upper = m_pic_map.upper_bound({ls + addr / 4, (0x40000 - addr) / 4});
|
||||
|
||||
if (upper != m_pic_map.begin())
|
||||
for (auto& item : m_stuff.at(ls[addr / 4] >> 12))
|
||||
{
|
||||
const auto found = std::prev(upper);
|
||||
|
||||
if (found->first.compare(0, found->first.size(), ls + addr / 4, found->first.size()) == 0)
|
||||
if (const auto ptr = item.compiled.load())
|
||||
{
|
||||
return found->second;
|
||||
std::basic_string_view<u32> range{item.data.data() + 1, item.data.size() - 1};
|
||||
|
||||
if (addr / 4 + range.size() > 0x10000)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (range.compare(0, range.size(), ls + addr / 4, range.size()) == 0)
|
||||
{
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1055,18 +1048,12 @@ spu_function_t spu_runtime::make_branch_patchpoint() const
|
|||
|
||||
spu_recompiler_base::spu_recompiler_base()
|
||||
{
|
||||
result.reserve(8192);
|
||||
}
|
||||
|
||||
spu_recompiler_base::~spu_recompiler_base()
|
||||
{
|
||||
}
|
||||
|
||||
void spu_recompiler_base::make_function(const std::vector<u32>& data)
|
||||
{
|
||||
compile(data, nullptr);
|
||||
}
|
||||
|
||||
void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
|
||||
{
|
||||
// If code verification failed from a patched patchpoint, clear it with a dispatcher jump
|
||||
|
@ -1082,7 +1069,7 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
|
|||
|
||||
bytes[0] = 0xe9; // jmp rel32
|
||||
std::memcpy(bytes + 1, &rel, 4);
|
||||
bytes[5] = 0x90;
|
||||
bytes[5] = 0x66; // lnop (2 bytes)
|
||||
bytes[6] = 0x90;
|
||||
bytes[7] = 0x90;
|
||||
|
||||
|
@ -1096,8 +1083,17 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
|
|||
return;
|
||||
}
|
||||
|
||||
spu.jit->init();
|
||||
|
||||
// Compile
|
||||
spu.jit->make_function(spu.jit->analyse(spu._ptr<u32>(0), spu.pc));
|
||||
const auto func = spu.jit->compile(spu.jit->analyse(spu._ptr<u32>(0), spu.pc));
|
||||
|
||||
if (!func)
|
||||
{
|
||||
LOG_FATAL(SPU, "[0x%05x] Compilation failed.", spu.pc);
|
||||
Emu.Pause();
|
||||
return;
|
||||
}
|
||||
|
||||
// Diagnostic
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
|
@ -1109,6 +1105,8 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
|
|||
LOG_TRACE(SPU, "Called from 0x%x", _info._u32[2] - 4);
|
||||
}
|
||||
}
|
||||
|
||||
spu_runtime::g_tail_escape(&spu, func, nullptr);
|
||||
}
|
||||
|
||||
void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
|
||||
|
@ -1156,6 +1154,8 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
|
|||
}
|
||||
|
||||
atomic_storage<u64>::release(*reinterpret_cast<u64*>(rip), result);
|
||||
|
||||
spu_runtime::g_tail_escape(&spu, func, rip);
|
||||
}
|
||||
|
||||
void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* rip) try
|
||||
|
@ -1189,10 +1189,11 @@ catch (const std::exception& e)
|
|||
LOG_NOTICE(GENERAL, "\n%s", spu.dump());
|
||||
}
|
||||
|
||||
const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||
std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||
{
|
||||
// Result: addr + raw instruction data
|
||||
result.clear();
|
||||
std::vector<u32> result;
|
||||
result.reserve(10000);
|
||||
result.push_back(entry_point);
|
||||
|
||||
// Initialize block entries
|
||||
|
@ -3120,7 +3121,7 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
|
|||
return result;
|
||||
}
|
||||
|
||||
void spu_recompiler_base::dump(std::string& out)
|
||||
void spu_recompiler_base::dump(const std::vector<u32>& result, std::string& out)
|
||||
{
|
||||
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
|
||||
dis_asm.offset = reinterpret_cast<const u8*>(result.data() + 1);
|
||||
|
@ -4153,31 +4154,25 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
virtual spu_function_t compile(const std::vector<u32>& func, void* fn_location) override
|
||||
virtual spu_function_t compile(std::vector<u32>&& _func) override
|
||||
{
|
||||
if (func.empty() && m_interp_magn)
|
||||
if (_func.empty() && m_interp_magn)
|
||||
{
|
||||
return compile_interpreter();
|
||||
}
|
||||
|
||||
if (!fn_location)
|
||||
{
|
||||
fn_location = m_spurt->find(func);
|
||||
}
|
||||
const auto add_loc = m_spurt->add_empty(std::move(_func));
|
||||
|
||||
if (fn_location == spu_runtime::g_dispatcher)
|
||||
{
|
||||
return &dispatch;
|
||||
}
|
||||
|
||||
if (!fn_location)
|
||||
if (!add_loc)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const std::vector<u32>& func = add_loc->data;
|
||||
|
||||
std::string log;
|
||||
|
||||
if (auto cache = g_fxo->get<spu_cache>(); cache && g_cfg.core.spu_cache)
|
||||
if (auto cache = g_fxo->get<spu_cache>(); cache && g_cfg.core.spu_cache && !add_loc->cached.exchange(1))
|
||||
{
|
||||
cache->add(func);
|
||||
}
|
||||
|
@ -4206,9 +4201,9 @@ public:
|
|||
const u32 start = m_pos;
|
||||
const u32 end = start + m_size;
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
|
||||
{
|
||||
this->dump(log);
|
||||
this->dump(func, log);
|
||||
fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log);
|
||||
}
|
||||
|
||||
|
@ -4745,11 +4740,17 @@ public:
|
|||
// Register function pointer
|
||||
const spu_function_t fn = reinterpret_cast<spu_function_t>(m_jit.get_engine().getPointerToFunction(main_func));
|
||||
|
||||
if (!m_spurt->add(fn_location, fn))
|
||||
// Install unconditionally, possibly replacing existing one from spu_fast
|
||||
add_loc->compiled = fn;
|
||||
|
||||
// Rebuild trampoline if necessary
|
||||
if (!m_spurt->rebuild_ubertrampoline(func[1]))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
add_loc->compiled.notify_all();
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
out.flush();
|
||||
|
@ -8236,7 +8237,7 @@ std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_llvm_recompiler(u
|
|||
struct spu_llvm
|
||||
{
|
||||
// Workload
|
||||
lf_queue<std::pair<void*, u8*>> registered;
|
||||
lf_queue<spu_item*> registered;
|
||||
|
||||
void operator()()
|
||||
{
|
||||
|
@ -8259,7 +8260,10 @@ struct spu_llvm
|
|||
continue;
|
||||
}
|
||||
|
||||
const std::vector<u32>& func = spu_runtime::get_func(parg->first);
|
||||
const std::vector<u32>& func = (*parg)->data;
|
||||
|
||||
// Old function pointer (pre-recompiled)
|
||||
const spu_function_t _old = (*parg)->compiled;
|
||||
|
||||
// Get data start
|
||||
const u32 start = func[0];
|
||||
|
@ -8272,17 +8276,17 @@ struct spu_llvm
|
|||
}
|
||||
|
||||
// Call analyser
|
||||
const std::vector<u32>& func2 = compiler->analyse(ls.data(), func[0]);
|
||||
std::vector<u32> func2 = compiler->analyse(ls.data(), func[0]);
|
||||
|
||||
if (func2.size() != size0)
|
||||
{
|
||||
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, size0 - 1);
|
||||
}
|
||||
|
||||
if (const auto target = compiler->compile(func, parg->first))
|
||||
if (const auto target = compiler->compile(std::move(func2)))
|
||||
{
|
||||
// Redirect old function (TODO: patch in multiple places)
|
||||
const s64 rel = reinterpret_cast<u64>(target) - reinterpret_cast<u64>(parg->second) - 5;
|
||||
const s64 rel = reinterpret_cast<u64>(target) - reinterpret_cast<u64>(_old) - 5;
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -8296,28 +8300,17 @@ struct spu_llvm
|
|||
bytes[6] = 0x90;
|
||||
bytes[7] = 0x90;
|
||||
|
||||
atomic_storage<u64>::release(*reinterpret_cast<u64*>(parg->second), result);
|
||||
atomic_storage<u64>::release(*reinterpret_cast<u64*>(_old), result);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(SPU, "[0x%05x] Compilation failed.", func2[0]);
|
||||
LOG_FATAL(SPU, "[0x%05x] Compilation failed.", func[0]);
|
||||
Emu.Pause();
|
||||
return;
|
||||
}
|
||||
|
||||
// Clear fake LS
|
||||
for (u32 i = 1, pos = start; i < func2.size(); i++, pos += 4)
|
||||
{
|
||||
if (se_storage<u32>::swap(func2[i]) != ls[pos / 4])
|
||||
{
|
||||
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed at 0x%x", func2[0], pos);
|
||||
}
|
||||
|
||||
ls[pos / 4] = 0;
|
||||
}
|
||||
|
||||
if (func2.size() != size0)
|
||||
{
|
||||
std::memset(ls.data(), 0, 0x40000);
|
||||
}
|
||||
std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8336,27 +8329,26 @@ struct spu_fast : public spu_recompiler_base
|
|||
}
|
||||
}
|
||||
|
||||
virtual spu_function_t compile(const std::vector<u32>& func, void* fn_location) override
|
||||
virtual spu_function_t compile(std::vector<u32>&& _func) override
|
||||
{
|
||||
if (!fn_location)
|
||||
{
|
||||
fn_location = m_spurt->find(func);
|
||||
}
|
||||
const auto add_loc = m_spurt->add_empty(std::move(_func));
|
||||
|
||||
if (fn_location == spu_runtime::g_dispatcher)
|
||||
{
|
||||
return &dispatch;
|
||||
}
|
||||
|
||||
if (!fn_location)
|
||||
if (!add_loc)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
if (add_loc->compiled)
|
||||
{
|
||||
return add_loc->compiled;
|
||||
}
|
||||
|
||||
const std::vector<u32>& func = add_loc->data;
|
||||
|
||||
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
|
||||
{
|
||||
std::string log;
|
||||
this->dump(log);
|
||||
this->dump(func, log);
|
||||
fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log);
|
||||
}
|
||||
|
||||
|
@ -8639,15 +8631,29 @@ struct spu_fast : public spu_recompiler_base
|
|||
*raw++ = 0x28;
|
||||
*raw++ = 0xc3;
|
||||
|
||||
if (!m_spurt->add(fn_location, reinterpret_cast<spu_function_t>(result)))
|
||||
const auto fn = reinterpret_cast<spu_function_t>(result);
|
||||
|
||||
// Install pointer carefully
|
||||
const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);
|
||||
|
||||
if (added)
|
||||
{
|
||||
// Send work to LLVM compiler thread
|
||||
g_fxo->get<spu_llvm_thread>()->registered.push(add_loc);
|
||||
}
|
||||
|
||||
// Rebuild trampoline if necessary
|
||||
if (!m_spurt->rebuild_ubertrampoline(func[1]))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Send work to LLVM compiler thread; after add() to avoid race
|
||||
g_fxo->get<spu_llvm_thread>()->registered.push(fn_location, result);
|
||||
if (added)
|
||||
{
|
||||
add_loc->compiled.notify_all();
|
||||
}
|
||||
|
||||
return reinterpret_cast<spu_function_t>(result);
|
||||
return fn;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "Utilities/File.h"
|
||||
#include "Utilities/mutex.h"
|
||||
#include "Utilities/cond.h"
|
||||
#include "Utilities/JIT.h"
|
||||
#include "Utilities/lockless.h"
|
||||
#include "SPUThread.h"
|
||||
#include <vector>
|
||||
#include <bitset>
|
||||
|
@ -37,33 +36,47 @@ public:
|
|||
static void initialize();
|
||||
};
|
||||
|
||||
class spu_item
|
||||
{
|
||||
public:
|
||||
// SPU program
|
||||
const std::vector<u32> data;
|
||||
|
||||
// Compiled function pointer
|
||||
atomic_t<spu_function_t> compiled = nullptr;
|
||||
|
||||
// Ubertrampoline generated for this item when it was latest
|
||||
atomic_t<spu_function_t> trampoline = nullptr;
|
||||
|
||||
atomic_t<u8> cached = false;
|
||||
atomic_t<u8> logged = false;
|
||||
|
||||
spu_item(std::vector<u32>&& data)
|
||||
: data(std::move(data))
|
||||
{
|
||||
}
|
||||
|
||||
spu_item(const spu_item&) = delete;
|
||||
|
||||
spu_item& operator=(const spu_item&) = delete;
|
||||
};
|
||||
|
||||
// Helper class
|
||||
class spu_runtime
|
||||
{
|
||||
mutable shared_mutex m_mutex;
|
||||
|
||||
mutable cond_variable m_cond;
|
||||
|
||||
struct func_compare
|
||||
{
|
||||
// Comparison function for SPU programs
|
||||
bool operator()(const std::vector<u32>& lhs, const std::vector<u32>& rhs) const;
|
||||
};
|
||||
|
||||
// All functions
|
||||
std::map<std::vector<u32>, spu_function_t, func_compare> m_map;
|
||||
|
||||
// All functions as PIC
|
||||
std::map<std::basic_string_view<u32>, spu_function_t> m_pic_map;
|
||||
// All functions (2^20 bunches)
|
||||
std::array<lf_bunch<spu_item>, (1 << 20)> m_stuff;
|
||||
|
||||
// Debug module output location
|
||||
std::string m_cache_path;
|
||||
|
||||
// Scratch vector
|
||||
std::vector<std::pair<std::basic_string_view<u32>, spu_function_t>> m_flat_list;
|
||||
|
||||
public:
|
||||
|
||||
// Trampoline to spu_recompiler_base::dispatch
|
||||
static const spu_function_t tr_dispatch;
|
||||
|
||||
|
@ -88,23 +101,15 @@ public:
|
|||
return m_cache_path;
|
||||
}
|
||||
|
||||
// Add compiled function and generate trampoline if necessary
|
||||
bool add(void* where, spu_function_t compiled);
|
||||
|
||||
private:
|
||||
// Rebuild ubertrampoline for given identifier (first instruction)
|
||||
spu_function_t rebuild_ubertrampoline(u32 id_inst);
|
||||
|
||||
private:
|
||||
friend class spu_cache;
|
||||
|
||||
public:
|
||||
|
||||
// Return opaque pointer for add()
|
||||
void* find(const std::vector<u32>&);
|
||||
|
||||
// Get func from opaque ptr
|
||||
static inline const std::vector<u32>& get_func(void* _where)
|
||||
{
|
||||
return static_cast<decltype(m_map)::value_type*>(_where)->first;
|
||||
}
|
||||
// Return new pointer for add()
|
||||
spu_item* add_empty(std::vector<u32>&&);
|
||||
|
||||
// Find existing function
|
||||
spu_function_t find(const u32* ls, u32 addr) const;
|
||||
|
@ -129,31 +134,6 @@ public:
|
|||
|
||||
// Interpreter entry point
|
||||
static spu_function_t g_interpreter;
|
||||
|
||||
// Exclusive lock
|
||||
struct writer_lock
|
||||
{
|
||||
spu_runtime& _this;
|
||||
bool notify = false;
|
||||
|
||||
writer_lock(const writer_lock&) = delete;
|
||||
|
||||
writer_lock(spu_runtime& _this)
|
||||
: _this(_this)
|
||||
{
|
||||
_this.m_mutex.lock();
|
||||
}
|
||||
|
||||
~writer_lock()
|
||||
{
|
||||
_this.m_mutex.unlock();
|
||||
|
||||
if (notify)
|
||||
{
|
||||
_this.m_cond.notify_all();
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// SPU Recompiler instance base class
|
||||
|
@ -303,9 +283,6 @@ private:
|
|||
// For private use
|
||||
std::vector<u32> workload;
|
||||
|
||||
// Result of analyse(), to avoid copying and allocation
|
||||
std::vector<u32> result;
|
||||
|
||||
public:
|
||||
spu_recompiler_base();
|
||||
|
||||
|
@ -314,11 +291,8 @@ public:
|
|||
// Initialize
|
||||
virtual void init() = 0;
|
||||
|
||||
// Compile function (may fail)
|
||||
virtual spu_function_t compile(const std::vector<u32>&, void*) = 0;
|
||||
|
||||
// Compile function, handle failure
|
||||
void make_function(const std::vector<u32>&);
|
||||
// Compile function
|
||||
virtual spu_function_t compile(std::vector<u32>&&) = 0;
|
||||
|
||||
// Default dispatch function fallback (second arg is unused)
|
||||
static void dispatch(spu_thread&, void*, u8* rip);
|
||||
|
@ -330,10 +304,10 @@ public:
|
|||
static void old_interpreter(spu_thread&, void* ls, u8*);
|
||||
|
||||
// Get the function data at specified address
|
||||
const std::vector<u32>& analyse(const be_t<u32>* ls, u32 lsa);
|
||||
std::vector<u32> analyse(const be_t<u32>* ls, u32 lsa);
|
||||
|
||||
// Print analyser internal state
|
||||
void dump(std::string& out);
|
||||
void dump(const std::vector<u32>& result, std::string& out);
|
||||
|
||||
// Get SPU Runtime
|
||||
spu_runtime& get_runtime()
|
||||
|
|
Loading…
Add table
Reference in a new issue