SPU: internal refactoring, add spu_program

Use struct (spu_program) instead of std::vector<u32>.
This commit is contained in:
Nekotekina 2019-11-23 19:30:54 +03:00
commit 1b9a3e6077
4 changed files with 215 additions and 213 deletions

View file

@ -45,9 +45,9 @@ void spu_recompiler::init()
} }
} }
spu_function_t spu_recompiler::compile(std::vector<u32>&& _func) spu_function_t spu_recompiler::compile(spu_program&& _func)
{ {
const u32 start0 = _func[0]; const u32 start0 = _func.entry_point;
const auto add_loc = m_spurt->add_empty(std::move(_func)); const auto add_loc = m_spurt->add_empty(std::move(_func));
@ -61,9 +61,9 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
return add_loc->compiled; return add_loc->compiled;
} }
const std::vector<u32>& func = add_loc->data; const spu_program& func = add_loc->data;
if (func[0] != start0) if (func.entry_point != start0)
{ {
// Wait for the duplicate // Wait for the duplicate
while (!add_loc->compiled) while (!add_loc->compiled)
@ -84,7 +84,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
u8 output[20]; u8 output[20];
sha1_starts(&ctx); sha1_starts(&ctx);
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data() + 1), func.size() * 4 - 4); sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
sha1_finish(&ctx, output); sha1_finish(&ctx, output);
be_t<u64> hash_start; be_t<u64> hash_start;
@ -168,18 +168,18 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
u32 words_align = 8; u32 words_align = 8;
// Start compilation // Start compilation
m_pos = func[0]; m_pos = func.lower_bound;
m_base = func[0]; m_base = func.entry_point;
m_size = ::size32(func) * 4 - 4; m_size = ::size32(func.data) * 4;
const u32 start = m_pos; const u32 start = m_pos;
const u32 end = start + m_size; const u32 end = start + m_size;
// Create block labels // Create block labels
for (u32 i = 1; i < func.size(); i++) for (u32 i = 0; i < func.data.size(); i++)
{ {
if (func[i] && m_block_info[i - 1 + start / 4]) if (func.data[i] && m_block_info[i + start / 4])
{ {
instr_labels[i * 4 - 4 + start] = c->newLabel(); instr_labels[i * 4 + start] = c->newLabel();
} }
} }
@ -211,7 +211,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 addr = starta, m = 1; addr < enda && m; addr += 4, m <<= 1) for (u32 addr = starta, m = 1; addr < enda && m; addr += 4, m <<= 1)
{ {
// Filter out if out of range, or is a hole // Filter out if out of range, or is a hole
if (addr >= start && addr < end && func[(addr - start) / 4 + 1]) if (addr >= start && addr < end && func.data[(addr - start) / 4])
{ {
result |= m; result |= m;
} }
@ -226,7 +226,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
// Skip holes at the beginning (giga only) // Skip holes at the beginning (giga only)
for (u32 j = start; j < end; j += 4) for (u32 j = start; j < end; j += 4)
{ {
if (!func[(j - start) / 4 + 1]) if (!func.data[(j - start) / 4])
{ {
starta += 4; starta += 4;
} }
@ -261,7 +261,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
} }
else if (m_size == 8) else if (m_size == 8)
{ {
c->mov(x86::rax, static_cast<u64>(func[2]) << 32 | func[1]); c->mov(x86::rax, static_cast<u64>(func.data[1]) << 32 | func.data[0]);
c->cmp(x86::rax, x86::qword_ptr(*ls, *pc0)); c->cmp(x86::rax, x86::qword_ptr(*ls, *pc0));
c->jnz(label_diff); c->jnz(label_diff);
@ -272,7 +272,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
} }
else if (m_size == 4) else if (m_size == 4)
{ {
c->cmp(x86::dword_ptr(*ls, *pc0), func[1]); c->cmp(x86::dword_ptr(*ls, *pc0), func.data[0]);
c->jnz(label_diff); c->jnz(label_diff);
if (utils::has_avx()) if (utils::has_avx())
@ -351,7 +351,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = j; i < j + 64; i += 4) for (u32 i = j; i < j + 64; i += 4)
{ {
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0); words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
} }
code_off += 64; code_off += 64;
@ -391,7 +391,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = starta; i < enda; i += 4) for (u32 i = starta; i < enda; i += 4)
{ {
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0); words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
} }
} }
else if (sizea == 2 && (end - start) <= 32) else if (sizea == 2 && (end - start) <= 32)
@ -408,7 +408,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = starta; i < starta + 32; i += 4) for (u32 i = starta; i < starta + 32; i += 4)
{ {
words.push_back(i >= start ? func[(i - start) / 4 + 1] : i + 32 < end ? func[(i + 32 - start) / 4 + 1] : 0); words.push_back(i >= start ? func.data[(i - start) / 4] : i + 32 < end ? func.data[(i + 32 - start) / 4] : 0);
} }
} }
else else
@ -471,7 +471,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = j; i < j + 32; i += 4) for (u32 i = j; i < j + 32; i += 4)
{ {
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0); words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
} }
code_off += 32; code_off += 32;
@ -513,7 +513,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = starta; i < enda; i += 4) for (u32 i = starta; i < enda; i += 4)
{ {
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0); words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
} }
} }
else if (sizea == 2 && (end - start) <= 32) else if (sizea == 2 && (end - start) <= 32)
@ -530,7 +530,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = starta; i < starta + 32; i += 4) for (u32 i = starta; i < starta + 32; i += 4)
{ {
words.push_back(i >= start ? func[(i - start) / 4 + 1] : i + 32 < end ? func[(i + 32 - start) / 4 + 1] : 0); words.push_back(i >= start ? func.data[(i - start) / 4] : i + 32 < end ? func.data[(i + 32 - start) / 4] : 0);
} }
} }
else else
@ -605,7 +605,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
for (u32 i = j; i < j + 32; i += 4) for (u32 i = j; i < j + 32; i += 4)
{ {
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0); words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
} }
code_off += 32; code_off += 32;
@ -675,10 +675,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
} }
// Determine which value will be duplicated at hole positions // Determine which value will be duplicated at hole positions
const u32 w3 = func.at((j - start + ~utils::cntlz32(cmask, true) % 4 * 4) / 4 + 1); const u32 w3 = func.data.at((j - start + ~utils::cntlz32(cmask, true) % 4 * 4) / 4);
words.push_back(cmask & 1 ? func[(j - start + 0) / 4 + 1] : w3); words.push_back(cmask & 1 ? func.data[(j - start + 0) / 4] : w3);
words.push_back(cmask & 2 ? func[(j - start + 4) / 4 + 1] : w3); words.push_back(cmask & 2 ? func.data[(j - start + 4) / 4] : w3);
words.push_back(cmask & 4 ? func[(j - start + 8) / 4 + 1] : w3); words.push_back(cmask & 4 ? func.data[(j - start + 8) / 4] : w3);
words.push_back(w3); words.push_back(w3);
// PSHUFD immediate table for all possible hole mask values, holes repeat highest valid word // PSHUFD immediate table for all possible hole mask values, holes repeat highest valid word
@ -771,10 +771,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
m_pos = -1; m_pos = -1;
} }
for (u32 i = 1; i < func.size(); i++) for (u32 i = 0; i < func.data.size(); i++)
{ {
const u32 pos = start + (i - 1) * 4; const u32 pos = start + i * 4;
const u32 op = se_storage<u32>::swap(func[i]); const u32 op = std::bit_cast<be_t<u32>>(func.data[i]);
if (!op) if (!op)
{ {
@ -908,7 +908,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn); const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);
// Rebuild trampoline if necessary // Rebuild trampoline if necessary
if (!m_spurt->rebuild_ubertrampoline(func[1])) if (!m_spurt->rebuild_ubertrampoline(func.data[0]))
{ {
return nullptr; return nullptr;
} }

View file

@ -13,7 +13,7 @@ public:
virtual void init() override; virtual void init() override;
virtual spu_function_t compile(std::vector<u32>&&) override; virtual spu_function_t compile(spu_program&&) override;
private: private:
// ASMJIT runtime // ASMJIT runtime

View file

@ -291,9 +291,9 @@ spu_cache::~spu_cache()
{ {
} }
std::deque<std::vector<u32>> spu_cache::get() std::deque<spu_program> spu_cache::get()
{ {
std::deque<std::vector<u32>> result; std::deque<spu_program> result;
if (!m_file) if (!m_file)
{ {
@ -314,41 +314,44 @@ std::deque<std::vector<u32>> spu_cache::get()
break; break;
} }
func.resize(size + 1); func.resize(size);
func[0] = addr;
if (m_file.read(func.data() + 1, func.size() * 4 - 4) != func.size() * 4 - 4) if (m_file.read(func.data(), func.size() * 4) != func.size() * 4)
{ {
break; break;
} }
if (!size || !func[1]) if (!size || !func[0])
{ {
// Skip old format Giga entries // Skip old format Giga entries
continue; continue;
} }
result.emplace_front(std::move(func)); spu_program res;
res.entry_point = addr;
res.lower_bound = addr;
res.data = std::move(func);
result.emplace_front(std::move(res));
} }
return result; return result;
} }
void spu_cache::add(const std::vector<u32>& func) void spu_cache::add(const spu_program& func)
{ {
if (!m_file) if (!m_file)
{ {
return; return;
} }
be_t<u32> size = ::size32(func) - 1; be_t<u32> size = ::size32(func.data);
be_t<u32> addr = func[0]; be_t<u32> addr = func.entry_point;
const fs::iovec_clone gather[3] const fs::iovec_clone gather[3]
{ {
{&size, sizeof(size)}, {&size, sizeof(size)},
{&addr, sizeof(addr)}, {&addr, sizeof(addr)},
{func.data() + 1, func.size() * 4 - 4} {func.data.data(), func.data.size() * 4}
}; };
// Append data // Append data
@ -458,7 +461,7 @@ void spu_cache::initialize()
// Build functions // Build functions
for (std::size_t func_i = fnext++; func_i < func_list.size(); func_i = fnext++) for (std::size_t func_i = fnext++; func_i < func_list.size(); func_i = fnext++)
{ {
const std::vector<u32>& func = std::as_const(func_list)[func_i]; const spu_program& func = std::as_const(func_list)[func_i];
if (Emu.IsStopped() || fail_flag) if (Emu.IsStopped() || fail_flag)
{ {
@ -467,21 +470,21 @@ void spu_cache::initialize()
} }
// Get data start // Get data start
const u32 start = func[0]; const u32 start = func.lower_bound;
const u32 size0 = ::size32(func); const u32 size0 = ::size32(func.data);
// Initialize LS with function data only // Initialize LS with function data only
for (u32 i = 1, pos = start; i < size0; i++, pos += 4) for (u32 i = 0, pos = start; i < size0; i++, pos += 4)
{ {
ls[pos / 4] = se_storage<u32>::swap(func[i]); ls[pos / 4] = std::bit_cast<be_t<u32>>(func.data[i]);
} }
// Call analyser // Call analyser
std::vector<u32> func2 = compiler->analyse(ls.data(), func[0]); spu_program func2 = compiler->analyse(ls.data(), func.entry_point);
if (func2 != func) if (func2 != func)
{ {
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, size0 - 1); LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0);
} }
else if (!compiler->compile(std::move(func2))) else if (!compiler->compile(std::move(func2)))
{ {
@ -523,51 +526,39 @@ void spu_cache::initialize()
g_fxo->init<spu_cache>(std::move(cache)); g_fxo->init<spu_cache>(std::move(cache));
} }
bool spu_runtime::func_compare::operator()(const std::vector<u32>& lhs, const std::vector<u32>& rhs) const bool spu_program::operator==(const spu_program& rhs) const noexcept
{ {
if (lhs.empty()) // TODO
return !rhs.empty(); return entry_point - lower_bound == rhs.entry_point - rhs.lower_bound && data == rhs.data;
else if (rhs.empty()) }
return false;
const u32 lhs_addr = lhs[0]; bool spu_program::operator<(const spu_program& rhs) const noexcept
const u32 rhs_addr = rhs[0]; {
const u32 lhs_offs = (entry_point - lower_bound) / 4;
if (lhs_addr < rhs_addr) const u32 rhs_offs = (rhs.entry_point - rhs.lower_bound) / 4;
return true;
else if (lhs_addr > rhs_addr)
return false;
// Select range for comparison // Select range for comparison
std::basic_string_view<u32> lhs_data(lhs.data() + 1, lhs.size() - 1); std::basic_string_view<u32> lhs_data(data.data() + lhs_offs, data.size() - lhs_offs);
std::basic_string_view<u32> rhs_data(rhs.data() + 1, rhs.size() - 1); std::basic_string_view<u32> rhs_data(rhs.data.data() + rhs_offs, rhs.data.size() - rhs_offs);
const auto cmp0 = lhs_data.compare(rhs_data);
if (lhs_data.empty()) if (cmp0 < 0)
return !rhs_data.empty(); return true;
else if (rhs_data.empty()) else if (cmp0 > 0)
return false; return false;
if (false) // Compare from address 0 to the point before the entry point (TODO: undesirable)
{ lhs_data = {data.data(), lhs_offs};
// In Giga mode, compare instructions starting from the entry point first rhs_data = {rhs.data.data(), rhs_offs};
lhs_data.remove_prefix(lhs_addr / 4); const auto cmp1 = lhs_data.compare(rhs_data);
rhs_data.remove_prefix(rhs_addr / 4);
const auto cmp0 = lhs_data.compare(rhs_data);
if (cmp0 < 0) if (cmp1 < 0)
return true; return true;
else if (cmp0 > 0) else if (cmp1 > 0)
return false; return false;
// Compare from address 0 to the point before the entry point (undesirable) // TODO
lhs_data = {lhs.data() + 1, lhs_addr / 4}; return lhs_offs < rhs_offs;
rhs_data = {rhs.data() + 1, rhs_addr / 4};
return lhs_data < rhs_data;
}
else
{
return lhs_data < rhs_data;
}
} }
spu_runtime::spu_runtime() spu_runtime::spu_runtime()
@ -590,9 +581,9 @@ spu_runtime::spu_runtime()
} }
} }
spu_item* spu_runtime::add_empty(std::vector<u32>&& data) spu_item* spu_runtime::add_empty(spu_program&& data)
{ {
if (data.size() <= 1) if (data.data.empty())
{ {
return nullptr; return nullptr;
} }
@ -601,12 +592,9 @@ spu_item* spu_runtime::add_empty(std::vector<u32>&& data)
spu_item* prev = nullptr; spu_item* prev = nullptr;
//Try to add item that doesn't exist yet //Try to add item that doesn't exist yet
const auto ret = m_stuff[data[1] >> 12].push_if([&](spu_item& _new, spu_item& _old) const auto ret = m_stuff[data.data[0] >> 12].push_if([&](spu_item& _new, spu_item& _old)
{ {
std::basic_string_view<u32> lhs{_new.data.data() + 1, _new.data.size() - 1}; if (_new.data == _old.data)
std::basic_string_view<u32> rhs{_old.data.data() + 1, _old.data.size() - 1};
if (lhs == rhs)
{ {
prev = &_old; prev = &_old;
return false; return false;
@ -643,7 +631,8 @@ spu_function_t spu_runtime::rebuild_ubertrampoline(u32 id_inst)
{ {
if (const auto ptr = it->compiled.load()) if (const auto ptr = it->compiled.load())
{ {
std::basic_string_view<u32> range{it->data.data() + 1, it->data.size() - 1}; std::basic_string_view<u32> range{it->data.data.data(), it->data.data.size()};
range.remove_prefix((it->data.entry_point - it->data.lower_bound) / 4);
m_flat_list.emplace_back(range, ptr); m_flat_list.emplace_back(range, ptr);
} }
else else
@ -997,7 +986,8 @@ spu_function_t spu_runtime::find(const u32* ls, u32 addr) const
{ {
if (const auto ptr = item.compiled.load()) if (const auto ptr = item.compiled.load())
{ {
std::basic_string_view<u32> range{item.data.data() + 1, item.data.size() - 1}; std::basic_string_view<u32> range{item.data.data.data(), item.data.data.size()};
range.remove_prefix((item.data.entry_point - item.data.lower_bound) / 4);
if (addr / 4 + range.size() > 0x10000) if (addr / 4 + range.size() > 0x10000)
{ {
@ -1194,12 +1184,13 @@ catch (const std::exception& e)
LOG_NOTICE(GENERAL, "\n%s", spu.dump()); LOG_NOTICE(GENERAL, "\n%s", spu.dump());
} }
std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point) spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
{ {
// Result: addr + raw instruction data // Result: addr + raw instruction data
std::vector<u32> result; spu_program result;
result.reserve(10000); result.data.reserve(10000);
result.push_back(entry_point); result.entry_point = entry_point;
result.lower_bound = entry_point;
// Initialize block entries // Initialize block entries
m_block_info.reset(); m_block_info.reset();
@ -1400,7 +1391,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
{ {
const u32 target = spu_branch_target(av); const u32 target = spu_branch_target(av);
LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x%s", result[0], pos, target, op.d ? " (D)" : op.e ? " (E)" : ""); LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x%s", entry_point, pos, target, op.d ? " (D)" : op.e ? " (E)" : "");
m_targets[pos].push_back(target); m_targets[pos].push_back(target);
@ -1408,7 +1399,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
{ {
if (sync) if (sync)
{ {
LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring %scall to 0x%x (SYNC)", result[0], pos, sl ? "" : "tail ", target); LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring %scall to 0x%x (SYNC)", entry_point, pos, sl ? "" : "tail ", target);
if (target > entry_point) if (target > entry_point)
{ {
@ -1505,17 +1496,17 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
if (jt_abs.size() >= jt_rel.size()) if (jt_abs.size() >= jt_rel.size())
{ {
const u32 new_size = (start - lsa) / 4 + 1 + jt_abs.size(); const u32 new_size = (start - lsa) / 4 + jt_abs.size();
if (result.size() < new_size) if (result.data.size() < new_size)
{ {
result.resize(new_size); result.data.resize(new_size);
} }
for (u32 i = 0; i < jt_abs.size(); i++) for (u32 i = 0; i < jt_abs.size(); i++)
{ {
add_block(jt_abs[i]); add_block(jt_abs[i]);
result[(start - lsa) / 4 + 1 + i] = se_storage<u32>::swap(jt_abs[i]); result.data[(start - lsa) / 4 + i] = std::bit_cast<u32, be_t<u32>>(jt_abs[i]);
m_targets[start + i * 4]; m_targets[start + i * 4];
} }
@ -1524,17 +1515,17 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
if (jt_rel.size() >= jt_abs.size()) if (jt_rel.size() >= jt_abs.size())
{ {
const u32 new_size = (start - lsa) / 4 + 1 + jt_rel.size(); const u32 new_size = (start - lsa) / 4 + jt_rel.size();
if (result.size() < new_size) if (result.data.size() < new_size)
{ {
result.resize(new_size); result.data.resize(new_size);
} }
for (u32 i = 0; i < jt_rel.size(); i++) for (u32 i = 0; i < jt_rel.size(); i++)
{ {
add_block(jt_rel[i]); add_block(jt_rel[i]);
result[(start - lsa) / 4 + 1 + i] = se_storage<u32>::swap(jt_rel[i] - start); result.data[(start - lsa) / 4 + i] = std::bit_cast<u32, be_t<u32>>(jt_rel[i] - start);
m_targets[start + i * 4]; m_targets[start + i * 4];
} }
@ -1569,7 +1560,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
} }
else if (type == spu_itype::BI && sync) else if (type == spu_itype::BI && sync)
{ {
LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring indirect branch (SYNC)", result[0], pos); LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos);
} }
if (type == spu_itype::BI || sl) if (type == spu_itype::BI || sl)
@ -1630,7 +1621,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
{ {
if (g_cfg.core.spu_block_size == spu_block_size_type::giga) if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
{ {
LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring fixed call to 0x%x (SYNC)", result[0], pos, target); LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring fixed call to 0x%x (SYNC)", entry_point, pos, target);
} }
if (target > entry_point) if (target > entry_point)
@ -1656,7 +1647,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
{ {
if (g_cfg.core.spu_block_size == spu_block_size_type::giga) if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
{ {
LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring fixed tail call to 0x%x (SYNC)", result[0], pos, target); LOG_NOTICE(SPU, "[0x%x] At 0x%x: ignoring fixed tail call to 0x%x (SYNC)", entry_point, pos, target);
} }
if (target > entry_point) if (target > entry_point)
@ -1914,31 +1905,33 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
} }
// Insert raw instruction value // Insert raw instruction value
if (result.size() - 1 <= (pos - lsa) / 4) const u32 new_size = (pos - lsa) / 4;
if (result.data.size() <= new_size)
{ {
if (result.size() - 1 < (pos - lsa) / 4) if (result.data.size() < new_size)
{ {
result.resize((pos - lsa) / 4 + 1); result.data.resize(new_size);
} }
result.emplace_back(se_storage<u32>::swap(data)); result.data.emplace_back(std::bit_cast<u32, be_t<u32>>(data));
} }
else if (u32& raw_val = result[(pos - lsa) / 4 + 1]) else if (u32& raw_val = result.data[new_size])
{ {
verify(HERE), raw_val == se_storage<u32>::swap(data); verify(HERE), raw_val == std::bit_cast<u32, be_t<u32>>(data);
} }
else else
{ {
raw_val = se_storage<u32>::swap(data); raw_val = std::bit_cast<u32, be_t<u32>>(data);
} }
} }
while (lsa > 0 || limit < 0x40000) while (lsa > 0 || limit < 0x40000)
{ {
const u32 initial_size = result.size(); const u32 initial_size = result.data.size();
// Check unreachable blocks // Check unreachable blocks
limit = std::min<u32>(limit, lsa + initial_size * 4 - 4); limit = std::min<u32>(limit, lsa + initial_size * 4);
for (auto& pair : m_preds) for (auto& pair : m_preds)
{ {
@ -1961,7 +1954,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
for (u32 j = workload[i];; j -= 4) for (u32 j = workload[i];; j -= 4)
{ {
// Go backward from an address until the entry point is reached // Go backward from an address until the entry point is reached
if (j == result[0]) if (j == entry_point)
{ {
reachable = true; reachable = true;
break; break;
@ -1994,7 +1987,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
// Check for possible fallthrough predecessor // Check for possible fallthrough predecessor
if (!had_fallthrough) if (!had_fallthrough)
{ {
if (result.at((j - lsa) / 4) == 0 || m_targets.count(j - 4)) if (result.data.at((j - lsa) / 4 - 1) == 0 || m_targets.count(j - 4))
{ {
break; break;
} }
@ -2018,16 +2011,16 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
} }
} }
result.resize((limit - lsa) / 4 + 1); result.data.resize((limit - lsa) / 4);
// Check holes in safe mode (TODO) // Check holes in safe mode (TODO)
u32 valid_size = 0; u32 valid_size = 0;
for (u32 i = 1; i < result.size(); i++) for (u32 i = 0; i < result.data.size(); i++)
{ {
if (result[i] == 0) if (result.data[i] == 0)
{ {
const u32 pos = lsa + (i - 1) * 4; const u32 pos = lsa + i * 4;
const u32 data = ls[pos / 4]; const u32 data = ls[pos / 4];
// Allow only NOP or LNOP instructions in holes // Allow only NOP or LNOP instructions in holes
@ -2038,34 +2031,34 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
if (g_cfg.core.spu_block_size != spu_block_size_type::giga) if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
{ {
result.resize(valid_size + 1); result.data.resize(valid_size);
break; break;
} }
} }
else else
{ {
valid_size = i; valid_size = i + 1;
} }
} }
// Even if NOP or LNOP, should be removed at the end // Even if NOP or LNOP, should be removed at the end
result.resize(valid_size + 1); result.data.resize(valid_size);
// Repeat if blocks were removed // Repeat if blocks were removed
if (result.size() == initial_size) if (result.data.size() == initial_size)
{ {
break; break;
} }
} }
limit = std::min<u32>(limit, lsa + ::size32(result) * 4 - 4); limit = std::min<u32>(limit, lsa + ::size32(result.data) * 4);
// Cleanup block info // Cleanup block info
for (u32 i = 0; i < workload.size(); i++) for (u32 i = 0; i < workload.size(); i++)
{ {
const u32 addr = workload[i]; const u32 addr = workload[i];
if (addr < lsa || addr >= limit || !result[(addr - lsa) / 4 + 1]) if (addr < lsa || addr >= limit || !result.data[(addr - lsa) / 4])
{ {
m_block_info[addr / 4] = false; m_block_info[addr / 4] = false;
m_entry_info[addr / 4] = false; m_entry_info[addr / 4] = false;
@ -2104,7 +2097,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
const u32 prev = (it->first - 4) & 0x3fffc; const u32 prev = (it->first - 4) & 0x3fffc;
// TODO: check the correctness // TODO: check the correctness
if (m_targets.count(prev) == 0 && prev >= lsa && prev < limit && result[(prev - lsa) / 4 + 1]) if (m_targets.count(prev) == 0 && prev >= lsa && prev < limit && result.data[(prev - lsa) / 4])
{ {
// Add target and the predecessor // Add target and the predecessor
m_targets[prev].push_back(it->first); m_targets[prev].push_back(it->first);
@ -2127,25 +2120,25 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
} }
// Fill holes which contain only NOP and LNOP instructions (TODO: compile) // Fill holes which contain only NOP and LNOP instructions (TODO: compile)
for (u32 i = 1, nnop = 0, vsize = 0; i <= result.size(); i++) for (u32 i = 0, nnop = 0, vsize = 0; i <= result.data.size(); i++)
{ {
if (i >= result.size() || result[i]) if (i >= result.data.size() || result.data[i])
{ {
if (nnop && nnop == i - vsize - 1) if (nnop && nnop == i - vsize)
{ {
// Write only complete NOP sequence // Write only complete NOP sequence
for (u32 j = vsize + 1; j < i; j++) for (u32 j = vsize; j < i; j++)
{ {
result[j] = se_storage<u32>::swap(ls[lsa / 4 + j - 1]); result.data[j] = std::bit_cast<u32, be_t<u32>>(ls[lsa / 4 + j]);
} }
} }
nnop = 0; nnop = 0;
vsize = i; vsize = i + 1;
} }
else else
{ {
const u32 pos = lsa + (i - 1) * 4; const u32 pos = lsa + i * 4;
const u32 data = ls[pos / 4]; const u32 data = ls[pos / 4];
if (data == 0x200000 || (data & 0xffffff80) == 0x40200000) if (data == 0x200000 || (data & 0xffffff80) == 0x40200000)
@ -2169,7 +2162,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
block.size++; block.size++;
// Decode instruction // Decode instruction
const spu_opcode_t op{se_storage<u32>::swap(result[(ia - lsa) / 4 + 1])}; const spu_opcode_t op{std::bit_cast<be_t<u32>>(result.data[(ia - lsa) / 4])};
const auto type = s_spu_itype.decode(op.opcode); const auto type = s_spu_itype.decode(op.opcode);
@ -2663,7 +2656,7 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
for (u32 ia = addr; ia < addr + bb.size * 4; ia += 4) for (u32 ia = addr; ia < addr + bb.size * 4; ia += 4)
{ {
// Decode instruction again // Decode instruction again
op.opcode = se_storage<u32>::swap(result[(ia - lsa) / 4 + 1]); op.opcode = std::bit_cast<be_t<u32>>(result.data[(ia - lsa) / 4]);
last_inst = s_spu_itype.decode(op.opcode); last_inst = s_spu_itype.decode(op.opcode);
// Propagate some constants // Propagate some constants
@ -3117,24 +3110,18 @@ std::vector<u32> spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_poi
} }
} }
if (result.size() == 1) if (result.data.empty())
{ {
// Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback
result.clear();
} }
return result; return result;
} }
void spu_recompiler_base::dump(const std::vector<u32>& result, std::string& out) void spu_recompiler_base::dump(const spu_program& result, std::string& out)
{ {
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode); SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
dis_asm.offset = reinterpret_cast<const u8*>(result.data() + 1); dis_asm.offset = reinterpret_cast<const u8*>(result.data.data()) - result.lower_bound;
if (true)
{
dis_asm.offset -= result[0];
}
std::string hash; std::string hash;
{ {
@ -3142,12 +3129,12 @@ void spu_recompiler_base::dump(const std::vector<u32>& result, std::string& out)
u8 output[20]; u8 output[20];
sha1_starts(&ctx); sha1_starts(&ctx);
sha1_update(&ctx, reinterpret_cast<const u8*>(result.data() + 1), result.size() * 4 - 4); sha1_update(&ctx, reinterpret_cast<const u8*>(result.data.data()), result.data.size() * 4);
sha1_finish(&ctx, output); sha1_finish(&ctx, output);
fmt::append(hash, "%s", fmt::base57(output)); fmt::append(hash, "%s", fmt::base57(output));
} }
fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n", result[0], result.size() - 1, hash); fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n", result.entry_point, result.data.size(), hash);
for (auto& bb : m_bbs) for (auto& bb : m_bbs)
{ {
@ -4162,14 +4149,14 @@ public:
} }
} }
virtual spu_function_t compile(std::vector<u32>&& _func) override virtual spu_function_t compile(spu_program&& _func) override
{ {
if (_func.empty() && m_interp_magn) if (_func.data.empty() && m_interp_magn)
{ {
return compile_interpreter(); return compile_interpreter();
} }
const u32 start0 = _func[0]; const u32 start0 = _func.entry_point;
const auto add_loc = m_spurt->add_empty(std::move(_func)); const auto add_loc = m_spurt->add_empty(std::move(_func));
@ -4178,9 +4165,9 @@ public:
return nullptr; return nullptr;
} }
const std::vector<u32>& func = add_loc->data; const spu_program& func = add_loc->data;
if (func[0] != start0) if (func.entry_point != start0)
{ {
// Wait for the duplicate // Wait for the duplicate
while (!add_loc->compiled) while (!add_loc->compiled)
@ -4203,22 +4190,22 @@ public:
u8 output[20]; u8 output[20];
sha1_starts(&ctx); sha1_starts(&ctx);
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data() + 1), func.size() * 4 - 4); sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
sha1_finish(&ctx, output); sha1_finish(&ctx, output);
m_hash.clear(); m_hash.clear();
fmt::append(m_hash, "spu-0x%05x-%s", func[0], fmt::base57(output)); fmt::append(m_hash, "spu-0x%05x-%s", func.entry_point, fmt::base57(output));
be_t<u64> hash_start; be_t<u64> hash_start;
std::memcpy(&hash_start, output, sizeof(hash_start)); std::memcpy(&hash_start, output, sizeof(hash_start));
m_hash_start = hash_start; m_hash_start = hash_start;
} }
LOG_NOTICE(SPU, "Building function 0x%x... (size %u, %s)", func[0], func.size() - 1, m_hash); LOG_NOTICE(SPU, "Building function 0x%x... (size %u, %s)", func.entry_point, func.data.size(), m_hash);
m_pos = func[0]; m_pos = func.lower_bound;
m_base = func[0]; m_base = func.entry_point;
m_size = (func.size() - 1) * 4; m_size = ::size32(func.data) * 4;
const u32 start = m_pos; const u32 start = m_pos;
const u32 end = start + m_size; const u32 end = start + m_size;
@ -4279,16 +4266,16 @@ public:
// Disable check (unsafe) // Disable check (unsafe)
m_ir->CreateBr(label_body); m_ir->CreateBr(label_body);
} }
else if (func.size() - 1 == 1) else if (func.data.size() == 1)
{ {
const auto pu32 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u32*>()); const auto pu32 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u32*>());
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu32), m_ir->getInt32(func[1])); const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu32), m_ir->getInt32(func.data[0]));
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely); m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
} }
else if (func.size() - 1 == 2) else if (func.data.size() == 2)
{ {
const auto pu64 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u64*>()); const auto pu64 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u64*>());
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu64), m_ir->getInt64(static_cast<u64>(func[2]) << 32 | func[1])); const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu64), m_ir->getInt64(static_cast<u64>(func.data[1]) << 32 | func.data[0]));
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely); m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
} }
else else
@ -4298,7 +4285,7 @@ public:
// Skip holes at the beginning (giga only) // Skip holes at the beginning (giga only)
for (u32 j = start; j < end; j += 4) for (u32 j = start; j < end; j += 4)
{ {
if (!func[(j - start) / 4 + 1]) if (!func.data[(j - start) / 4])
{ {
starta += 4; starta += 4;
} }
@ -4324,7 +4311,7 @@ public:
{ {
const u32 k = j + i * 4; const u32 k = j + i * 4;
if (k < start || k >= end || !func[(k - start) / 4 + 1]) if (k < start || k >= end || !func.data[(k - start) / 4])
{ {
indices[i] = 8; indices[i] = 8;
holes = true; holes = true;
@ -4357,7 +4344,7 @@ public:
for (u32 i = 0; i < 8; i++) for (u32 i = 0; i < 8; i++)
{ {
const u32 k = j + i * 4; const u32 k = j + i * 4;
words[i] = k >= start && k < end ? func[(k - start) / 4 + 1] : 0; words[i] = k >= start && k < end ? func.data[(k - start) / 4] : 0;
} }
vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, words)); vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, words));
@ -4598,7 +4585,7 @@ public:
break; break;
} }
const u32 op = se_storage<u32>::swap(func[(m_pos - start) / 4 + 1]); const u32 op = std::bit_cast<be_t<u32>>(func.data[(m_pos - start) / 4]);
if (!op) if (!op)
{ {
@ -4744,7 +4731,7 @@ public:
if (g_cfg.core.spu_debug) if (g_cfg.core.spu_debug)
{ {
fmt::append(log, "LLVM IR at 0x%x:\n", func[0]); fmt::append(log, "LLVM IR at 0x%x:\n", func.entry_point);
out << *module; // print IR out << *module; // print IR
out << "\n\n"; out << "\n\n";
} }
@ -4752,7 +4739,7 @@ public:
if (verifyModule(*module, &out)) if (verifyModule(*module, &out))
{ {
out.flush(); out.flush();
LOG_ERROR(SPU, "LLVM: Verification failed at 0x%x:\n%s", func[0], log); LOG_ERROR(SPU, "LLVM: Verification failed at 0x%x:\n%s", func.entry_point, log);
if (g_cfg.core.spu_debug) if (g_cfg.core.spu_debug)
{ {
@ -4781,7 +4768,7 @@ public:
add_loc->compiled = fn; add_loc->compiled = fn;
// Rebuild trampoline if necessary // Rebuild trampoline if necessary
if (!m_spurt->rebuild_ubertrampoline(func[1])) if (!m_spurt->rebuild_ubertrampoline(func.data[0]))
{ {
return nullptr; return nullptr;
} }
@ -8398,7 +8385,7 @@ struct spu_llvm
} }
// Start compiling // Start compiling
const std::vector<u32>& func = found_it->second->data; const spu_program& func = found_it->second->data;
// Old function pointer (pre-recompiled) // Old function pointer (pre-recompiled)
const spu_function_t _old = found_it->second->compiled; const spu_function_t _old = found_it->second->compiled;
@ -8407,21 +8394,21 @@ struct spu_llvm
enqueued.erase(found_it); enqueued.erase(found_it);
// Get data start // Get data start
const u32 start = func[0]; const u32 start = func.lower_bound;
const u32 size0 = ::size32(func); const u32 size0 = ::size32(func.data);
// Initialize LS with function data only // Initialize LS with function data only
for (u32 i = 1, pos = start; i < size0; i++, pos += 4) for (u32 i = 0, pos = start; i < size0; i++, pos += 4)
{ {
ls[pos / 4] = se_storage<u32>::swap(func[i]); ls[pos / 4] = std::bit_cast<be_t<u32>>(func.data[i]);
} }
// Call analyser // Call analyser
std::vector<u32> func2 = compiler->analyse(ls.data(), func[0]); spu_program func2 = compiler->analyse(ls.data(), func.entry_point);
if (func2 != func) if (func2 != func)
{ {
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, size0 - 1); LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0);
} }
else if (const auto target = compiler->compile(std::move(func2))) else if (const auto target = compiler->compile(std::move(func2)))
{ {
@ -8444,7 +8431,7 @@ struct spu_llvm
} }
else else
{ {
LOG_FATAL(SPU, "[0x%05x] Compilation failed.", func[0]); LOG_FATAL(SPU, "[0x%05x] Compilation failed.", func.entry_point);
Emu.Pause(); Emu.Pause();
return; return;
} }
@ -8469,7 +8456,7 @@ struct spu_fast : public spu_recompiler_base
} }
} }
virtual spu_function_t compile(std::vector<u32>&& _func) override virtual spu_function_t compile(spu_program&& _func) override
{ {
const auto add_loc = m_spurt->add_empty(std::move(_func)); const auto add_loc = m_spurt->add_empty(std::move(_func));
@ -8483,7 +8470,7 @@ struct spu_fast : public spu_recompiler_base
return add_loc->compiled; return add_loc->compiled;
} }
const std::vector<u32>& func = add_loc->data; const spu_program& func = add_loc->data;
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1)) if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
{ {
@ -8493,22 +8480,22 @@ struct spu_fast : public spu_recompiler_base
} }
// Allocate executable area with necessary size // Allocate executable area with necessary size
const auto result = jit_runtime::alloc(22 + 1 + 9 + (::size32(func) - 1) * (16 + 16) + 36 + 47, 16); const auto result = jit_runtime::alloc(22 + 1 + 9 + ::size32(func.data) * (16 + 16) + 36 + 47, 16);
if (!result) if (!result)
{ {
return nullptr; return nullptr;
} }
m_pos = func[0]; m_pos = func.lower_bound;
m_size = (::size32(func) - 1) * 4; m_size = ::size32(func.data) * 4;
{ {
sha1_context ctx; sha1_context ctx;
u8 output[20]; u8 output[20];
sha1_starts(&ctx); sha1_starts(&ctx);
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data() + 1), func.size() * 4 - 4); sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
sha1_finish(&ctx, output); sha1_finish(&ctx, output);
be_t<u64> hash_start; be_t<u64> hash_start;
@ -8554,9 +8541,9 @@ struct spu_fast : public spu_recompiler_base
*raw++ = 0x00; *raw++ = 0x00;
// Verification (slow) // Verification (slow)
for (u32 i = 1; i < func.size(); i++) for (u32 i = 0; i < func.data.size(); i++)
{ {
if (!func[i]) if (!func.data[i])
{ {
continue; continue;
} }
@ -8564,8 +8551,8 @@ struct spu_fast : public spu_recompiler_base
// cmp dword ptr [rcx + off], opc // cmp dword ptr [rcx + off], opc
*raw++ = 0x81; *raw++ = 0x81;
*raw++ = 0xb9; *raw++ = 0xb9;
const u32 off = (i - 1) * 4; const u32 off = i * 4;
const u32 opc = func[i]; const u32 opc = func.data[i];
std::memcpy(raw + 0, &off, 4); std::memcpy(raw + 0, &off, 4);
std::memcpy(raw + 4, &opc, 4); std::memcpy(raw + 4, &opc, 4);
raw += 8; raw += 8;
@ -8627,16 +8614,16 @@ struct spu_fast : public spu_recompiler_base
*raw++ = 0x4c; *raw++ = 0x4c;
*raw++ = 0x8d; *raw++ = 0x8d;
*raw++ = 0x35; *raw++ = 0x35;
const u32 epi_off = (::size32(func) - 1) * 16; const u32 epi_off = ::size32(func.data) * 16;
std::memcpy(raw, &epi_off, 4); std::memcpy(raw, &epi_off, 4);
raw += 4; raw += 4;
// Instructions (each instruction occupies fixed number of bytes) // Instructions (each instruction occupies fixed number of bytes)
for (u32 i = 1; i < func.size(); i++) for (u32 i = 0; i < func.data.size(); i++)
{ {
const u32 pos = m_pos + (i - 1) * 4; const u32 pos = m_pos + i * 4;
if (!func[i]) if (!func.data[i])
{ {
// Save pc: mov [rbp + spu_thread::pc], r12d // Save pc: mov [rbp + spu_thread::pc], r12d
*raw++ = 0x44; *raw++ = 0x44;
@ -8658,7 +8645,7 @@ struct spu_fast : public spu_recompiler_base
} }
// Fix endianness // Fix endianness
const spu_opcode_t op{se_storage<u32>::swap(func[i])}; const spu_opcode_t op{std::bit_cast<be_t<u32>>(func.data[i])};
switch (auto type = s_spu_itype.decode(op.opcode)) switch (auto type = s_spu_itype.decode(op.opcode))
{ {
@ -8797,7 +8784,7 @@ struct spu_fast : public spu_recompiler_base
} }
// Rebuild trampoline if necessary // Rebuild trampoline if necessary
if (!m_spurt->rebuild_ubertrampoline(func[1])) if (!m_spurt->rebuild_ubertrampoline(func.data[0]))
{ {
return nullptr; return nullptr;
} }

View file

@ -29,18 +29,39 @@ public:
return m_file.operator bool(); return m_file.operator bool();
} }
std::deque<std::vector<u32>> get(); std::deque<struct spu_program> get();
void add(const std::vector<u32>& func); void add(const struct spu_program& func);
static void initialize(); static void initialize();
}; };
// A verified SPU code fragment: entry point plus the raw instruction words that were
// reachable from it. Replaces the old flat std::vector<u32> representation in which
// element 0 held the entry address and the opcodes followed.
struct spu_program
{
// Address of the entry point in LS
u32 entry_point;
// Address of the data in LS
// NOTE: may be below entry_point; data[0] corresponds to this address, not to entry_point.
u32 lower_bound;
// Program data with intentionally wrong endianness (on LE platform opcode values are swapped)
// Index i holds the word at LS address lower_bound + i * 4; zero entries mark holes
// (addresses inside [lower_bound, end) that are not part of the program).
std::vector<u32> data;
// Deep equality over entry_point/lower_bound/data (defined out of line).
bool operator==(const spu_program& rhs) const noexcept;
// Negation of operator==; kept inline so only equality needs an out-of-line definition.
bool operator!=(const spu_program& rhs) const noexcept
{
return !(*this == rhs);
}
// Strict weak ordering (defined out of line); used to keep programs in sorted containers.
bool operator<(const spu_program& rhs) const noexcept;
};
class spu_item class spu_item
{ {
public: public:
// SPU program // SPU program
const std::vector<u32> data; const spu_program data;
// Compiled function pointer // Compiled function pointer
atomic_t<spu_function_t> compiled = nullptr; atomic_t<spu_function_t> compiled = nullptr;
@ -51,7 +72,7 @@ public:
atomic_t<u8> cached = false; atomic_t<u8> cached = false;
atomic_t<u8> logged = false; atomic_t<u8> logged = false;
spu_item(std::vector<u32>&& data) spu_item(spu_program&& data)
: data(std::move(data)) : data(std::move(data))
{ {
} }
@ -64,12 +85,6 @@ public:
// Helper class // Helper class
class spu_runtime class spu_runtime
{ {
struct func_compare
{
// Comparison function for SPU programs
bool operator()(const std::vector<u32>& lhs, const std::vector<u32>& rhs) const;
};
// All functions (2^20 bunches) // All functions (2^20 bunches)
std::array<lf_bunch<spu_item>, (1 << 20)> m_stuff; std::array<lf_bunch<spu_item>, (1 << 20)> m_stuff;
@ -109,7 +124,7 @@ private:
public: public:
// Return new pointer for add() // Return new pointer for add()
spu_item* add_empty(std::vector<u32>&&); spu_item* add_empty(spu_program&&);
// Find existing function // Find existing function
spu_function_t find(const u32* ls, u32 addr) const; spu_function_t find(const u32* ls, u32 addr) const;
@ -292,7 +307,7 @@ public:
virtual void init() = 0; virtual void init() = 0;
// Compile function // Compile function
virtual spu_function_t compile(std::vector<u32>&&) = 0; virtual spu_function_t compile(spu_program&&) = 0;
// Default dispatch function fallback (second arg is unused) // Default dispatch function fallback (second arg is unused)
static void dispatch(spu_thread&, void*, u8* rip); static void dispatch(spu_thread&, void*, u8* rip);
@ -304,10 +319,10 @@ public:
static void old_interpreter(spu_thread&, void* ls, u8*); static void old_interpreter(spu_thread&, void* ls, u8*);
// Get the function data at specified address // Get the function data at specified address
std::vector<u32> analyse(const be_t<u32>* ls, u32 lsa); spu_program analyse(const be_t<u32>* ls, u32 lsa);
// Print analyser internal state // Print analyser internal state
void dump(const std::vector<u32>& result, std::string& out); void dump(const spu_program& result, std::string& out);
// Get SPU Runtime // Get SPU Runtime
spu_runtime& get_runtime() spu_runtime& get_runtime()