PPU LLVM: allow skipping SAT flag updates (optimization, module-wide)

Implement ppu_attr::has_mfvscr (partially: the search is module-wide rather than per-function).
If no MFVSCR instruction is found in the module, setting the SAT flag may be dropped.
This is based on the presumption that only MFVSCR can read the SAT flag.
This commit is contained in:
Nekotekina 2021-06-25 10:50:42 +03:00
parent 86b194014b
commit c9d8e59dbf
5 changed files with 57 additions and 1 deletions

View file

@ -24,6 +24,7 @@ void fmt_class_string<ppu_attr>::format(std::string& out, u64 arg)
case ppu_attr::known_size: return "known_size";
case ppu_attr::no_return: return "no_return";
case ppu_attr::no_size: return "no_size";
case ppu_attr::has_mfvscr: return "has_mfvscr";
case ppu_attr::__bitset_enum_max: break;
}

View file

@ -16,6 +16,7 @@ enum class ppu_attr : u32
known_size,
no_return,
no_size,
has_mfvscr,
__bitset_enum_max
};

View file

@ -3099,6 +3099,44 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
bool compiled_new = false;
bool has_mfvscr = false;
for (auto& func : info.funcs)
{
if (func.size == 0)
{
continue;
}
for (const auto& [addr, size] : func.blocks)
{
if (size == 0)
{
continue;
}
for (u32 i = addr; i < addr + size; i += 4)
{
if (g_ppu_itype.decode(vm::read32(i)) == ppu_itype::MFVSCR)
{
ppu_log.warning("MFVSCR found");
has_mfvscr = true;
break;
}
}
if (has_mfvscr)
{
break;
}
}
if (has_mfvscr)
{
break;
}
}
while (!jit_mod.init && fpos < info.funcs.size())
{
// Initialize compiler instance
@ -3140,6 +3178,12 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
// Fixup some information
entry.name = fmt::format("__0x%x", entry.addr - reloc);
if (has_mfvscr)
{
// TODO
entry.attr += ppu_attr::has_mfvscr;
}
if (entry.blocks.empty())
{
entry.blocks.emplace(func.addr, func.size);
@ -3257,6 +3301,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
accurate_cache_line_stores,
reservations_128_byte,
greedy_mode,
has_mfvscr,
__bitset_enum_max
};
@ -3278,6 +3323,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
settings += ppu_settings::reservations_128_byte;
if (g_cfg.core.ppu_llvm_greedy_mode)
settings += ppu_settings::greedy_mode;
if (has_mfvscr)
settings += ppu_settings::has_mfvscr;
// Write version, hash, CPU, settings
fmt::append(obj_name, "v5-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));

View file

@ -141,6 +141,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
// Instruction address is (m_addr + base)
const u64 base = m_reloc ? m_reloc->addr : 0;
m_addr = info.addr - base;
m_attr = info.attr;
// Don't emit check in small blocks without terminator
bool need_check = info.size >= 16;
@ -4816,7 +4817,10 @@ void PPUTranslator::SetOverflow(Value* bit)
// OR `bit` into the m_sat register.
// The update is emitted only when the function being translated carries
// ppu_attr::has_mfvscr; otherwise it is dropped entirely, on the presumption
// that MFVSCR is the only instruction able to read the SAT flag back.
void PPUTranslator::SetSat(Value* bit)
{
	if (m_attr & ppu_attr::has_mfvscr)
	{
		RegStore(m_ir->CreateOr(RegLoad(m_sat), bit), m_sat);
	}
}
Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)

View file

@ -28,6 +28,9 @@ class PPUTranslator final : public cpu_translator
// Current position-independent address
u64 m_addr = 0;
// Function attributes
bs_t<ppu_attr> m_attr{};
// Relocation info
const ppu_segment* m_reloc = nullptr;