PPU: MTOCRF optimization

This commit is contained in:
Nekotekina 2017-06-28 20:33:18 +03:00
parent ef55d6fa0c
commit 85f8a42bd2
3 changed files with 62 additions and 17 deletions

View file

@ -3420,6 +3420,26 @@ bool ppu_interpreter::ADDE(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::MTOCRF(ppu_thread& ppu, ppu_opcode_t op)
{
// Lookup table with one row per 4-bit value (0..15). Row v holds the binary
// digits of v as four separate bytes, most significant bit first, each byte
// being 0 or 1. The code below indexes it with a nibble extracted from the
// source register and copies the selected row as a single 32-bit store into
// four consecutive ppu.cr entries.
static u8 s_table[16][4]
{
{0, 0, 0, 0},
{0, 0, 0, 1},
{0, 0, 1, 0},
{0, 0, 1, 1},
{0, 1, 0, 0},
{0, 1, 0, 1},
{0, 1, 1, 0},
{0, 1, 1, 1},
{1, 0, 0, 0},
{1, 0, 0, 1},
{1, 0, 1, 0},
{1, 0, 1, 1},
{1, 1, 0, 0},
{1, 1, 0, 1},
{1, 1, 1, 0},
{1, 1, 1, 1},
};
const u64 s = ppu.gpr[op.rs];
if (op.l11)
@ -3428,11 +3448,8 @@ bool ppu_interpreter::MTOCRF(ppu_thread& ppu, ppu_opcode_t op)
const u32 n = cntlz32(op.crm) & 7;
const u32 p = n * 4;
const u64 v = s >> (p ^ 0x1c);
ppu.cr[p + 0] = (v & 8) != 0;
ppu.cr[p + 1] = (v & 4) != 0;
ppu.cr[p + 2] = (v & 2) != 0;
ppu.cr[p + 3] = (v & 1) != 0;
const u64 v = (s >> (p ^ 0x1c)) & 0xf;
*(u32*)(u8*)(ppu.cr + p) = *(u32*)(s_table + v);
}
else
{
@ -3440,15 +3457,11 @@ bool ppu_interpreter::MTOCRF(ppu_thread& ppu, ppu_opcode_t op)
for (u32 i = 0; i < 8; i++)
{
const u32 p = i * 4;
const u64 v = s >> (p ^ 0x1c);
if (op.crm & (128 >> i))
{
ppu.cr[p + 0] = (v & 8) != 0;
ppu.cr[p + 1] = (v & 4) != 0;
ppu.cr[p + 2] = (v & 2) != 0;
ppu.cr[p + 3] = (v & 1) != 0;
const u32 p = i * 4;
const u64 v = (s >> (p ^ 0x1c)) & 0xf;
*(u32*)(u8*)(ppu.cr + p) = *(u32*)(s_table + v);
}
}
}

View file

@ -2347,16 +2347,45 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
// MTCRF
}
const auto value = GetGpr(op.rs);
// Flat 64-byte lookup table: 16 groups of 4 bytes, one group per 4-bit value
// (0..15). Group v (bytes v*4 .. v*4+3) holds the binary digits of v, most
// significant bit first, each byte being 0 or 1. It is used below to
// initialize a constant global in the module; the generated code computes
// the byte offset as (index << 2) and copies 4 bytes from there.
static u8 s_table[64]
{
0, 0, 0, 0,
0, 0, 0, 1,
0, 0, 1, 0,
0, 0, 1, 1,
0, 1, 0, 0,
0, 1, 0, 1,
0, 1, 1, 0,
0, 1, 1, 1,
1, 0, 0, 0,
1, 0, 0, 1,
1, 0, 1, 0,
1, 0, 1, 1,
1, 1, 0, 0,
1, 1, 0, 1,
1, 1, 1, 0,
1, 1, 1, 1,
};
if (!m_mtocr_table)
{
m_mtocr_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u8>(), 64), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, s_table));
}
const auto value = GetGpr(op.rs, 32);
for (u32 i = 0; i < 8; i++)
{
if (op.crm & (128 >> i))
{
for (u32 bit = i * 4; bit < i * 4 + 4; bit++)
{
SetCrb(bit, Trunc(m_ir->CreateLShr(value, 31 - bit), GetType<bool>()));
}
// Discard pending values
std::fill_n(m_cr + i * 4, 4, nullptr);
std::fill_n(m_g_cr + i * 4, 4, nullptr);
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
const auto src = m_ir->CreateGEP(m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
const auto dst = m_ir->CreateBitCast(m_ir->CreateStructGEP(nullptr, m_thread, m_cr - m_locals + i * 4), GetType<u8*>());
Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getInt32(4), m_ir->getFalse());
}
}
}

View file

@ -150,7 +150,10 @@ class PPUTranslator final //: public CPUTranslator
// Thread context struct
llvm::StructType* m_thread_type;
llvm::Value* m_mtocr_table{};
llvm::Value* m_globals[173];
llvm::Value** const m_g_cr = m_globals + 99;
llvm::Value* m_locals[173];
llvm::Value** const m_gpr = m_locals + 3;
llvm::Value** const m_fpr = m_locals + 35;