New SPU interpreter (SPU fast)

Use LLVM to build SPU interpreter.
Simplify interpreter loop.
This commit is contained in:
Nekotekina 2019-03-25 21:31:16 +03:00
parent a9eb321814
commit 71b88cdc82
5 changed files with 1115 additions and 340 deletions

View file

@ -923,6 +923,12 @@ public:
return llvm_value_t<T>::get_type(m_context);
}
// Build the LLVM function type whose return type is mapped from R and whose
// parameter types are mapped, in order, from Args... (both via get_type).
// The resulting function type is never variadic.
template <typename R, typename... Args>
llvm::FunctionType* get_ftype()
{
	constexpr bool is_var_arg = false;

	const auto return_type = get_type<R>();

	return llvm::FunctionType::get(return_type, {get_type<Args>()...}, is_var_arg);
}
template <typename T>
using value_t = llvm_value_t<T>;
@ -1083,6 +1089,15 @@ public:
return result;
}
template <typename T, typename V>
auto vsplat(V v)
{
value_t<T> result;
static_assert(result.is_vector);
result.value = m_ir->CreateVectorSplat(result.is_vector, v.eval(m_ir));
return result;
}
// Min
template <typename T>
auto min(T a, T b)
@ -1257,6 +1272,19 @@ public:
return result;
}
// Emit a load of element `i` from the global variable `g`.
// The address is formed by a GEP with indices {0, i}; `i` is first zero-extended
// or truncated to the type returned by get_type<u64>().
llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i)
{
	// Leading zero index steps through the pointer to the global itself
	llvm::Value* const first_index = m_ir->getInt64(0);
	llvm::Value* const elem_index = m_ir->CreateZExtOrTrunc(i, get_type<u64>());
	llvm::Value* const elem_ptr = m_ir->CreateGEP(g, {first_index, elem_index});

	return m_ir->CreateLoad(elem_ptr);
}
// Typed wrapper over the llvm::Value* overload of load_const: evaluates the
// index expression `i` with the current IR builder, loads the element of `g`
// at that index and returns it wrapped as value_t<T>.
template <typename T, typename I>
value_t<T> load_const(llvm::GlobalVariable* g, I i)
{
	value_t<T> loaded;
	llvm::Value* const index = i.eval(m_ir);
	loaded.value = load_const(g, index);
	return loaded;
}
// Declaration only: the definition is not part of this excerpt.
// R defaults to v128 when no template argument is supplied.
// NOTE(review): the meaning of `a` and `b` is not visible here - confirm against the definition.
template <typename R = v128>
R get_const_vector(llvm::Constant*, u32 a, u32 b);

File diff suppressed because it is too large Load diff

View file

@ -108,6 +108,9 @@ public:
// All dispatchers (array allocated in jit memory)
static atomic_t<spu_function_t>* const g_dispatcher;
// Interpreter entry point
static spu_function_t g_interpreter;
struct passive_lock
{
spu_runtime& _this;
@ -253,7 +256,7 @@ public:
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler();
// Create recompiler instance (LLVM)
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler();
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler(u8 magn = 0);
enum : u8
{

View file

@ -23,16 +23,6 @@
#include <atomic>
#include <thread>
// Flag telling whether the SSSE3 code path may be used.
// - MSVC: initialized from utils::has_ssse3().
// - Other compilers: true only when __SSSE3__ evaluates to non-zero at build time,
//   otherwise false.
const bool s_use_ssse3 =
#ifdef _MSC_VER
utils::has_ssse3();
#elif __SSSE3__
true;
#else
false;
// In this configuration _mm_shuffle_epi8 is defined as an empty macro
// (presumably so code mentioning it still compiles without SSSE3 - confirm).
#define _mm_shuffle_epi8
#endif
#ifdef _MSC_VER
bool operator ==(const u128& lhs, const u128& rhs)
{
@ -597,6 +587,23 @@ void spu_thread::cpu_task()
return;
}
if (spu_runtime::g_interpreter)
{
while (true)
{
if (UNLIKELY(state))
{
if (check_state())
break;
}
spu_runtime::g_interpreter(*this, vm::_ptr<u8>(offset), nullptr);
}
cpu_stop();
return;
}
// Select opcode table
const auto& table = *(
g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() :
@ -605,11 +612,6 @@ void spu_thread::cpu_task()
// LS pointer
const auto base = vm::_ptr<const u8>(offset);
const auto bswap4 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
v128 _op;
using func_t = decltype(&spu_interpreter::UNK);
func_t func0, func1, func2, func3, func4, func5;
while (true)
{
@ -617,62 +619,11 @@ void spu_thread::cpu_task()
{
if (check_state())
break;
// Decode single instruction (may be step)
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + pc);
if (table[spu_decode(op)](*this, {op})) { pc += 4; }
continue;
}
if (pc % 16 || !s_use_ssse3)
{
// Unaligned
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + pc);
if (table[spu_decode(op)](*this, {op})) { pc += 4; }
continue;
}
// Reinitialize
_op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<const __m128i*>(base + pc)), bswap4);
func0 = table[spu_decode(_op._u32[0])];
func1 = table[spu_decode(_op._u32[1])];
func2 = table[spu_decode(_op._u32[2])];
func3 = table[spu_decode(_op._u32[3])];
while (LIKELY(func0(*this, {_op._u32[0]})))
{
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + pc);
if (table[spu_decode(op)](*this, {op}))
pc += 4;
if (LIKELY(func1(*this, {_op._u32[1]})))
{
pc += 4;
u32 op2 = _op._u32[2];
u32 op3 = _op._u32[3];
_op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<const __m128i*>(base + pc + 8)), bswap4);
func0 = table[spu_decode(_op._u32[0])];
func1 = table[spu_decode(_op._u32[1])];
func4 = table[spu_decode(_op._u32[2])];
func5 = table[spu_decode(_op._u32[3])];
if (LIKELY(func2(*this, {op2})))
{
pc += 4;
if (LIKELY(func3(*this, {op3})))
{
pc += 4;
func2 = func4;
func3 = func5;
if (UNLIKELY(state))
{
break;
}
continue;
}
break;
}
break;
}
break;
}
}
cpu_stop();

View file

@ -291,7 +291,12 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> guiSettings, std:
ui->accurateXFloat->setEnabled(checked);
});
ui->accurateXFloat->setEnabled(ui->spu_llvm->isChecked());
connect(ui->spu_fast, &QAbstractButton::toggled, [this](bool checked)
{
ui->accurateXFloat->setEnabled(checked);
});
ui->accurateXFloat->setEnabled(ui->spu_llvm->isChecked() || ui->spu_fast->isChecked());
#ifndef LLVM_AVAILABLE
ui->ppu_llvm->setEnabled(false);