From 83da7f9b63c5fcdca87a0051a8ef2e6f63632b9a Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 20 Dec 2017 00:01:03 +0300 Subject: [PATCH] PPU: remove SSSE3 dependency --- rpcs3/CMakeLists.txt | 18 +++-- rpcs3/Emu/Cell/PPUInterpreter.cpp | 124 +++++++++++++++++++++++++++--- rpcs3/Emu/Cell/PPUInterpreter.h | 33 +++++--- rpcs3/Emu/Cell/PPUOpcodes.h | 8 +- rpcs3/Emu/Cell/PPUThread.cpp | 114 +++++++++++++++++++++------ rpcs3/rpcs3qt/main_window.cpp | 9 --- 6 files changed, 246 insertions(+), 60 deletions(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index fb522da699..a6e9e68556 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -100,13 +100,17 @@ if(NOT MSVC) set(CMAKE_RC_COMPILER_INIT windres) enable_language(RC) set(CMAKE_RC_COMPILE_OBJECT " -O coff -i -o ") - + # Workaround for mingw64 (MSYS2) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--allow-multiple-definition") endif() - - add_compile_options(-msse -msse2 -mcx16 -mssse3 -mrtm) - + + add_compile_options(-msse -msse2 -mcx16 -mrtm) + + if(NOT TRAVIS) + add_compile_options(-march=native) + endif() + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # This fixes 'some' of the st11range issues. See issue #2516 if(APPLE) @@ -115,7 +119,7 @@ if(NOT MSVC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -image-base=0x10000") endif() endif() - + # Some distros have the compilers set to use PIE by default, but RPCS3 doesn't work with PIE, so we need to disable it. if(APPLE) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no_pie") @@ -133,7 +137,7 @@ if(NOT MSVC) find_package(ZLIB REQUIRED) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:throwingNew /D _CRT_SECURE_NO_DEPRECATE=1 /D _CRT_NON_CONFORMING_SWPRINTFS=1 /D _SCL_SECURE_NO_WARNINGS=1") - + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:libc.lib /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcd.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrtd.lib") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SUBSYSTEM:WINDOWS /DYNAMICBASE:NO /BASE:0x10000 /FIXED") endif() @@ -342,7 +346,7 @@ if(NOT WIN32 AND VULKAN_FOUND) "${RPCS3_SRC_DIR}/../Vulkan/glslang/OGLCompilersDLL/*.cpp" "${RPCS3_SRC_DIR}/../Vulkan/glslang/SPIRV/*.cpp" ) - + set(RPCS3_SRC ${RPCS3_SRC} ${UNIX_GLSLANG}) endif() diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 7498458139..64f9048ec0 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -5,6 +5,10 @@ #include +#if !defined(_MSC_VER) && !defined(__SSSE3__) +#define _mm_shuffle_epi8 +#endif + inline u64 dup32(u32 x) { return x | static_cast(x) << 32; } // Write values to CR field @@ -83,6 +87,20 @@ extern __m128 sse_log2_ps(__m128 A) return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8)); } +extern __m128i sse_pshufb(__m128i data, __m128i index) +{ + v128 m = v128::fromV(_mm_and_si128(index, _mm_set1_epi8(0xf))); + v128 a = v128::fromV(data); + v128 r; + + for (int i = 0; i < 16; i++) + { + r._u8[i] = a._u8[m._u8[i]]; + } + + return _mm_and_si128(r.vi, _mm_cmpgt_epi8(index, _mm_set1_epi8(-1))); +} + extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C) { const auto index = _mm_andnot_si128(C, _mm_set1_epi8(0x1f)); @@ -92,6 +110,20 @@ extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C) return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); } +extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C) +{ + __m128i ab[2]{B, A}; + v128 index = v128::fromV(_mm_andnot_si128(C, _mm_set1_epi8(0x1f))); + v128 res; + + for (int i = 0; i < 16; i++) + { + res._u8[i] = ((u8*)+ab)[index._u8[i]]; + } + + return res.vi; +} + extern __m128i sse_altivec_lvsl(u64 addr) { alignas(16) static const u8 lvsl_values[0x10][0x10] = @@ -202,6 +234,26 @@ extern void sse_cellbe_stvrx(u64 addr, __m128i a) _mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf)); } +extern __m128i sse_cellbe_lvlx_v0(u64 addr) +{ + return sse_pshufb(_mm_load_si128((__m128i*)vm::base(addr & ~0xf)), lvlx_masks[addr & 0xf]); +} + +extern void sse_cellbe_stvlx_v0(u64 addr, __m128i a) +{ + _mm_maskmoveu_si128(sse_pshufb(a, lvlx_masks[addr & 0xf]), lvrx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf)); +} + +extern __m128i sse_cellbe_lvrx_v0(u64 addr) +{ + return sse_pshufb(_mm_load_si128((__m128i*)vm::base(addr & ~0xf)), lvrx_masks[addr & 0xf]); +} + +extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a) +{ + _mm_maskmoveu_si128(sse_pshufb(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf)); +} + template struct add_flags_result_t { @@ -1327,7 +1379,13 @@ bool ppu_interpreter::VOR(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::VPERM(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::VPERM(ppu_thread& ppu, ppu_opcode_t op) +{ + ppu.vr[op.vd].vi = sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi); + return true; +} + +bool ppu_interpreter_fast::VPERM(ppu_thread& ppu, ppu_opcode_t op) { ppu.vr[op.vd].vi = sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi); return true; @@ -3881,7 +3939,14 @@ bool ppu_interpreter::DIVW(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::LVLX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::LVLX(ppu_thread& ppu, ppu_opcode_t op) +{ + const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; + ppu.vr[op.vd].vi = sse_cellbe_lvlx_v0(addr); + return true; +} + +bool ppu_interpreter_fast::LVLX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; ppu.vr[op.vd].vi = sse_cellbe_lvlx(addr); @@ -3945,7 +4010,14 @@ bool ppu_interpreter::SRD(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::LVRX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::LVRX(ppu_thread& ppu, ppu_opcode_t op) +{ + const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; + ppu.vr[op.vd].vi = sse_cellbe_lvrx_v0(addr); + return true; +} + +bool ppu_interpreter_fast::LVRX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; ppu.vr[op.vd].vi = sse_cellbe_lvrx(addr); @@ -4013,7 +4085,14 @@ bool ppu_interpreter::LFDUX(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::STVLX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::STVLX(ppu_thread& ppu, ppu_opcode_t op) +{ + const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; + sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi); + return true; +} + +bool ppu_interpreter_fast::STVLX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi); @@ -4061,7 +4140,14 @@ bool ppu_interpreter::STFSX(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::STVRX(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::STVRX(ppu_thread& ppu, ppu_opcode_t op) +{ + const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; + sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi); + return true; +} + +bool ppu_interpreter_fast::STVRX(ppu_thread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi); @@ -4121,7 +4207,12 @@ bool ppu_interpreter::STFDUX(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::LVLXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::LVLXL(ppu_thread& ppu, ppu_opcode_t op) +{ + return LVLX(ppu, op); +} + +bool ppu_interpreter_fast::LVLXL(ppu_thread& ppu, ppu_opcode_t op) { return LVLX(ppu, op); } @@ -4171,7 +4262,12 @@ bool ppu_interpreter::SRAD(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::LVRXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::LVRXL(ppu_thread& ppu, ppu_opcode_t op) +{ + return LVRX(ppu, op); +} + +bool ppu_interpreter_fast::LVRXL(ppu_thread& ppu, ppu_opcode_t op) { return LVRX(ppu, op); } @@ -4208,7 +4304,12 @@ bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::STVLXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::STVLXL(ppu_thread& ppu, ppu_opcode_t op) +{ + return STVLX(ppu, op); +} + +bool ppu_interpreter_fast::STVLXL(ppu_thread& ppu, ppu_opcode_t op) { return STVLX(ppu, op); } @@ -4227,7 +4328,12 @@ bool ppu_interpreter::EXTSH(ppu_thread& ppu, ppu_opcode_t op) return true; } -bool ppu_interpreter::STVRXL(ppu_thread& ppu, ppu_opcode_t op) +bool ppu_interpreter_precise::STVRXL(ppu_thread& ppu, ppu_opcode_t op) +{ + return STVRX(ppu, op); +} + +bool ppu_interpreter_fast::STVRXL(ppu_thread& ppu, ppu_opcode_t op) { return STVRX(ppu, op); } diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index afb27d38a6..57223b02db 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -4,6 +4,8 @@ class ppu_thread; +using ppu_inter_func_t = bool(*)(ppu_thread& ppu, ppu_opcode_t op); + struct ppu_interpreter { static bool MFVSCR(ppu_thread&, ppu_opcode_t); @@ -75,7 +77,6 @@ struct ppu_interpreter static bool VNMSUBFP(ppu_thread&, ppu_opcode_t); static bool VNOR(ppu_thread&, ppu_opcode_t); static bool VOR(ppu_thread&, ppu_opcode_t); - static bool VPERM(ppu_thread&, ppu_opcode_t); static bool VPKPX(ppu_thread&, ppu_opcode_t); static bool VPKUHUM(ppu_thread&, ppu_opcode_t); static bool VPKUWUM(ppu_thread&, ppu_opcode_t); @@ -247,42 +248,34 @@ struct ppu_interpreter static bool STVXL(ppu_thread&, ppu_opcode_t); static bool DIVD(ppu_thread&, ppu_opcode_t); static bool DIVW(ppu_thread&, ppu_opcode_t); - static bool LVLX(ppu_thread&, ppu_opcode_t); static bool LDBRX(ppu_thread&, ppu_opcode_t); static bool LSWX(ppu_thread&, ppu_opcode_t); static bool LWBRX(ppu_thread&, ppu_opcode_t); static bool LFSX(ppu_thread&, ppu_opcode_t); static bool SRW(ppu_thread&, ppu_opcode_t); static bool SRD(ppu_thread&, ppu_opcode_t); - static bool LVRX(ppu_thread&, ppu_opcode_t); static bool LSWI(ppu_thread&, ppu_opcode_t); static bool LFSUX(ppu_thread&, ppu_opcode_t); static bool SYNC(ppu_thread&, ppu_opcode_t); static bool LFDX(ppu_thread&, ppu_opcode_t); static bool LFDUX(ppu_thread&, ppu_opcode_t); - static bool STVLX(ppu_thread&, ppu_opcode_t); static bool STDBRX(ppu_thread&, ppu_opcode_t); static bool STSWX(ppu_thread&, ppu_opcode_t); static bool STWBRX(ppu_thread&, ppu_opcode_t); static bool STFSX(ppu_thread&, ppu_opcode_t); - static bool STVRX(ppu_thread&, ppu_opcode_t); static bool STFSUX(ppu_thread&, ppu_opcode_t); static bool STSWI(ppu_thread&, ppu_opcode_t); static bool STFDX(ppu_thread&, ppu_opcode_t); static bool STFDUX(ppu_thread&, ppu_opcode_t); - static bool LVLXL(ppu_thread&, ppu_opcode_t); static bool LHBRX(ppu_thread&, ppu_opcode_t); static bool SRAW(ppu_thread&, ppu_opcode_t); static bool SRAD(ppu_thread&, ppu_opcode_t); - static bool LVRXL(ppu_thread&, ppu_opcode_t); static bool DSS(ppu_thread&, ppu_opcode_t); static bool SRAWI(ppu_thread&, ppu_opcode_t); static bool SRADI(ppu_thread&, ppu_opcode_t); static bool EIEIO(ppu_thread&, ppu_opcode_t); - static bool STVLXL(ppu_thread&, ppu_opcode_t); static bool STHBRX(ppu_thread&, ppu_opcode_t); static bool EXTSH(ppu_thread&, ppu_opcode_t); - static bool STVRXL(ppu_thread&, ppu_opcode_t); static bool EXTSB(ppu_thread&, ppu_opcode_t); static bool STFIWX(ppu_thread&, ppu_opcode_t); static bool EXTSW(ppu_thread&, ppu_opcode_t); @@ -362,6 +355,16 @@ struct ppu_interpreter struct ppu_interpreter_precise final : ppu_interpreter { + static bool VPERM(ppu_thread&, ppu_opcode_t); + static bool LVLX(ppu_thread&, ppu_opcode_t); + static bool LVLXL(ppu_thread&, ppu_opcode_t); + static bool LVRX(ppu_thread&, ppu_opcode_t); + static bool LVRXL(ppu_thread&, ppu_opcode_t); + static bool STVLX(ppu_thread&, ppu_opcode_t); + static bool STVLXL(ppu_thread&, ppu_opcode_t); + static bool STVRX(ppu_thread&, ppu_opcode_t); + static bool STVRXL(ppu_thread&, ppu_opcode_t); + static bool VPKSHSS(ppu_thread&, ppu_opcode_t); static bool VPKSHUS(ppu_thread&, ppu_opcode_t); static bool VPKSWSS(ppu_thread&, ppu_opcode_t); @@ -395,11 +398,21 @@ struct ppu_interpreter_precise final : ppu_interpreter struct ppu_interpreter_fast final : ppu_interpreter { + static bool VPERM(ppu_thread&, ppu_opcode_t); + static bool LVLX(ppu_thread&, ppu_opcode_t); + static bool LVLXL(ppu_thread&, ppu_opcode_t); + static bool LVRX(ppu_thread&, ppu_opcode_t); + static bool LVRXL(ppu_thread&, ppu_opcode_t); + static bool STVLX(ppu_thread&, ppu_opcode_t); + static bool STVLXL(ppu_thread&, ppu_opcode_t); + static bool STVRX(ppu_thread&, ppu_opcode_t); + static bool STVRXL(ppu_thread&, ppu_opcode_t); + static bool VPKSHSS(ppu_thread&, ppu_opcode_t); static bool VPKSHUS(ppu_thread&, ppu_opcode_t); static bool VPKSWSS(ppu_thread&, ppu_opcode_t); static bool VPKSWUS(ppu_thread&, ppu_opcode_t); - static bool VPKUHUS(ppu_thread&, ppu_opcode_t); + static bool VPKUHUS(ppu_thread&, ppu_opcode_t); static bool VPKUWUS(ppu_thread&, ppu_opcode_t); static bool VADDSBS(ppu_thread&, ppu_opcode_t); static bool VADDSHS(ppu_thread&, ppu_opcode_t); diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index 7ae20d293b..8f358bf8d1 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -71,7 +71,7 @@ inline u32 ppu_decode(u32 inst) } // PPU decoder object. D provides functions. T is function pointer type returned. -template +template class ppu_decoder { // Fast lookup table @@ -555,6 +555,12 @@ public: }); } + template + ppu_decoder(F&& init) : ppu_decoder() + { + init(m_table); + } + const std::array& get_table() const { return m_table; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 9a74ab65f3..e7ad2a7a50 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -56,6 +56,16 @@ const bool s_use_rtm = utils::has_rtm(); +const bool s_use_ssse3 = +#ifdef _MSC_VER + utils::has_ssse3(); +#elif __SSSE3__ + true; +#else + false; +#define _mm_shuffle_epi8 +#endif + extern u64 get_system_time(); namespace vm { using namespace ps3; } @@ -101,8 +111,57 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } -const ppu_decoder s_ppu_interpreter_precise; -const ppu_decoder s_ppu_interpreter_fast; +// Table of identical interpreter functions when precise contains SSE2 version, and fast contains SSSE3 functions +const std::pair s_ppu_dispatch_table[] +{ +#define FUNC(x) {&ppu_interpreter_precise::x, &ppu_interpreter_fast::x} + FUNC(VPERM), + FUNC(LVLX), + FUNC(LVLXL), + FUNC(LVRX), + FUNC(LVRXL), + FUNC(STVLX), + FUNC(STVLXL), + FUNC(STVRX), + FUNC(STVRXL), +#undef FUNC +}; + +extern const ppu_decoder g_ppu_interpreter_precise([](auto& table) +{ + if (s_use_ssse3) + { + for (auto& func : table) + { + for (const auto& pair : s_ppu_dispatch_table) + { + if (pair.first == func) + { + func = pair.second; + break; + } + } + } + } +}); + +extern const ppu_decoder g_ppu_interpreter_fast([](auto& table) +{ + if (!s_use_ssse3) + { + for (auto& func : table) + { + for (const auto& pair : s_ppu_dispatch_table) + { + if (pair.second == func) + { + func = pair.first; + break; + } + } + } + } +}); extern void ppu_initialize(); extern void ppu_initialize(const ppu_module& info); @@ -120,8 +179,8 @@ static u32 ppu_cache(u32 addr) { // Select opcode table const auto& table = *( - g_cfg.core.ppu_decoder == ppu_decoder_type::precise ? &s_ppu_interpreter_precise.get_table() : - g_cfg.core.ppu_decoder == ppu_decoder_type::fast ? &s_ppu_interpreter_fast.get_table() : + g_cfg.core.ppu_decoder == ppu_decoder_type::precise ? &g_ppu_interpreter_precise.get_table() : + g_cfg.core.ppu_decoder == ppu_decoder_type::fast ? &g_ppu_interpreter_fast.get_table() : (fmt::throw_exception("Invalid PPU decoder"), nullptr)); return ::narrow(reinterpret_cast(table[ppu_decode(vm::read32(addr))])); @@ -154,7 +213,7 @@ static bool ppu_check_toc(ppu_thread& ppu, ppu_opcode_t op) if (ppu.gpr[2] != found->second) { LOG_ERROR(PPU, "Unexpected TOC (0x%x, expected 0x%x)", ppu.gpr[2], found->second); - + if (!ppu.state.test_and_set(cpu_flag::dbg_pause) && ppu.check_state()) { return false; @@ -207,8 +266,8 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr) { LOG_ERROR(PPU, "ppu_register_function_at(0x%x): empty range", addr); } - - return; + + return; } if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm) @@ -401,7 +460,7 @@ std::string ppu_thread::dump() const { ret += '\n'; } - + ret += "\nRegisters:\n=========\n"; for (uint i = 0; i < 32; ++i) fmt::append(ret, "GPR[%d] = 0x%llx\n", i, gpr[i]); for (uint i = 0; i < 32; ++i) fmt::append(ret, "FPR[%d] = %.6G\n", i, fpr[i]); @@ -455,7 +514,9 @@ void ppu_thread::cpu_task() { case ppu_cmd::opcode: { - cmd_pop(), s_ppu_interpreter_fast.decode(arg)(*this, {arg}); + cmd_pop(), g_cfg.core.ppu_decoder == ppu_decoder_type::precise + ? g_ppu_interpreter_precise.decode(arg)(*this, {arg}) + : g_ppu_interpreter_fast.decode(arg)(*this, {arg}); break; } case ppu_cmd::set_gpr: @@ -521,7 +582,7 @@ void ppu_thread::exec_task() { reinterpret_cast(static_cast(ppu_ref(cia)))(*this); } - + return; } @@ -545,7 +606,7 @@ void ppu_thread::exec_task() continue; } - if (cia % 16) + if (cia % 16 || !s_use_ssse3) { // Unaligned const u32 op = *reinterpret_cast*>(base + cia); @@ -808,12 +869,17 @@ extern ppu_function_t ppu_get_syscall(u64 code); extern __m128 sse_exp2_ps(__m128 A); extern __m128 sse_log2_ps(__m128 A); extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C); +extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C); extern __m128i sse_altivec_lvsl(u64 addr); extern __m128i sse_altivec_lvsr(u64 addr); extern __m128i sse_cellbe_lvlx(u64 addr); extern __m128i sse_cellbe_lvrx(u64 addr); extern void sse_cellbe_stvlx(u64 addr, __m128i a); extern void sse_cellbe_stvrx(u64 addr, __m128i a); +extern __m128i sse_cellbe_lvlx_v0(u64 addr); +extern __m128i sse_cellbe_lvrx_v0(u64 addr); +extern void sse_cellbe_stvlx_v0(u64 addr, __m128i a); +extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a); [[noreturn]] static void ppu_trap(ppu_thread& ppu, u64 addr) { @@ -889,7 +955,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value) vm::writer_lock lock(0); const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast(ppu.rdata), reg_value); - + if (result) { vm::reservation_update(addr, sizeof(u32)); @@ -1024,13 +1090,13 @@ extern void ppu_initialize(const ppu_module& info) { "__stdcx", (u64)&ppu_stdcx }, { "__vexptefp", (u64)&sse_exp2_ps }, { "__vlogefp", (u64)&sse_log2_ps }, - { "__vperm", (u64)&sse_altivec_vperm }, + { "__vperm", s_use_ssse3 ? (u64)&sse_altivec_vperm : (u64)&sse_altivec_vperm_v0 }, { "__lvsl", (u64)&sse_altivec_lvsl }, { "__lvsr", (u64)&sse_altivec_lvsr }, - { "__lvlx", (u64)&sse_cellbe_lvlx }, - { "__lvrx", (u64)&sse_cellbe_lvrx }, - { "__stvlx", (u64)&sse_cellbe_stvlx }, - { "__stvrx", (u64)&sse_cellbe_stvrx }, + { "__lvlx", s_use_ssse3 ? (u64)&sse_cellbe_lvlx : (u64)&sse_cellbe_lvlx_v0 }, + { "__lvrx", s_use_ssse3 ? (u64)&sse_cellbe_lvrx : (u64)&sse_cellbe_lvrx_v0 }, + { "__stvlx", s_use_ssse3 ? (u64)&sse_cellbe_stvlx : (u64)&sse_cellbe_stvlx_v0 }, + { "__stvrx", s_use_ssse3 ? (u64)&sse_cellbe_stvrx : (u64)&sse_cellbe_stvrx_v0 }, }; for (u64 index = 0; index < 1024; index++) @@ -1075,7 +1141,7 @@ extern void ppu_initialize(const ppu_module& info) std::vector vars; std::vector funcs; }; - + // Permanently loaded compiled PPU modules (name -> data) jit_module& jit_mod = fxm::get_always>()->emplace(cache_path + info.name, jit_module{}).first->second; @@ -1239,7 +1305,7 @@ extern void ppu_initialize(const ppu_module& info) { // Set low priority thread_ctrl::set_native_priority(-1); - + // Allocate "core" { semaphore_lock jlock(jcores); @@ -1259,7 +1325,7 @@ extern void ppu_initialize(const ppu_module& info) return; } - // Proceed with original JIT instance + // Proceed with original JIT instance semaphore_lock lock(jmutex); jit->add(cache_path + obj_name); }); @@ -1278,7 +1344,7 @@ extern void ppu_initialize(const ppu_module& info) // Jit can be null if the loop doesn't ever enter. if (jit && jit_mod.vars.empty()) - { + { jit->fin(); // Get and install function addresses for (const auto& func : info.funcs) @@ -1356,7 +1422,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co // Initialize target module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); - + // Initialize translator PPUTranslator translator(jit.get_context(), module.get(), module_part); @@ -1429,7 +1495,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co if (module_part.funcs[fi].size) { - // Update dialog + // Update dialog Emu.CallAfter([=, max = module_part.funcs.size()]() { dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax)); @@ -1448,7 +1514,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co { Emu.Pause(); return; - } + } } } diff --git a/rpcs3/rpcs3qt/main_window.cpp b/rpcs3/rpcs3qt/main_window.cpp index 6b958b0cfa..a951574d27 100644 --- a/rpcs3/rpcs3qt/main_window.cpp +++ b/rpcs3/rpcs3qt/main_window.cpp @@ -110,15 +110,6 @@ void main_window::Init() Q_EMIT RequestGlobalStylesheetChange(guiSettings->GetCurrentStylesheetPath()); ConfigureGuiFromSettings(true); - if (!utils::has_ssse3()) - { - QMessageBox::critical(this, "SSSE3 Error (with three S, not two)", - "Your system does not meet the minimum requirements needed to run RPCS3.\n" - "Your CPU does not support SSSE3 (with three S, not two).\n"); - - std::exit(EXIT_FAILURE); - } - #ifdef BRANCH if ("RPCS3/rpcs3/master"s != STRINGIZE(BRANCH) && ""s != STRINGIZE(BRANCH)) #else