From 86fc842c89565b696263353785f9f5db35b9000d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 31 Oct 2020 01:52:24 +0300 Subject: [PATCH] TSX: new fallback method (time-based) Basically, using timestamp counter. Rewritten vm::reservation_op with the same principle. Rewritten another transaction helper. Add two new settings for configuring fallbacks. Two limits are specified in nanoseconds (first and second). Fix PUTLLC reload logic (prevent reusing garbage). --- Utilities/JIT.h | 53 ++++++++-- Utilities/asm.h | 9 +- rpcs3/Emu/CPU/CPUThread.h | 5 + rpcs3/Emu/Cell/Modules/cellSpurs.cpp | 26 ++--- rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp | 2 +- rpcs3/Emu/Cell/PPUThread.cpp | 72 ++++++++++--- rpcs3/Emu/Cell/SPUThread.cpp | 129 ++++++++++++++++++------ rpcs3/Emu/Memory/vm.cpp | 8 +- rpcs3/Emu/Memory/vm_reservation.h | 22 ++-- rpcs3/Emu/System.cpp | 12 ++- rpcs3/Emu/System.h | 2 + rpcs3/Emu/system_config.h | 2 + 12 files changed, 263 insertions(+), 79 deletions(-) diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 17be332d07..7a33d8f975 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -57,31 +57,68 @@ namespace asmjit // Emit xbegin and adjacent loop, return label at xbegin (don't use xabort please) template - [[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than, F func) + [[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, F func) { Label fall = c.newLabel(); Label begin = c.newLabel(); c.jmp(begin); c.bind(fall); - // First invoked after failure - func(); - - c.add(ctr, 1); - // Don't repeat on zero status (may indicate syscall or interrupt) c.test(x86::eax, x86::eax); c.jz(fallback); + // First invoked after failure (can fallback to proceed, or jump anywhere else) + func(); + // Other bad statuses are ignored regardless of repeat flag (TODO) - c.cmp(ctr, less_than); - c.jae(fallback); c.align(kAlignCode, 16); c.bind(begin); return fall; // xbegin should be issued manually, allows to add more check before entering transaction } + + // Helper to spill RDX (EDX) register for RDTSC + inline void build_swap_rdx_with(asmjit::X86Assembler& c, std::array& args, const asmjit::X86Gp& with) + { +#ifdef _WIN32 + c.xchg(args[1], with); + args[1] = with; +#else + c.xchg(args[2], with); + args[2] = with; +#endif + } + + // Get full RDTSC value into chosen register (clobbers rax/rdx or saves only rax with other target) + inline void build_get_tsc(asmjit::X86Assembler& c, const asmjit::X86Gp& to = asmjit::x86::rax) + { + if (&to != &x86::rax && &to != &x86::rdx) + { + // Swap to save its contents + c.xchg(x86::rax, to); + } + + c.rdtsc(); + c.shl(x86::rdx, 32); + + if (&to == &x86::rax) + { + c.or_(x86::rax, x86::rdx); + } + else if (&to == &x86::rdx) + { + c.or_(x86::rdx, x86::rax); + } + else + { + // Swap back, maybe there is more effective way to do it + c.xchg(x86::rax, to); + c.mov(to.r32(), to.r32()); + c.or_(to.r64(), x86::rdx); + } + } } // Build runtime function with asmjit::X86Assembler diff --git a/Utilities/asm.h b/Utilities/asm.h index 7a93c211ed..e56a2a121c 100644 --- a/Utilities/asm.h +++ b/Utilities/asm.h @@ -2,15 +2,18 @@ #include "types.h" +extern bool g_use_rtm; +extern u64 g_rtm_tx_limit1; + namespace utils { - // Transaction helper (Max = max attempts) (result = pair of success and op result) - template > + // Transaction helper (result = pair of success and op result, or just bool) + template > inline auto tx_start(F op) { uint status = -1; - for (uint i = 0; i < Max; i++) + for (auto stamp0 = __rdtsc(), stamp1 = stamp0; g_use_rtm && stamp1 - stamp0 <= g_rtm_tx_limit1; stamp1 = __rdtsc()) { #ifndef _MSC_VER __asm__ goto ("xbegin %l[retry];" ::: "memory" : retry); diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index b6e44aa891..044021d1cb 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -73,6 +73,11 @@ public: return !!(state & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause)); } + bool has_pause_flag() const + { + return !!(state & cpu_flag::pause); + } + // Check thread type u32 id_type() const { diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index 56e49ac3db..abca9012e9 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -292,7 +292,7 @@ namespace _spurs namespace _spurs { // Add workload - s32 add_workload(vm::ptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, const u8(&priorityTable)[8], u32 minContention, u32 maxContention, vm::cptr nameClass, vm::cptr nameInstance, vm::ptr hook, vm::ptr hookArg); + s32 add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, const u8(&priorityTable)[8], u32 minContention, u32 maxContention, vm::cptr nameClass, vm::cptr nameInstance, vm::ptr hook, vm::ptr hookArg); } //s32 _cellSpursWorkloadAttributeInitialize(vm::ptr attr, u32 revision, u32 sdkVersion, vm::cptr pm, u32 size, u64 data, vm::cptr priority, u32 minCnt, u32 maxCnt); @@ -2295,7 +2295,7 @@ s32 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, const u8(&priorityTable)[8], u32 minContention, u32 maxContention, vm::cptr nameClass, vm::cptr nameInstance, vm::ptr hook, vm::ptr hookArg) +s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, const u8(&priorityTable)[8], u32 minContention, u32 maxContention, vm::cptr nameClass, vm::cptr nameInstance, vm::ptr hook, vm::ptr hookArg) { if (!spurs || !wid || !pm) { @@ -2420,7 +2420,7 @@ s32 _spurs::add_workload(vm::ptr spurs, vm::ptr wid, vm::cptrwklInfo(wnum); - vm::reservation_op(vm::unsafe_ptr_cast(spurs.ptr(&CellSpurs::wklState1)), [&](spurs_wkl_state_op& op) + vm::reservation_op(ppu, vm::unsafe_ptr_cast(spurs.ptr(&CellSpurs::wklState1)), [&](spurs_wkl_state_op& op) { const u32 mask = op.wklMskB & ~(0x80000000u >> wnum); res_wkl = 0; @@ -2456,12 +2456,12 @@ s32 _spurs::add_workload(vm::ptr spurs, vm::ptr wid, vm::cptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, vm::cptr priority, u32 minCnt, u32 maxCnt) +s32 cellSpursAddWorkload(ppu_thread& ppu, vm::ptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, vm::cptr priority, u32 minCnt, u32 maxCnt) { cellSpurs.warning("cellSpursAddWorkload(spurs=*0x%x, wid=*0x%x, pm=*0x%x, size=0x%x, data=0x%llx, priority=*0x%x, minCnt=0x%x, maxCnt=0x%x)", spurs, wid, pm, size, data, priority, minCnt, maxCnt); - return _spurs::add_workload(spurs, wid, pm, size, data, *priority, minCnt, maxCnt, vm::null, vm::null, vm::null, vm::null); + return _spurs::add_workload(ppu, spurs, wid, pm, size, data, *priority, minCnt, maxCnt, vm::null, vm::null, vm::null, vm::null); } /// Add workload @@ -2484,7 +2484,7 @@ s32 cellSpursAddWorkloadWithAttribute(ppu_thread& ppu, vm::ptr spurs, return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } - return _spurs::add_workload(spurs, wid, attr->pm, attr->size, attr->data, attr->priority, attr->minContention, attr->maxContention, attr->nameClass, attr->nameInstance, attr->hook, attr->hookArg); + return _spurs::add_workload(ppu, spurs, wid, attr->pm, attr->size, attr->data, attr->priority, attr->minContention, attr->maxContention, attr->nameClass, attr->nameInstance, attr->hook, attr->hookArg); } /// Request workload shutdown @@ -2506,7 +2506,7 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr spurs, u32 wid bool send_event; s32 rc, old_state; - if (!vm::reservation_op(vm::unsafe_ptr_cast(spurs.ptr(&CellSpurs::wklState1)), [&](spurs_wkl_state_op& op) + if (!vm::reservation_op(ppu, vm::unsafe_ptr_cast(spurs.ptr(&CellSpurs::wklState1)), [&](spurs_wkl_state_op& op) { auto& state = wid < CELL_SPURS_MAX_WORKLOAD ? op.wklState1[wid] : op.wklState2[wid % 16]; @@ -2663,7 +2663,7 @@ s32 cellSpursRemoveWorkload(ppu_thread& ppu, vm::ptr spurs, u32 wid) } s32 rc; - vm::reservation_op(vm::unsafe_ptr_cast(spurs.ptr(&CellSpurs::wklState1)), [&](spurs_wkl_state_op& op) + vm::reservation_op(ppu, vm::unsafe_ptr_cast(spurs.ptr(&CellSpurs::wklState1)), [&](spurs_wkl_state_op& op) { auto& state = wid < CELL_SPURS_MAX_WORKLOAD ? op.wklState1[wid] : op.wklState2[wid % 16]; @@ -3040,7 +3040,7 @@ s32 _cellSpursWorkloadFlagReceiver(ppu_thread& ppu, vm::ptr spurs, u3 }; s32 res; - vm::reservation_op(vm::unsafe_ptr_cast(spurs), [&](wklFlagOp& val) + vm::reservation_op(ppu, vm::unsafe_ptr_cast(spurs), [&](wklFlagOp& val) { if (is_set) { @@ -3189,7 +3189,7 @@ s32 cellSpursEventFlagSet(ppu_thread& ppu, vm::ptr eventFlag u16 pendingRecv; u16 pendingRecvTaskEvents[16]; - vm::reservation_op(vm::unsafe_ptr_cast(eventFlag), [bits, &send, &ppuWaitSlot, &ppuEvents, &pendingRecv, &pendingRecvTaskEvents](CellSpursEventFlag_x00& eventFlag) + vm::reservation_op(ppu, vm::unsafe_ptr_cast(eventFlag), [bits, &send, &ppuWaitSlot, &ppuEvents, &pendingRecv, &pendingRecvTaskEvents](CellSpursEventFlag_x00& eventFlag) { send = false; ppuWaitSlot = 0; @@ -4081,7 +4081,7 @@ s32 _cellSpursSendSignal(ppu_thread& ppu, vm::ptr taskset, u32 int signal; - vm::reservation_op(vm::unsafe_ptr_cast(taskset), [&](spurs_taskset_signal_op& op) + vm::reservation_op(ppu, vm::unsafe_ptr_cast(taskset), [&](spurs_taskset_signal_op& op) { const u32 signalled = op.signalled[taskId / 32]; const u32 running = op.running[taskId / 32]; @@ -4972,7 +4972,7 @@ s32 cellSpursJobGuardNotify(ppu_thread& ppu, vm::ptr jobGuard u32 allow_jobchain_run = 0; // Affects cellSpursJobChainRun execution u32 old = 0; - const bool ok = vm::reservation_op(vm::unsafe_ptr_cast(jobGuard), [&](CellSpursJobGuard_x00& jg) + const bool ok = vm::reservation_op(ppu, vm::unsafe_ptr_cast(jobGuard), [&](CellSpursJobGuard_x00& jg) { allow_jobchain_run = jg.zero; old = jg.ncount0; @@ -5136,7 +5136,7 @@ s32 cellSpursAddUrgentCommand(ppu_thread& ppu, vm::ptr jobCha s32 result = CELL_OK; - vm::reservation_op(vm::unsafe_ptr_cast(jobChain), [&](CellSpursJobChain_x00& jch) + vm::reservation_op(ppu, vm::unsafe_ptr_cast(jobChain), [&](CellSpursJobChain_x00& jch) { for (auto& cmd : jch.urgentCmds) { diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp index 75cc6a0eb6..3083ff0725 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp @@ -2074,7 +2074,7 @@ void spursJobchainPopUrgentCommand(spu_thread& spu) const auto jc = vm::unsafe_ptr_cast(+ctxt->jobChain); const bool alterQueue = ctxt->unkFlag0; - vm::reservation_op(jc, [&](CellSpursJobChain_x00& op) + vm::reservation_op(spu, jc, [&](CellSpursJobChain_x00& op) { const auto ls = reinterpret_cast(ctxt->tempAreaJobChain); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 7e5844add7..0569ff44a1 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1216,6 +1216,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr) { ppu.rtime = ppu.last_ftime; ppu.raddr = ppu.last_faddr; + ppu.last_ftime = 0; return static_cast(rdata << data_off >> size_off); } @@ -1261,7 +1262,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr) return ppu_load_acquire_reservation(ppu, addr); } -const auto ppu_stcx_accurate_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto ppu_stcx_accurate_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -1282,6 +1283,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm(cpu_flag::pause)); c.mov(x86::eax, _XABORT_EXPLICIT); c.jc(fall); @@ -1380,7 +1395,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm(cpu_flag::pause)); c.jc(fall2); c.mov(x86::rax, x86::qword_ptr(x86::rbx)); - c.test(x86::rax, 127 - 1); - c.jnz(fall2); c.and_(x86::rax, -128); c.cmp(x86::rax, x86::r13); c.jne(fail2); @@ -1493,7 +1525,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm(new_data))) + switch (u64 count = ppu_stcx_accurate_tx(addr & -8, rtime, ppu.rdata, std::bit_cast(new_data))) { - case UINT32_MAX: + case UINT64_MAX: { auto& all_data = *vm::get_super_ptr(addr & -128); auto& sdata = *vm::get_super_ptr>(addr & -8); @@ -1660,6 +1697,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) break; } + ppu.last_ftime = -1; [[fallthrough]]; } case 0: @@ -1669,6 +1707,12 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) ppu.last_fail++; } + if (ppu.last_ftime != umax) + { + ppu.last_faddr = 0; + return false; + } + _m_prefetchw(ppu.rdata); _m_prefetchw(ppu.rdata + 64); ppu.last_faddr = addr; @@ -1678,9 +1722,9 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) } default: { - if (count > 60 && g_cfg.core.perf_report) [[unlikely]] + if (count > 20000 && g_cfg.core.perf_report) [[unlikely]] { - perf_log.warning("STCX: took too long: %u", count); + perf_log.warning(u8"STCX: took too long: %.3fµs (%u c)", count / (utils::get_tsc_freq() / 1000'000.), count); } break; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 7c1dc1b762..b1d110f844 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -371,7 +371,7 @@ namespace spu } } -const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -415,6 +415,7 @@ const auto spu_putllc_tx = build_function_asm(cpu_flag::pause)); c.mov(x86::eax, _XABORT_EXPLICIT); @@ -531,7 +541,8 @@ const auto spu_putllc_tx = build_function_asm(cpu_flag::pause)); c.jc(fall2); c.mov(x86::rax, x86::qword_ptr(x86::rbx)); - c.test(x86::rax, 127 - 1); - c.jnz(fall2); c.and_(x86::rax, -128); c.cmp(x86::rax, x86::r13); c.jne(fail2); @@ -666,7 +691,8 @@ const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_putlluc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -792,6 +820,7 @@ const auto spu_putlluc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const extern auto spu_getllar_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -979,6 +1033,7 @@ const extern auto spu_getllar_tx = build_function_asm> (number & 1); const u32 bitor_bit = (snr_config >> number) & 1; + // Redundant, g_use_rtm is checked inside tx_start now. if (g_use_rtm) { bool channel_notify = false; @@ -2422,9 +2485,9 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) if (g_use_rtm) [[likely]] { - switch (u32 count = spu_putllc_tx(addr, rtime, rdata, to_write)) + switch (u64 count = spu_putllc_tx(addr, rtime, rdata, to_write)) { - case UINT32_MAX: + case UINT64_MAX: { auto& data = *vm::get_super_ptr(addr); @@ -2451,6 +2514,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) break; } + last_ftime = -1; [[fallthrough]]; } case 0: @@ -2460,6 +2524,12 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) last_fail++; } + if (last_ftime != umax) + { + last_faddr = 0; + return false; + } + _m_prefetchw(rdata); _m_prefetchw(rdata + 64); last_faddr = addr; @@ -2469,9 +2539,9 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) } default: { - if (count > 60 && g_cfg.core.perf_report) [[unlikely]] + if (count > 20000 && g_cfg.core.perf_report) [[unlikely]] { - perf_log.warning("PUTLLC: took too long: %u", count); + perf_log.warning(u8"PUTLLC: took too long: %.3fµs (%u c)", count / (utils::get_tsc_freq() / 1000'000.), count); } break; @@ -2566,7 +2636,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write) if (g_use_rtm) [[likely]] { - const u32 result = spu_putlluc_tx(addr, to_write, cpu); + const u64 result = spu_putlluc_tx(addr, to_write, cpu); if (result == 0) { @@ -2579,9 +2649,9 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write) res += 127; }); } - else if (result > 60 && g_cfg.core.perf_report) [[unlikely]] + else if (result > 20000 && g_cfg.core.perf_report) [[unlikely]] { - perf_log.warning("STORE128: took too long: %u", result); + perf_log.warning(u8"STORE128: took too long: %.3fµs (%u c)", result / (utils::get_tsc_freq() / 1000'000.), result); } static_cast(cpu->test_stopped()); @@ -2796,6 +2866,7 @@ bool spu_thread::process_mfc_cmd() { rtime = last_ftime; raddr = last_faddr; + last_ftime = 0; mov_rdata(_ref(ch_mfc_cmd.lsa & 0x3ff80), rdata); ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index e28053e676..6fdd29072b 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -550,17 +550,19 @@ namespace vm void reservation_op_internal(u32 addr, std::function func) { - auto& res = vm::reservation_acquire(addr, 128); + auto& res = vm::reservation_acquire(addr, 1); + auto* ptr = vm::get_super_ptr(addr & -128); - cpu_thread::suspend_all(get_current_cpu_thread(), {&res}, [&] + cpu_thread::suspend_all(get_current_cpu_thread(), {ptr, ptr + 64, &res}, [&] { if (func()) { - // Success, release all locks if necessary + // Success, release the lock and progress res += 127; } else { + // Only release the lock on failure res -= 1; } }); diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h index 1e4b586763..be3903421c 100644 --- a/rpcs3/Emu/Memory/vm_reservation.h +++ b/rpcs3/Emu/Memory/vm_reservation.h @@ -7,6 +7,7 @@ #include extern bool g_use_rtm; +extern u64 g_rtm_tx_limit2; namespace vm { @@ -70,8 +71,8 @@ namespace vm // TODO: remove and make it external void reservation_op_internal(u32 addr, std::function func); - template - SAFE_BUFFERS inline auto reservation_op(_ptr_base ptr, F op) + template + SAFE_BUFFERS inline auto reservation_op(CPU& cpu, _ptr_base ptr, F op) { // Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_op: unsupported type"); @@ -94,9 +95,10 @@ namespace vm { // Stage 1: single optimistic transaction attempt unsigned status = _XBEGIN_STARTED; - unsigned count = 0; u64 _old = 0; + auto stamp0 = __rdtsc(), stamp1 = stamp0, stamp2 = stamp0; + #ifndef _MSC_VER __asm__ goto ("xbegin %l[stage2];" ::: "memory" : stage2); #else @@ -157,6 +159,7 @@ namespace vm #ifndef _MSC_VER __asm__ volatile ("mov %%eax, %0;" : "=r" (status) :: "memory"); #endif + stamp1 = __rdtsc(); // Touch memory if transaction failed with status 0 if (!status) @@ -167,12 +170,17 @@ namespace vm // Stage 2: try to lock reservation first _old = res.fetch_add(1); - // Also identify atomic op - count = 1; + // Compute stamps excluding memory touch + stamp2 = __rdtsc() - (stamp1 - stamp0); - // Start lightened transaction (TODO: tweaking) - for (; !(_old & rsrv_unique_lock) && count < 60; count++) + // Start lightened transaction + for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = __rdtsc()) { + if (cpu.has_pause_flag()) + { + break; + } + #ifndef _MSC_VER __asm__ goto ("xbegin %l[retry];" ::: "memory" : retry); #else diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index a71dbb51c1..521ec42b28 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -55,7 +55,9 @@ LOG_CHANNEL(sys_log, "SYS"); stx::manual_fixed_typemap g_fixed_typemap; -bool g_use_rtm; +bool g_use_rtm = false; +u64 g_rtm_tx_limit1 = 0; +u64 g_rtm_tx_limit2 = 0; std::string g_cfg_defaults; @@ -1019,6 +1021,14 @@ game_boot_result Emulator::Load(const std::string& title_id, bool add_only, bool } } + if (g_use_rtm) + { + // Update supplementary settings + const f64 _1ns = utils::get_tsc_freq() / 1000'000'000.; + g_rtm_tx_limit1 = g_cfg.core.tx_limit1_ns * _1ns; + g_rtm_tx_limit2 = g_cfg.core.tx_limit2_ns * _1ns; + } + // Load patches from different locations g_fxo->get()->append_title_patches(m_title_id); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 2661d58f56..88d97d4115 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -240,3 +240,5 @@ private: extern Emulator Emu; extern bool g_use_rtm; +extern u64 g_rtm_tx_limit1; +extern u64 g_rtm_tx_limit2; diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index ca5f74145e..7be132b865 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -66,6 +66,8 @@ struct cfg_root : cfg::node cfg::_bool hle_lwmutex{ this, "HLE lwmutex" }; // Force alternative lwmutex/lwcond implementation cfg::uint64 spu_llvm_lower_bound{ this, "SPU LLVM Lower Bound" }; cfg::uint64 spu_llvm_upper_bound{ this, "SPU LLVM Upper Bound", 0xffffffffffffffff }; + cfg::uint64 tx_limit1_ns{this, "TSX Transaction First Limit", 800}; // In nanoseconds + cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100, true }; // Changing this from 100 (percentage) may affect game speed in unexpected ways cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy",