diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index c6ba7d3de7..ebb74ef023 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -119,15 +119,16 @@ bool cpu_thread::check_state()
 
 	while (true)
 	{
-		if (state & cpu_flag::memory && state.test_and_reset(cpu_flag::memory))
+		if (state & cpu_flag::memory)
 		{
-			cpu_flag_memory = true;
-
 			if (auto& ptr = vm::g_tls_locked)
 			{
 				ptr->compare_and_swap(this, nullptr);
 				ptr = nullptr;
 			}
+
+			cpu_flag_memory = true;
+			state -= cpu_flag::memory;
 		}
 
 		if (state & cpu_flag::exit + cpu_flag::dbg_global_stop)
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 30d1a27ef5..87a27546e4 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -977,7 +977,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
 		}
 	}
 
-	vm::temporary_unlock(ppu);
+	vm::passive_unlock(ppu);
 
 	for (u64 i = 0;; i++)
 	{
@@ -1003,8 +1003,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
 		}
 	}
 
-	ppu.cpu_mem();
-
+	vm::passive_lock(ppu);
 	return static_cast<T>(ppu.rdata << data_off >> size_off);
 }
 
@@ -1044,7 +1043,7 @@ const auto ppu_stwcx_tx = build_function_asm<bool(u32 raddr, u64 rtime, u64 rdata, u32 value)>([](asmjit::X86Assembler& c, auto& args)
 	auto& data = vm::_ref<atomic_be_t<u32>>(addr & -4);
 	const u32 old_data = static_cast<u32>(ppu.rdata << ((addr & 7) * 8) >> 32);
 
-	if (ppu.raddr != addr || addr & 3 || old_data != data.load() || ppu.rtime != vm::reservation_acquire(addr, sizeof(u32)))
+	if (ppu.raddr != addr || addr & 3 || old_data != data.load() || ppu.rtime != (vm::reservation_acquire(addr, sizeof(u32)) & ~1ull))
 	{
 		ppu.raddr = 0;
 		return false;
@@ -1090,7 +1089,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
 		return false;
 	}
 
-	vm::temporary_unlock(ppu);
+	vm::passive_unlock(ppu);
 
 	auto& res = vm::reservation_lock(addr, sizeof(u32));
 
@@ -1098,7 +1097,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
 
 	if (result)
 	{
-		vm::reservation_update(addr, sizeof(u32));
+		res++;
 		vm::reservation_notifier(addr, sizeof(u32)).notify_all();
 	}
 	else
@@ -1106,7 +1105,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
 		res &= ~1ull;
 	}
 
-	ppu.cpu_mem();
+	vm::passive_lock(ppu);
 	ppu.raddr = 0;
 	return result;
 }
@@ -1137,7 +1136,7 @@ const auto ppu_stdcx_tx = build_function_asm<bool(u32 raddr, u64 rtime, u64 rdata, u64 value)>([](asmjit::X86Assembler& c, auto& args)
 	auto& data = vm::_ref<atomic_be_t<u64>>(addr & -8);
 	const u64 old_data = ppu.rdata << ((addr & 7) * 8);
 
-	if (ppu.raddr != addr || addr & 7 || old_data != data.load() || ppu.rtime != vm::reservation_acquire(addr, sizeof(u64)))
+	if (ppu.raddr != addr || addr & 7 || old_data != data.load() || ppu.rtime != (vm::reservation_acquire(addr, sizeof(u64)) & ~1ull))
 	{
 		ppu.raddr = 0;
 		return false;
@@ -1183,7 +1182,7 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
 		return false;
 	}
 
-	vm::temporary_unlock(ppu);
+	vm::passive_unlock(ppu);
 
 	auto& res = vm::reservation_lock(addr, sizeof(u64));
 
@@ -1191,7 +1190,7 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
 
 	if (result)
 	{
-		vm::reservation_update(addr, sizeof(u64));
+		res++;
 		vm::reservation_notifier(addr, sizeof(u64)).notify_all();
 	}
 	else
@@ -1199,7 +1198,7 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
 		res &= ~1ull;
 	}
 
-	ppu.cpu_mem();
+	vm::passive_lock(ppu);
 	ppu.raddr = 0;
 	return result;
 }
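All of the PPU changes above lean on one new convention for the 64-bit reservation word: bit 0 now doubles as a lock flag, so valid timestamps are always even. `vm::reservation_lock` sets bit 0, `res++` on the locked (odd) value simultaneously clears the flag and publishes a new timestamp, `res &= ~1ull` releases without bumping (the failure path), and readers compare `rtime` only after masking bit 0. A minimal, self-contained model of that convention follows; the type and method names (`reservation_model`, `unlock_and_bump`) are illustrative, not from the tree:

```cpp
#include <atomic>
#include <cstdint>

// Sketch of the reservation-word protocol assumed by this patch:
// bit 0 = lock flag, bits 1..63 = timestamp, so timestamps step by 2.
struct reservation_model
{
	std::atomic<std::uint64_t> res{0};

	// vm::reservation_lock equivalent: set bit 0, retrying while held.
	std::uint64_t lock()
	{
		for (;;)
		{
			const std::uint64_t old = res.fetch_or(1);
			if (!(old & 1))
				return old; // we own the lock; old is the even timestamp
		}
	}

	// "res++" in the patch: from an odd value, +1 clears the lock flag
	// and publishes the next even timestamp in a single atomic op.
	void unlock_and_bump() { res.fetch_add(1); }

	// "res &= ~1ull": release without invalidating readers' rtime.
	void unlock_keep_time() { res.fetch_and(~1ull); }

	// What ppu_stwcx/ppu_stdcx now compare against ppu.rtime.
	std::uint64_t time() const { return res.load() & ~1ull; }
};
```

The same scheme is why `vm::reservation_update` in vm.h (near the end of this patch) degenerates to a plain `+= 2`.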
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
index 6740c5cb9b..2751fb5fba 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@@ -1436,6 +1436,7 @@ void spu_recompiler::get_events()
 	c->mov(*qw0, imm_ptr(vm::g_reservations));
 	c->shr(qw1->r32(), 4);
 	c->mov(*qw0, x86::qword_ptr(*qw0, *qw1));
+	c->and_(qw0->r64(), (u64)(~1ull));
 	c->cmp(*qw0, SPU_OFF_64(rtime));
 	c->jne(fail);
 	c->mov(*qw0, imm_ptr(vm::g_base_addr));
@@ -2596,7 +2597,7 @@ static void spu_wrch(spu_thread* _spu, u32 ch, u32 value, spu_function_t _ret)
 
 static void spu_wrch_mfc(spu_thread* _spu, spu_function_t _ret)
 {
-	if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
+	if (!_spu->process_mfc_cmd())
 	{
 		_ret = &spu_wrch_ret;
 	}
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 3f29c0c2c7..5fdd988569 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -3362,7 +3362,7 @@ public:
 
 	static bool exec_mfc_cmd(spu_thread* _spu)
 	{
-		return _spu->process_mfc_cmd(_spu->ch_mfc_cmd);
+		return _spu->process_mfc_cmd();
 	}
 
 	void WRCH(spu_opcode_t op) //
@@ -3541,9 +3541,9 @@ public:
 			csize = ci->getZExtValue();
 		}
 
-		if (cmd >= MFC_SNDSIG_CMD)
+		if (cmd >= MFC_SNDSIG_CMD && csize != 4)
 		{
-			csize = 4;
+			csize = -1;
 		}
 
 		llvm::Value* src = m_ir->CreateGEP(m_lsptr, zext(lsa).value);
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 9753affc83..1867b53bce 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -40,6 +40,34 @@ bool operator ==(const u128& lhs, const u128& rhs)
 }
 #endif
 
+static FORCE_INLINE void mov_rdata(u128* const dst, const u128* const src)
+{
+	{
+		const u128 data0 = src[0];
+		const u128 data1 = src[1];
+		const u128 data2 = src[2];
+		dst[0] = data0;
+		dst[1] = data1;
+		dst[2] = data2;
+	}
+
+	{
+		const u128 data0 = src[3];
+		const u128 data1 = src[4];
+		const u128 data2 = src[5];
+		dst[3] = data0;
+		dst[4] = data1;
+		dst[5] = data2;
+	}
+
+	{
+		const u128 data0 = src[6];
+		const u128 data1 = src[7];
+		dst[6] = data0;
+		dst[7] = data1;
+	}
+};
+
 extern u64 get_timebased_time();
 extern u64 get_system_time();
 
@@ -158,12 +186,13 @@ namespace spu
 	}
 }
 
-const auto spu_putllc_tx = build_function_asm<bool(u32 raddr, u64 rtime, const void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
+const auto spu_putllc_tx = build_function_asm<u32(u32 raddr, u64 rtime, const void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
 {
 	using namespace asmjit;
 
 	Label fall = c.newLabel();
 	Label fail = c.newLabel();
+	Label retry = c.newLabel();
 
 	// Prepare registers
 	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
@@ -216,7 +245,7 @@ const auto spu_putllc_tx = build_function_asm<u32(u32 raddr, u64 rtime, const void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
 	c.ret();
 });
 
-const auto spu_getll_tx = build_function_asm<bool(u32 raddr, void* rdata, u64* rtime)>([](asmjit::X86Assembler& c, auto& args)
+const auto spu_getll_tx = build_function_asm<u64(u32 raddr, void* rdata)>([](asmjit::X86Assembler& c, auto& args)
 {
 	using namespace asmjit;
 
@@ -271,8 +303,6 @@ const auto spu_getll_tx = build_function_asm<u64(u32 raddr, void* rdata)>([](asmjit::X86Assembler& c, auto& args)
 	c.xend();
 	c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm0);
 	c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm1);
 	c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm2);
 	c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm3);
-	c.mov(x86::qword_ptr(args[2]), x86::rax);
-	c.mov(x86::eax, 1);
 	c.ret();
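The transaction helpers also change their reporting contract. Judging by the call sites later in this patch, `spu_putllc_tx` now returns a `u32` status (0: definite mismatch, 1: committed, 2 or more: transaction aborted, worth retrying after a yield), and `spu_getll_tx` returns the observed timestamp itself, with an odd value meaning the line was locked and the caller should retry. The real functions are emitted with asmjit above; the following is only a hedged C++ rendition of that contract using RTM intrinsics (compile with -mrtm; `putllc_tx_sketch` is an invented name):

```cpp
#include <immintrin.h>
#include <atomic>
#include <cstdint>
#include <cstring>

// Status codes inferred from the callers: 0 = fail, 1 = success, 2 = retry.
inline std::uint32_t putllc_tx_sketch(std::atomic<std::uint64_t>& res, std::uint64_t rtime,
                                      const char* expected128, const char* new128, char* mem128)
{
	if (_xbegin() == _XBEGIN_STARTED)
	{
		if ((res.load(std::memory_order_relaxed) & ~1ull) != rtime ||
			std::memcmp(mem128, expected128, 128) != 0)
		{
			_xend();
			return 0; // reservation time or data changed: definite failure
		}

		std::memcpy(mem128, new128, 128);            // commit the new data
		res.fetch_add(2, std::memory_order_relaxed); // next even timestamp
		_xend();
		return 1;
	}

	return 2; // aborted (conflict, capacity): caller yields and retries
}
```

Pushing the retry decision out to `process_mfc_cmd` is what lets the caller `std::this_thread::yield()` between attempts instead of spinning inside the helper.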
@@ -781,66 +811,73 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 		switch (size)
 		{
 		case 1:
 		{
 			auto& res = vm::reservation_lock(eal, 1);
-			*static_cast<u8*>(dst) = *static_cast<const u8*>(src);
-			res &= ~1ull;
+			*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
+			res++;
 			break;
 		}
 		case 2:
 		{
 			auto& res = vm::reservation_lock(eal, 2);
-			*static_cast<u16*>(dst) = *static_cast<const u16*>(src);
-			res &= ~1ull;
+			*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
+			res++;
 			break;
 		}
 		case 4:
 		{
 			auto& res = vm::reservation_lock(eal, 4);
-			*static_cast<u32*>(dst) = *static_cast<const u32*>(src);
-			res &= ~1ull;
+			*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
+			res++;
 			break;
 		}
 		case 8:
 		{
 			auto& res = vm::reservation_lock(eal, 8);
-			*static_cast<u64*>(dst) = *static_cast<const u64*>(src);
-			res &= ~1ull;
-			break;
-		}
-		case 16:
-		{
-			auto& res = vm::reservation_lock(eal, 16);
-			_mm_store_si128(static_cast<__m128i*>(dst), _mm_load_si128(static_cast<const __m128i*>(src)));
-			res &= ~1ull;
+			*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
+			res++;
 			break;
 		}
 		default:
 		{
-			auto* res = &vm::reservation_lock(eal, 16);
-			auto vdst = static_cast<__m128i*>(dst);
-			auto vsrc = static_cast<const __m128i*>(src);
-
-			for (u32 addr = eal, end = eal + size;; vdst++, vsrc++)
+			if (((eal & 127) + size) <= 128)
 			{
-				_mm_store_si128(vdst, _mm_load_si128(vsrc));
+				// Lock one cache line
+				auto& res = vm::reservation_lock(eal, 128);
 
-				addr += 16;
-
-				if (addr == end)
+				while (size)
 				{
-					break;
+					*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+					dst += 16;
+					src += 16;
+					size -= 16;
 				}
 
-				if (addr % 128)
-				{
-					continue;
-				}
-
-				res->fetch_and(~1ull);
-				res = &vm::reservation_lock(addr, 16);
+				res++;
+				break;
 			}
 
-			res->fetch_and(~1ull);
+			auto lock = vm::passive_lock(eal & -128u, ::align(eal + size, 128));
+
+			while (size >= 128)
+			{
+				mov_rdata(reinterpret_cast<u128*>(dst), reinterpret_cast<const u128*>(src));
+
+				dst += 128;
+				src += 128;
+				size -= 128;
+			}
+
+			while (size)
+			{
+				*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+				dst += 16;
+				src += 16;
+				size -= 16;
+			}
+
+			*lock = 0;
 			break;
 		}
 	}
@@ -852,67 +889,44 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 	{
 	case 1:
 	{
-		*static_cast<u8*>(dst) = *static_cast<const u8*>(src);
+		*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
 		break;
 	}
 	case 2:
 	{
-		*static_cast<u16*>(dst) = *static_cast<const u16*>(src);
+		*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
 		break;
 	}
 	case 4:
 	{
-		*static_cast<u32*>(dst) = *static_cast<const u32*>(src);
+		*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
 		break;
 	}
 	case 8:
 	{
-		*static_cast<u64*>(dst) = *static_cast<const u64*>(src);
-		break;
-	}
-	case 16:
-	{
-		_mm_store_si128(static_cast<__m128i*>(dst), _mm_load_si128(static_cast<const __m128i*>(src)));
+		*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
 		break;
 	}
 	default:
 	{
-		auto vdst = static_cast<__m128i*>(dst);
-		auto vsrc = static_cast<const __m128i*>(src);
-		auto vcnt = size / sizeof(__m128i);
-
-		while (vcnt >= 8)
+		while (size >= 128)
 		{
-			const __m128i data[]
-			{
-				_mm_load_si128(vsrc + 0),
-				_mm_load_si128(vsrc + 1),
-				_mm_load_si128(vsrc + 2),
-				_mm_load_si128(vsrc + 3),
-				_mm_load_si128(vsrc + 4),
-				_mm_load_si128(vsrc + 5),
-				_mm_load_si128(vsrc + 6),
-				_mm_load_si128(vsrc + 7),
-			};
+			mov_rdata(reinterpret_cast<u128*>(dst), reinterpret_cast<const u128*>(src));
 
-			_mm_store_si128(vdst + 0, data[0]);
-			_mm_store_si128(vdst + 1, data[1]);
-			_mm_store_si128(vdst + 2, data[2]);
-			_mm_store_si128(vdst + 3, data[3]);
-			_mm_store_si128(vdst + 4, data[4]);
-			_mm_store_si128(vdst + 5, data[5]);
-			_mm_store_si128(vdst + 6, data[6]);
-			_mm_store_si128(vdst + 7, data[7]);
-
-			vcnt -= 8;
-			vsrc += 8;
-			vdst += 8;
+			dst += 128;
+			src += 128;
+			size -= 128;
 		}
 
-		while (vcnt--)
+		while (size)
 		{
-			_mm_store_si128(vdst++, _mm_load_si128(vsrc++));
+			*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+			dst += 16;
+			src += 16;
+			size -= 16;
 		}
+
 		break;
 	}
 	}
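The rewritten store path above splits DMA puts into two cases: a transfer that stays within one 128-byte line is serialized by that line's reservation word alone (lock, copy, `res++`), while a transfer crossing line boundaries now advertises its `[begin, end)` range so that `vm::writer_lock` can wait for it to drain. A standalone model of the range-slot mechanics (the `end << 32 | begin` encoding mirrors `_register_range_lock` in vm.cpp further down; all names here are illustrative):

```cpp
#include <atomic>
#include <cstdint>

// Six slots, like vm::g_range_locks; 0 means free, otherwise (end << 32 | begin).
std::atomic<std::uint64_t> range_locks[6];

std::atomic<std::uint64_t>* acquire_range_lock(std::uint32_t begin, std::uint32_t end)
{
	const std::uint64_t info = (std::uint64_t{end} << 32) | begin;

	for (;;) // keep scanning until some slot is free
	{
		for (auto& slot : range_locks)
		{
			std::uint64_t expected = 0;
			if (slot.compare_exchange_strong(expected, info))
				return &slot;
		}
	}
}

void dma_put_sketch(std::uint32_t eal, std::uint32_t size)
{
	if (((eal & 127) + size) <= 128)
	{
		// Fits in a single 128-byte line: the line's reservation word
		// (lock bit + res++) is enough to serialize the store.
	}
	else
	{
		// Spans lines: publish the whole range, copy 128 bytes at a time
		// (mov_rdata in the patch), then free the slot as "*lock = 0" does.
		auto* lock = acquire_range_lock(eal & ~127u, (eal + size + 127) & ~127u);
		// copy would happen here
		lock->store(0);
	}
}
```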
@@ -1030,7 +1044,12 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
 
 	if (raddr && addr == raddr)
 	{
-		ch_event_stat |= SPU_EVENT_LR;
+		// Last check for event before we clear the reservation
+		if ((vm::reservation_acquire(addr, 128) & ~1ull) != rtime || rdata != vm::_ref<decltype(rdata)>(addr))
+		{
+			ch_event_stat |= SPU_EVENT_LR;
+		}
+
 		raddr = 0;
 	}
 
@@ -1057,20 +1076,20 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
 	auto& data = vm::_ref<decltype(rdata)>(addr);
 	auto& res = vm::reservation_lock(addr, 128);
 
-	vm::_ref<atomic_t<u32>>(addr) += 0;
+	*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
 
 	if (g_cfg.core.spu_accurate_putlluc)
 	{
 		// Full lock (heavyweight)
 		// TODO: vm::check_addr
-		vm::writer_lock lock(1);
-		data = to_write;
-		vm::reservation_update(addr, 128);
+		vm::writer_lock lock(addr);
+		mov_rdata(data.data(), to_write.data());
+		res++;
 	}
 	else
 	{
-		data = to_write;
-		vm::reservation_update(addr, 128);
+		mov_rdata(data.data(), to_write.data());
+		res++;
 	}
 }
 
@@ -1140,11 +1159,7 @@ void spu_thread::do_mfc(bool wait)
 				return false;
 			}
 
-			if (args.size)
-			{
-				do_dma_transfer(args);
-			}
-			else if (args.cmd == MFC_PUTQLLUC_CMD)
+			if (args.cmd == MFC_PUTQLLUC_CMD)
 			{
 				if (fence & mask)
 				{
@@ -1153,6 +1168,10 @@ void spu_thread::do_mfc(bool wait)
 
 				do_putlluc(args);
 			}
+			else if (args.size)
+			{
+				do_dma_transfer(args);
+			}
 
 			removed++;
 			return true;
@@ -1184,7 +1203,7 @@ u32 spu_thread::get_mfc_completed()
 	return ch_tag_mask & ~mfc_fence;
 }
 
-bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
+bool spu_thread::process_mfc_cmd()
 {
 	// Stall infinitely if MFC queue is full
 	while (UNLIKELY(mfc_size >= 16))
@@ -1198,29 +1217,24 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	}
 
 	spu::scheduler::concurrent_execution_watchdog watchdog(*this);
-	LOG_TRACE(SPU, "DMAC: cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x", args.cmd, args.lsa, args.eal, args.tag, args.size);
+	LOG_TRACE(SPU, "DMAC: cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x", ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
 
-	switch (args.cmd)
+	switch (ch_mfc_cmd.cmd)
 	{
 	case MFC_GETLLAR_CMD:
 	{
-		const u32 addr = args.eal & -128u;
+		const u32 addr = ch_mfc_cmd.eal & -128u;
 		auto& data = vm::_ref<decltype(rdata)>(addr);
-
-		if (raddr && raddr != addr)
-		{
-			ch_event_stat |= SPU_EVENT_LR;
-		}
-
-		raddr = addr;
+		auto& dst = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
+		u64 ntime;
 
 		const bool is_polling = false; // TODO
 
 		if (is_polling)
 		{
-			rtime = vm::reservation_acquire(raddr, 128);
+			rtime = vm::reservation_acquire(addr, 128);
 
-			while (rdata == data && vm::reservation_acquire(raddr, 128) == rtime)
+			while (rdata == data && vm::reservation_acquire(addr, 128) == rtime)
 			{
 				if (is_stopped())
 				{
@@ -1235,57 +1249,78 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 		{
 			u64 count = 1;
 
-			while (g_cfg.core.spu_accurate_getllar && !spu_getll_tx(raddr, rdata.data(), &rtime))
+			if (g_cfg.core.spu_accurate_getllar)
 			{
-				std::this_thread::yield();
-				count += 2;
+				while ((ntime = spu_getll_tx(addr, dst.data())) & 1)
+				{
+					std::this_thread::yield();
+					count += 2;
+				}
 			}
-
-			if (!g_cfg.core.spu_accurate_getllar)
+			else
 			{
 				for (;; count++, busy_wait(300))
 				{
-					rtime = vm::reservation_acquire(raddr, 128);
-					rdata = data;
+					ntime = vm::reservation_acquire(addr, 128);
+					dst = data;
 
-					if (LIKELY(vm::reservation_acquire(raddr, 128) == rtime))
+					if (LIKELY(vm::reservation_acquire(addr, 128) == ntime))
 					{
 						break;
 					}
 				}
 			}
 
-			if (count > 9)
+			if (count > 15)
 			{
-				LOG_ERROR(SPU, "%s took too long: %u", args.cmd, count);
+				LOG_ERROR(SPU, "%s took too long: %u", ch_mfc_cmd.cmd, count);
 			}
 		}
 		else
 		{
-			auto& res = vm::reservation_lock(raddr, 128);
+			auto& res = vm::reservation_lock(addr, 128);
 
 			if (g_cfg.core.spu_accurate_getllar)
 			{
-				vm::_ref<atomic_t<u32>>(raddr) += 0;
+				*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
 
 				// Full lock (heavyweight)
 				// TODO: vm::check_addr
-				vm::writer_lock lock(1);
+				vm::writer_lock lock(addr);
 
-				rtime = res & ~1ull;
-				rdata = data;
+				ntime = res & ~1ull;
+				mov_rdata(dst.data(), data.data());
 				res &= ~1ull;
 			}
 			else
 			{
-				rtime = res & ~1ull;
-				rdata = data;
+				ntime = res & ~1ull;
+				mov_rdata(dst.data(), data.data());
 				res &= ~1ull;
 			}
 		}
 
-		// Copy to LS
-		_ref<decltype(rdata)>(args.lsa & 0x3ff80) = rdata;
+		if (const u32 _addr = raddr)
+		{
+			// Last check for event before we replace the reservation with a new one
+			if ((vm::reservation_acquire(_addr, 128) & ~1ull) != rtime || rdata != vm::_ref<decltype(rdata)>(_addr))
+			{
+				ch_event_stat |= SPU_EVENT_LR;
+
+				if (_addr == addr)
+				{
+					// Lost current reservation
+					raddr = 0;
+					ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
+					return true;
+				}
+			}
+		}
+
+		raddr = addr;
+		rtime = ntime;
+		mov_rdata(rdata.data(), dst.data());
+
 		ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
 		return true;
 	}
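Note the ordering in the GETLLAR path above: before `raddr`/`rtime`/`rdata` are overwritten, the previous reservation is validated one last time, so a pending lost-reservation event cannot be silently dropped. If the line being re-reserved is the one that changed, the command still completes with `MFC_GETLLAR_SUCCESS`, but `raddr` is cleared so the SPU observes `SPU_EVENT_LR`. The predicate itself, restated standalone (hypothetical helper name; the real check compares `vm::reservation_acquire(addr, 128) & ~1ull` with `rtime` and 128 bytes of memory with `rdata`):

```cpp
#include <cstdint>
#include <cstring>

// True when the reservation must be considered lost: either the line's
// timestamp moved on (lock bit masked off) or its payload differs.
inline bool reservation_lost(std::uint64_t current_word, std::uint64_t rtime,
                             const void* current_data, const void* rdata)
{
	return (current_word & ~1ull) != rtime || std::memcmp(current_data, rdata, 128) != 0;
}
```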
@@ -1293,40 +1328,50 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	case MFC_PUTLLC_CMD:
 	{
 		// Store conditionally
-		const u32 addr = args.eal & -128u;
+		const u32 addr = ch_mfc_cmd.eal & -128u;
+		u32 result = 0;
 
-		bool result = false;
-
-		if (raddr == addr && rtime == vm::reservation_acquire(raddr, 128))
+		if (raddr == addr && rtime == (vm::reservation_acquire(raddr, 128) & ~1ull))
 		{
-			const auto& to_write = _ref<decltype(rdata)>(args.lsa & 0x3ff80);
+			const auto& to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
 
 			if (LIKELY(g_use_rtm))
 			{
-				if (spu_putllc_tx(raddr, rtime, rdata.data(), to_write.data()))
+				while (true)
 				{
-					vm::reservation_notifier(raddr, 128).notify_all();
-					result = true;
-				}
+					result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data());
+
+					if (result < 2)
+					{
+						break;
+					}
 
-				// Don't fallback to heavyweight lock, just give up
+					// Retry
+					std::this_thread::yield();
+				}
 			}
 			else if (auto& data = vm::_ref<decltype(rdata)>(addr); rdata == data)
 			{
 				auto& res = vm::reservation_lock(raddr, 128);
 
-				vm::_ref<atomic_t<u32>>(raddr) += 0;
-
-				// Full lock (heavyweight)
-				// TODO: vm::check_addr
-				vm::writer_lock lock(1);
-
-				if (rtime == (res & ~1ull) && rdata == data)
+				if (rtime == (res & ~1ull))
 				{
-					data = to_write;
-					vm::reservation_update(raddr, 128);
-					vm::reservation_notifier(raddr, 128).notify_all();
-					result = true;
+					*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
+
+					// Full lock (heavyweight)
+					// TODO: vm::check_addr
+					vm::writer_lock lock(addr);
+
+					if (rdata == data)
+					{
+						mov_rdata(data.data(), to_write.data());
+						res++;
+						result = 1;
+					}
+					else
+					{
+						res &= ~1ull;
+					}
 				}
 				else
 				{
@@ -1337,16 +1382,21 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 
 		if (result)
 		{
+			vm::reservation_notifier(addr, 128).notify_all();
 			ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
 		}
 		else
 		{
-			ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
-		}
+			if (raddr)
+			{
+				// Last check for event before we clear the reservation
+				if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & ~1ull) || rdata != vm::_ref<decltype(rdata)>(raddr))
+				{
+					ch_event_stat |= SPU_EVENT_LR;
+				}
+			}
 
-		if (raddr && !result)
-		{
-			ch_event_stat |= SPU_EVENT_LR;
+			ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
 		}
 
 		raddr = 0;
@@ -1354,23 +1404,22 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	}
 	case MFC_PUTLLUC_CMD:
 	{
-		do_putlluc(args);
+		do_putlluc(ch_mfc_cmd);
 		ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
 		return true;
 	}
 	case MFC_PUTQLLUC_CMD:
 	{
-		const u32 mask = utils::rol32(1, args.tag);
+		const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
 
 		if (UNLIKELY((mfc_barrier | mfc_fence) & mask))
 		{
-			args.size = 0;
-			mfc_queue[mfc_size++] = args;
+			mfc_queue[mfc_size++] = ch_mfc_cmd;
 			mfc_fence |= mask;
 		}
 		else
 		{
-			do_putlluc(args);
+			do_putlluc(ch_mfc_cmd);
 		}
 
 		return true;
@@ -1379,7 +1428,11 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	case MFC_SNDSIGB_CMD:
 	case MFC_SNDSIGF_CMD:
 	{
-		args.size = 4;
+		if (ch_mfc_cmd.size != 4)
+		{
+			break;
+		}
+
 		// Fallthrough
 	}
 	case MFC_PUT_CMD:
@@ -1392,24 +1445,24 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	case MFC_GETB_CMD:
 	case MFC_GETF_CMD:
 	{
-		if (LIKELY(args.size <= 0x4000))
+		if (LIKELY(ch_mfc_cmd.size <= 0x4000))
 		{
-			if (LIKELY(do_dma_check(args)))
+			if (LIKELY(do_dma_check(ch_mfc_cmd)))
 			{
-				if (LIKELY(args.size))
+				if (ch_mfc_cmd.size)
 				{
-					do_dma_transfer(args);
+					do_dma_transfer(ch_mfc_cmd);
 				}
 
 				return true;
 			}
 
-			mfc_queue[mfc_size++] = args;
-			mfc_fence |= utils::rol32(1, args.tag);
+			mfc_queue[mfc_size++] = ch_mfc_cmd;
+			mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
 
-			if (args.cmd & MFC_BARRIER_MASK)
+			if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
 			{
-				mfc_barrier |= utils::rol32(1, args.tag);
+				mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
 			}
 
 			return true;
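The queueing logic above is all per-tag bitmask bookkeeping: `mfc_fence` marks tags that still have commands queued, `mfc_barrier` marks tags blocked behind a barrier-flagged command, and since MFC tags are 0-31, `utils::rol32(1, tag)` is simply the tag's bit. A standalone restatement (the `tag_bit`/`mfc_masks` names are illustrative):

```cpp
#include <cstdint>

// One bit per MFC tag (tags are 0..31), as utils::rol32(1, tag) produces.
constexpr std::uint32_t tag_bit(std::uint32_t tag) { return 1u << (tag & 31); }

struct mfc_masks
{
	std::uint32_t fence = 0;   // tags with commands still sitting in the queue
	std::uint32_t barrier = 0; // tags stalled behind a barrier/fence command

	// Why PUTQLLUC (and PUT/GET) must queue when either mask has the bit set.
	bool must_queue(std::uint32_t tag) const
	{
		return ((barrier | fence) & tag_bit(tag)) != 0;
	}

	void on_queued(std::uint32_t tag, bool barrier_cmd)
	{
		fence |= tag_bit(tag);

		if (barrier_cmd) // command had MFC_BARRIER_MASK set
			barrier |= tag_bit(tag);
	}
};
```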
@@ -1427,22 +1480,25 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	case MFC_GETLB_CMD:
 	case MFC_GETLF_CMD:
 	{
-		if (LIKELY(args.size <= 0x4000))
+		if (LIKELY(ch_mfc_cmd.size <= 0x4000))
 		{
-			if (LIKELY(do_dma_check(args)))
+			auto& cmd = mfc_queue[mfc_size];
+			cmd = ch_mfc_cmd;
+
+			if (LIKELY(do_dma_check(cmd)))
 			{
-				if (LIKELY(do_list_transfer(args)))
+				if (LIKELY(do_list_transfer(cmd)))
 				{
 					return true;
 				}
 			}
 
-			mfc_queue[mfc_size++] = args;
-			mfc_fence |= utils::rol32(1, args.tag);
+			mfc_size++;
+			mfc_fence |= utils::rol32(1, cmd.tag);
 
-			if (args.cmd & MFC_BARRIER_MASK)
+			if (cmd.cmd & MFC_BARRIER_MASK)
 			{
-				mfc_barrier |= utils::rol32(1, args.tag);
+				mfc_barrier |= utils::rol32(1, cmd.tag);
 			}
 
 			return true;
@@ -1460,7 +1516,7 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 		}
 		else
 		{
-			mfc_queue[mfc_size++] = args;
+			mfc_queue[mfc_size++] = ch_mfc_cmd;
 			mfc_barrier |= -1;
 		}
 
@@ -1473,7 +1529,7 @@ bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 	}
 
 	fmt::throw_exception("Unknown command (cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE,
-		args.cmd, args.lsa, args.eal, args.tag, args.size);
+		ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
 }
 
 u32 spu_thread::get_events(bool waiting)
@@ -1486,7 +1542,7 @@ u32 spu_thread::get_events(bool waiting)
 	}
 
 	// Check reservation status and set SPU_EVENT_LR if lost
-	if (raddr && (vm::reservation_acquire(raddr, sizeof(rdata)) != rtime || rdata != vm::_ref<decltype(rdata)>(raddr)))
+	if (raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & ~1ull) != rtime || rdata != vm::_ref<decltype(rdata)>(raddr)))
 	{
 		ch_event_stat |= SPU_EVENT_LR;
 		raddr = 0;
@@ -2026,7 +2082,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 	case MFC_Cmd:
 	{
 		ch_mfc_cmd.cmd = MFC(value & 0xff);
-		return process_mfc_cmd(ch_mfc_cmd);
+		return process_mfc_cmd();
 	}
 
 	case MFC_WrListStallAck:
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index 7ec18ddee4..4bc071af0d 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -1,8 +1,9 @@
-#pragma once
+#pragma once
 
 #include "Emu/Cell/Common.h"
 #include "Emu/CPU/CPUThread.h"
 #include "Emu/Cell/SPUInterpreter.h"
+#include "Emu/Memory/vm.h"
 #include "MFC.h"
 
 #include <map>
@@ -595,7 +596,7 @@ public:
 	void do_mfc(bool wait = true);
 	u32 get_mfc_completed();
 
-	bool process_mfc_cmd(spu_mfc_cmd args);
+	bool process_mfc_cmd();
 	u32 get_events(bool waiting = false);
 	void set_events(u32 mask);
 	void set_interrupt_status(bool enable);
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 67fde64b31..9202cc03fa 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -57,8 +57,12 @@ namespace vm
 	// Memory mutex acknowledgement
 	thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
 
+	// Currently locked address
+	atomic_t<u32> g_addr_lock = 0;
+
 	// Memory mutex: passive locks
-	std::array<atomic_t<cpu_thread*>, 32> g_locks;
+	std::array<atomic_t<cpu_thread*>, 4> g_locks{};
+	std::array<atomic_t<u64>, 6> g_range_locks{};
 
 	static void _register_lock(cpu_thread* _cpu)
 	{
@@ -72,11 +76,25 @@ namespace vm
 		}
 	}
 
-	bool passive_lock(cpu_thread& cpu, bool wait)
+	static atomic_t<u64>* _register_range_lock(const u64 lock_info)
+	{
+		while (true)
+		{
+			for (auto& lock : g_range_locks)
+			{
+				if (!lock && lock.compare_and_swap_test(0, lock_info))
+				{
+					return &lock;
+				}
+			}
+		}
+	}
+
+	void passive_lock(cpu_thread& cpu)
 	{
 		if (UNLIKELY(g_tls_locked && *g_tls_locked == &cpu))
 		{
-			return true;
+			return;
 		}
 
 		if (LIKELY(g_mutex.is_lockable()))
@@ -84,31 +102,46 @@ namespace vm
 			// Optimistic path (hope that mutex is not exclusively locked)
 			_register_lock(&cpu);
 
-			if (UNLIKELY(!g_mutex.is_lockable()))
+			if (LIKELY(g_mutex.is_lockable()))
 			{
-				passive_unlock(cpu);
-
-				if (!wait)
-				{
-					return false;
-				}
-
-				::reader_lock lock(g_mutex);
-				_register_lock(&cpu);
+				return;
 			}
+
+			passive_unlock(cpu);
 		}
-		else
+
+		::reader_lock lock(g_mutex);
+		_register_lock(&cpu);
+	}
+
+	atomic_t<u64>* passive_lock(const u32 addr, const u32 end)
+	{
+		static const auto test_addr = [](const u32 target, const u32 addr, const u32 end)
 		{
-			if (!wait)
+			return addr > target || end <= target;
+		};
+
+		atomic_t<u64>* _ret;
+
+		if (LIKELY(test_addr(g_addr_lock.load(), addr, end)))
+		{
+			// Optimistic path (hope that address range is not locked)
+			_ret = _register_range_lock((u64)end << 32 | addr);
+
+			if (LIKELY(test_addr(g_addr_lock.load(), addr, end)))
 			{
-				return false;
+				return _ret;
 			}
 
-			::reader_lock lock(g_mutex);
-			_register_lock(&cpu);
+			*_ret = 0;
 		}
 
-		return true;
+		{
+			::reader_lock lock(g_mutex);
+			_ret = _register_range_lock((u64)end << 32 | addr);
+		}
+
+		return _ret;
 	}
 
 	void passive_unlock(cpu_thread& cpu)
@@ -194,8 +227,7 @@ namespace vm
 		m_upgraded = true;
 	}
 
-	writer_lock::writer_lock(int full)
-		: locked(true)
+	writer_lock::writer_lock(u32 addr)
 	{
 		auto cpu = get_current_cpu_thread();
 
@@ -206,7 +238,7 @@ namespace vm
 
 		g_mutex.lock();
 
-		if (full)
+		if (addr)
 		{
 			for (auto& lock : g_locks)
 			{
@@ -216,6 +248,30 @@ namespace vm
 				}
 			}
 
+			g_addr_lock = addr;
+
+			for (auto& lock : g_range_locks)
+			{
+				while (true)
+				{
+					const u64 value = lock;
+
+					// Test beginning address
+					if (static_cast<u32>(value) > addr)
+					{
+						break;
+					}
+
+					// Test end address
+					if (static_cast<u32>(value >> 32) <= addr)
+					{
+						break;
+					}
+
+					_mm_pause();
+				}
+			}
+
 			for (auto& lock : g_locks)
 			{
 				while (cpu_thread* ptr = lock)
@@ -225,7 +281,7 @@ namespace vm
 						break;
 					}
 
-					busy_wait();
+					_mm_pause();
 				}
 			}
 		}
@@ -239,10 +295,8 @@ namespace vm
 
 	writer_lock::~writer_lock()
 	{
-		if (locked)
-		{
-			g_mutex.unlock();
-		}
+		g_addr_lock.raw() = 0;
+		g_mutex.unlock();
 	}
 
 	void reservation_lock_internal(atomic_t<u64>& res)
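The vm.cpp changes above implement a two-sided handshake. A range reader publishes `end << 32 | begin` into a `g_range_locks` slot, then re-checks `g_addr_lock` and backs out if a writer raced it; `writer_lock` publishes its target address into `g_addr_lock` first, then spins until no slot still covers that address. A standalone model of the writer's wait (note a free slot, value 0, passes immediately because its decoded end is 0):

```cpp
#include <atomic>
#include <cstdint>

// True when [begin, end) does NOT cover target; the same test the reader's
// test_addr lambda applies against g_addr_lock.
constexpr bool range_misses(std::uint32_t target, std::uint32_t begin, std::uint32_t end)
{
	return begin > target || end <= target;
}

void writer_wait_for_ranges(std::uint32_t target, std::atomic<std::uint64_t> (&slots)[6])
{
	for (auto& slot : slots)
	{
		for (;;)
		{
			const std::uint64_t v = slot.load();

			if (range_misses(target, static_cast<std::uint32_t>(v), static_cast<std::uint32_t>(v >> 32)))
				break; // slot free (end == 0) or its range misses the target

			// An in-flight cross-line DMA overlaps the target: keep spinning
			// (the real code issues _mm_pause() here).
		}
	}
}
```

Because the reader double-checks `g_addr_lock` after publishing its slot, any overlap is caught by whichever side published second.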
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index 0e1ab0332d..562ba25218 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -53,7 +53,8 @@ namespace vm
 	extern thread_local atomic_t<cpu_thread*>* g_tls_locked;
 
 	// Register reader
-	bool passive_lock(cpu_thread& cpu, bool wait = true);
+	void passive_lock(cpu_thread& cpu);
+	atomic_t<u64>* passive_lock(const u32 begin, const u32 end);
 
 	// Unregister reader
 	void passive_unlock(cpu_thread& cpu);
@@ -80,14 +81,10 @@ namespace vm
 
 	struct writer_lock final
 	{
-		const bool locked;
-
 		writer_lock(const writer_lock&) = delete;
 		writer_lock& operator=(const writer_lock&) = delete;
-		writer_lock(int full);
+		writer_lock(u32 addr = 0);
 		~writer_lock();
-
-		explicit operator bool() const { return locked; }
 	};
 
 	// Get reservation status for further atomic update: last update timestamp
@@ -101,7 +98,7 @@ namespace vm
 	inline void reservation_update(u32 addr, u32 size, bool lsb = false)
 	{
 		// Update reservation info with new timestamp
-		reservation_acquire(addr, size) = (__rdtsc() << 1) | u64{lsb};
+		reservation_acquire(addr, size) += 2;
 	}
 
 	// Get reservation sync variable
diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h
index aa8872e868..2aa9490593 100644
--- a/rpcs3/Emu/System.h
+++ b/rpcs3/Emu/System.h
@@ -354,7 +354,7 @@ struct cfg_root : cfg::node
 		node_core(cfg::node* _this) : cfg::node(_this, "Core") {}
 
 		cfg::_enum<ppu_decoder_type> ppu_decoder{this, "PPU Decoder", ppu_decoder_type::llvm};
-		cfg::_int<1, 16> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2)
+		cfg::_int<1, 4> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2)
 		cfg::_bool ppu_debug{this, "PPU Debug"};
 		cfg::_bool llvm_logs{this, "Save LLVM logs"};
 		cfg::string llvm_cpu{this, "Use LLVM CPU"};