diff --git a/Utilities/VirtualMemory.cpp b/Utilities/VirtualMemory.cpp
index d03b26e307..0d21858c2c 100644
--- a/Utilities/VirtualMemory.cpp
+++ b/Utilities/VirtualMemory.cpp
@@ -141,8 +141,9 @@ namespace utils
 #endif
 	}
 
-	shm::shm(u32 size)
+	shm::shm(u32 size, u32 flags)
 		: m_size(::align(size, 0x10000))
+		, m_flags(flags)
 	{
 #ifdef _WIN32
 		m_handle = ::CreateFileMappingW(INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, 0, m_size, NULL);
diff --git a/Utilities/VirtualMemory.h b/Utilities/VirtualMemory.h
index 4e7afce3ef..5dd6be3c9f 100644
--- a/Utilities/VirtualMemory.h
+++ b/Utilities/VirtualMemory.h
@@ -48,9 +48,10 @@ namespace utils
 		int m_file;
 #endif
 		u32 m_size;
+		u32 m_flags;
 
 	public:
-		explicit shm(u32 size);
+		explicit shm(u32 size, u32 flags = 0);
 
 		shm(const shm&) = delete;
@@ -74,5 +75,11 @@ namespace utils
 		{
 			return m_size;
 		}
+
+		// Flags are unspecified, consider it userdata
+		u32 flags() const
+		{
+			return m_flags;
+		}
 	};
 }
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index c548afff5e..d3968ba20a 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -604,7 +604,7 @@ void ppu_thread::cpu_task()
 	}
 	case ppu_cmd::opd_call:
 	{
-		const ppu_func_opd_t opd = cmd_get(1).as();
+		const ppu_func_opd_t opd = cmd_get(1).as();
 		cmd_pop(1), fast_call(opd.addr, opd.rtoc);
 		break;
 	}
@@ -1089,13 +1089,13 @@ const auto ppu_stwcx_tx = build_function_asm
 	newLabel();
 	c->bind(rcheck);
 	c->mov(qw1->r32(), *addr);
-	c->mov(*qw0, imm_ptr(vm::g_reservations));
-	c->shr(qw1->r32(), 4);
+	c->mov(*qw0, imm_ptr(+vm::g_reservations));
+	c->and_(qw1->r32(), 0xff80);
+	c->shr(qw1->r32(), 1);
 	c->mov(*qw0, x86::qword_ptr(*qw0, *qw1));
-	c->and_(qw0->r64(), -128);
 	c->cmp(*qw0, SPU_OFF_64(rtime));
 	c->jne(fail);
 	c->mov(*qw0, imm_ptr(vm::g_base_addr));
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 98a08469e5..7f79952849 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -219,12 +219,12 @@ const auto spu_putllc_tx = build_function_asm([](
 #endif
 
 	// Prepare registers
-	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-	c.mov(x86::rbx, x86::qword_ptr(x86::rax));
+	c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
 	c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
 	c.mov(x86::rbp, x86::qword_ptr(x86::rax));
 	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
-	c.shr(args[0], 4);
+	c.and_(args[0].r32(), 0xff80);
+	c.shr(args[0].r32(), 1);
 	c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
 	c.xor_(x86::r12d, x86::r12d);
 	c.mov(x86::r13, args[1]);
@@ -608,12 +608,12 @@ const auto spu_getll_inexact = build_function_asm
+	if (eal >> 16 != (range_end - 1) >> 16)
+	{
+		u32 nexta = range_end & -65536;
+		u32 size0 = nexta - eal;
+		size -= size0;
+
+		// Split locking + transfer in two parts (before 64K border, and after it)
+		const auto lock = vm::range_lock(range_addr, nexta);
+#ifdef __GNUG__
+		std::memcpy(dst, src, size0);
+		dst += size0;
+		src += size0;
+#else
+		while (size0 >= 128)
+		{
+			mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
+
+			dst += 128;
+			src += 128;
+			size0 -= 128;
+		}
+
+		while (size0)
+		{
+			*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+			dst += 16;
+			src += 16;
+			size0 -= 16;
+		}
+#endif
+		lock->release(0);
+		range_addr = nexta;
+	}
+
+	const auto lock = vm::range_lock(range_addr, range_end);
 #ifdef __GNUG__
 	std::memcpy(dst, src, size);
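Note on the reservation addressing change above: `g_reservations` stops being a 4 GiB reservation mirror and becomes a flat 32 KiB table (`65536 / 128 * 64` bytes, see the vm.cpp hunk below), indexed by address bits 7..15 only, so each 128-byte granule within a 64K page owns one 8-byte stamp and every 64K page aliases the same table. The `and 0xff80` / `shr 1` pairs in `ppu_stwcx_tx` and `spu_putllc_tx` compute exactly that byte offset. A minimal standalone sketch of the same arithmetic (the helper name is hypothetical, not part of the patch):

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the patched stamp addressing:
// 128-byte reservation granule -> 64-byte stride into g_reservations,
// keyed by address bits 7..15 (every 64K page aliases one 32 KiB table).
inline std::size_t reservation_offset(std::uint32_t addr)
{
	return (addr & 0xff80) / 2; // equivalent to (addr % 65536) / 128 * 64
}
```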
diff --git a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
index 21266fff62..c6f7caa1a9 100644
--- a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
@@ -17,7 +17,7 @@ lv2_memory::lv2_memory(u32 size, u32 align, u64 flags, lv2_memory_container* ct)
 	, align(align)
 	, flags(flags)
 	, ct(ct)
-	, shm(std::make_shared<utils::shm>(size))
+	, shm(std::make_shared<utils::shm>(size, 1 /* shareable flag */))
 {
 }
 
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 4e87c09005..b328e24373 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -47,8 +47,11 @@ namespace vm
 	// Stats for debugging
 	u8* const g_stat_addr = memory_reserve_4GiB(g_exec_addr);
 
-	// Reservation stats (compressed x16)
-	u8* const g_reservations = memory_reserve_4GiB(g_stat_addr);
+	// Reservation stats
+	alignas(4096) u8 g_reservations[65536 / 128 * 64]{0};
+
+	// Shareable memory bits
+	alignas(4096) atomic_t<u8> g_shareable[65536]{0};
 
 	// Memory locations
 	std::vector<std::shared_ptr<block_t>> g_locations;
@@ -94,6 +97,34 @@
 		}
 	}
 
+	static void _lock_shareable_cache(u8 /*value*/, u32 addr /*mutable*/, u32 end /*mutable*/)
+	{
+		// Special value to block new range locks
+		g_addr_lock = 1;
+
+		// Convert to 64K-page numbers
+		addr >>= 16;
+		end >>= 16;
+
+		// Wait for range locks to clear
+		for (auto& lock : g_range_locks)
+		{
+			while (const u64 _lock = lock.load())
+			{
+				if (const u32 lock_page = static_cast<u32>(_lock) >> 16)
+				{
+					if (lock_page < addr || lock_page >= end)
+					{
+						// Ignoreable range lock
+						break;
+					}
+				}
+
+				_mm_pause();
+			}
+		}
+	}
+
 	void passive_lock(cpu_thread& cpu)
 	{
 		if (g_tls_locked && *g_tls_locked == &cpu) [[unlikely]]
@@ -118,21 +149,45 @@
 		_register_lock(&cpu);
 	}
 
-	atomic_t<u64>* passive_lock(const u32 addr, const u32 end)
+	atomic_t<u64>* range_lock(u32 addr, u32 end)
 	{
-		static const auto test_addr = [](const u32 target, const u32 addr, const u32 end)
+		static const auto test_addr = [](u32 target, u32 addr, u32 end) -> u64
 		{
-			return addr > target || end <= target;
+			if (target == 1)
+			{
+				// Shareable info is being modified
+				return 0;
+			}
+
+			if (g_shareable[target >> 16])
+			{
+				// Target within shareable memory range
+				target &= 0xffff;
+			}
+
+			if (g_shareable[addr >> 16])
+			{
+				// Track shareable memory locks in 0x0..0xffff address range
+				addr &= 0xffff;
+				end = ((end - 1) & 0xffff) + 1;
+			}
+
+			if (addr > target || end <= target)
+			{
+				return u64{end} << 32 | addr;
+			}
+
+			return 0;
 		};
 
 		atomic_t<u64>* _ret;
 
-		if (test_addr(g_addr_lock.load(), addr, end)) [[likely]]
+		if (u64 _a1 = test_addr(g_addr_lock.load(), addr, end)) [[likely]]
 		{
 			// Optimistic path (hope that address range is not locked)
-			_ret = _register_range_lock(u64{end} << 32 | addr);
+			_ret = _register_range_lock(_a1);
 
-			if (test_addr(g_addr_lock.load(), addr, end)) [[likely]]
+			if (_a1 == test_addr(g_addr_lock.load(), addr, end)) [[likely]]
 			{
 				return _ret;
 			}
@@ -142,7 +197,7 @@
 		{
 			::reader_lock lock(g_mutex);
 
-			_ret = _register_range_lock(u64{end} << 32 | addr);
+			_ret = _register_range_lock(test_addr(-1, addr, end));
 		}
 
 		return _ret;
@@ -233,7 +288,7 @@
 		m_upgraded = true;
 	}
 
-	writer_lock::writer_lock(u32 addr)
+	writer_lock::writer_lock(u32 addr /*mutable*/)
 	{
 		auto cpu = get_current_cpu_thread();
@@ -244,7 +299,7 @@
 
 		g_mutex.lock();
 
-		if (addr)
+		if (addr >= 0x10000)
 		{
 			for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
 			{
@@ -256,6 +311,12 @@
 
 		g_addr_lock = addr;
 
+		if (g_shareable[addr >> 16])
+		{
+			// Reservation address in shareable memory range
+			addr = addr & 0xffff;
+		}
+
 		for (auto& lock : g_range_locks)
 		{
 			while (true)
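The rewritten `test_addr` above folds both the candidate range and the currently locked address into the 0x0..0xffff window whenever the corresponding 64K page is flagged in `g_shareable`, so a writer lock taken through one mirror of a shareable page conflicts with range locks held on any other mirror. A simplified standalone model of that conflict test (an assumption-level sketch, not the patch's code; the shareable flags are passed in rather than read from `g_shareable`):

```cpp
#include <cstdint>

// Simplified model of the folding rule used by test_addr above:
// ranges touching shareable pages are compared modulo 64K, so every
// mirror of the same physical page collides with the locked target.
inline bool ranges_conflict(std::uint32_t target, std::uint32_t addr, std::uint32_t end,
                            bool target_shareable, bool range_shareable)
{
	if (target_shareable)
	{
		target &= 0xffff;
	}

	if (range_shareable)
	{
		addr &= 0xffff;
		end = ((end - 1) & 0xffff) + 1;
	}

	// test_addr returns 0 (retry) exactly when this condition holds
	return addr <= target && target < end;
}
```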
@@ -345,6 +406,19 @@
 			}
 		}
 
+		if (shm && shm->flags() != 0)
+		{
+			_lock_shareable_cache(1, addr, addr + size);
+
+			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
+			{
+				g_shareable[i] = 1;
+			}
+
+			// Unlock
+			g_addr_lock.compare_and_swap(1, 0);
+		}
+
 		// Notify rsx that range has become valid
 		// Note: This must be done *before* memory gets mapped while holding the vm lock, otherwise
 		// the RSX might try to invalidate memory that got unmapped and remapped
@@ -482,6 +556,19 @@
 			}
 		}
 
+		if (g_shareable[addr >> 16])
+		{
+			_lock_shareable_cache(0, addr, addr + size);
+
+			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
+			{
+				g_shareable[i] = 0;
+			}
+
+			// Unlock
+			g_addr_lock.compare_and_swap(1, 0);
+		}
+
 		// Notify rsx to invalidate range
 		// Note: This must be done *before* memory gets unmapped while holding the vm lock, otherwise
 		// the RSX might try to call VirtualProtect on memory that is already unmapped
@@ -624,35 +711,12 @@
 		, size(size)
 		, flags(flags)
 	{
-		// Allocate compressed reservation info area (avoid SPU MMIO area)
-		if (addr != 0xe0000000)
-		{
-			// Beginning of the address space
-			if (addr == 0x10000)
-			{
-				utils::memory_commit(g_reservations, 0x1000);
-			}
-
-			utils::memory_commit(g_reservations + addr / 16, size / 16);
-		}
-		else
-		{
-			// RawSPU LS
-			for (u32 i = 0; i < 6; i++)
-			{
-				utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
-			}
-
-			// End of the address space
-			utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
-		}
-
 		if (flags & 0x100)
 		{
 			// Special path for 4k-aligned pages
 			m_common = std::make_shared<utils::shm>(size);
 			verify(HERE), m_common->map_critical(vm::base(addr), utils::protection::no) == vm::base(addr);
-			verify(HERE), m_common->map_critical(vm::get_super_ptr(addr), utils::protection::rw) == vm::get_super_ptr(addr);
+			verify(HERE), m_common->map_critical(vm::get_super_ptr(addr)) == vm::get_super_ptr(addr);
 		}
 	}
@@ -1142,7 +1206,7 @@
 			g_sudo_addr, g_sudo_addr + UINT32_MAX,
 			g_exec_addr, g_exec_addr + 0x200000000 - 1,
 			g_stat_addr, g_stat_addr + UINT32_MAX,
-			g_reservations, g_reservations + UINT32_MAX);
+			g_reservations, g_reservations + sizeof(g_reservations) - 1);
 
 		g_locations =
 		{
@@ -1154,6 +1218,9 @@
 			std::make_shared<block_t>(0xD0000000, 0x10000000, 0x111), // stack
 			std::make_shared<block_t>(0xE0000000, 0x20000000), // SPU reserved
 		};
+
+		std::memset(g_reservations, 0, sizeof(g_reservations));
+		std::memset(g_shareable, 0, sizeof(g_shareable));
 	}
 }
@@ -1164,7 +1231,6 @@
 		utils::memory_decommit(g_base_addr, 0x100000000);
 		utils::memory_decommit(g_exec_addr, 0x100000000);
 		utils::memory_decommit(g_stat_addr, 0x100000000);
-		utils::memory_decommit(g_reservations, 0x100000000);
 	}
 }
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index a9b833bae4..fbb9ab5728 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -13,7 +13,7 @@ namespace vm
 	extern u8* const g_sudo_addr;
 	extern u8* const g_exec_addr;
 	extern u8* const g_stat_addr;
-	extern u8* const g_reservations;
+	extern u8 g_reservations[];
 
 	struct writer_lock;
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 330d8865c1..9716bab16a 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -13,7 +13,7 @@ namespace vm
 	// Register reader
 	void passive_lock(cpu_thread& cpu);
 
-	atomic_t<u64>* passive_lock(const u32 begin, const u32 end);
+	atomic_t<u64>* range_lock(u32 begin, u32 end);
 
 	// Unregister reader
 	void passive_unlock(cpu_thread& cpu);
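With the header changes above, the old address-taking `passive_lock` overload is gone and callers use `vm::range_lock` the same way the SPU DMA path earlier in this patch does: register the range, perform the transfer, then release the slot with 0. A hedged usage sketch (the function name and plain `memcpy` body are illustrative only, assuming the usual rpcs3 headers for `u32`, `vm::base` and `atomic_t`):

```cpp
#include <cstring>

// Illustrative caller of the renamed API, mirroring the SPU PUT path above.
static void copy_into_guest(u32 addr, const void* src, u32 size)
{
	const auto lock = vm::range_lock(addr, addr + size); // register the reader range

	std::memcpy(vm::base(addr), src, size);              // transfer while the range is protected

	lock->release(0);                                     // free the range-lock slot
}
```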
diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h
index 537600514b..3c15d40447 100644
--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@@ -10,7 +10,7 @@ namespace vm
 	inline atomic_t<u64>& reservation_acquire(u32 addr, u32 size)
 	{
 		// Access reservation info: stamp and the lock bit
-		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+		return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
 	}
 
 	// Update reservation status
@@ -23,7 +23,7 @@
 	// Get reservation sync variable
 	inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
 	{
-		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+		return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
 	}
 
 	void reservation_lock_internal(atomic_t<u64>&);
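Since `reservation_acquire` and `reservation_notifier` now resolve a stamp through the same `(addr & 0xff80) / 2` offset as the generated assembly, any two effective addresses with equal bits 7..15 share one reservation slot, which is what lets shareable mirrors observe a single stamp by construction. A small standalone illustration of that aliasing property (not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

// Two addresses hit the same reservation stamp iff they select the same
// 128-byte granule within a 64K page (address bits 7..15 are equal).
inline bool same_reservation_slot(std::uint32_t a, std::uint32_t b)
{
	return (a & 0xff80) == (b & 0xff80);
}

int main()
{
	assert(same_reservation_slot(0x00010080, 0x30010080));  // 64K-page mirrors alias
	assert(!same_reservation_slot(0x00010080, 0x00010100)); // adjacent granules do not
}
```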