Optimize vm::range_lock

Only test address on `range_locked`
Don't check current transaction
Remove vm::clear_range_locks completely
Nekotekina 2020-11-01 16:46:06 +03:00
parent 8d12816001
commit 46d3066c62
3 changed files with 39 additions and 47 deletions
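
The headline change is the reader-side fast path: the address overlap test against g_range_lock is now applied only when the published flag is range_locked. A minimal standalone sketch of that predicate follows (hypothetical names such as range_flag_mask and fast_path_ok; the real code in the diff below additionally folds in the reservation value check):

#include <cstdint>

// Hypothetical constants mirroring the vm_locking.h flags in this diff.
constexpr std::uint64_t range_locked    = 2ull << 32;
constexpr std::uint64_t range_flag_mask = 7ull << 32; // assumed: flag bits 32-34

// g_range_lock packs: address in bits 0-31, flags in bits 32-34, size in bits 35+.
// Returns true if a reader covering [addr, addr + size) may take the fast path.
bool fast_path_ok(std::uint64_t lock_val, std::uint64_t addr, std::uint32_t size)
{
    const std::uint64_t lock_addr = static_cast<std::uint32_t>(lock_val);
    const std::uint32_t lock_size = static_cast<std::uint32_t>(lock_val >> 35);
    const std::uint64_t lock_bits = lock_val & range_flag_mask;

    // Only range_locked blocks by address; any other flag value is either
    // harmless here or caught by the re-check after the optimistic store.
    return lock_bits != range_locked
        || addr + size <= lock_addr
        || addr >= lock_addr + lock_size;
}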

View file

@@ -2104,8 +2104,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
perf_meter<"DMA_PUT"_u64> perf2;
// TODO: split range-locked stores in cache lines for consistency
-auto res = &vm::reservation_acquire(eal, args.size);
+// Temporarily disabled, may be removed at all
+atomic_t<u64>* res = nullptr;
switch (u32 size = args.size)
{

View file

@@ -156,7 +156,14 @@ namespace vm
{
perf_meter<"RHW_LOCK"_u64> perf0;
-while (true)
+auto _cpu = get_current_cpu_thread();
+if (_cpu)
+{
+_cpu->state += cpu_flag::wait + cpu_flag::temp;
+}
+for (u64 i = 0;; i++)
{
const u64 lock_val = g_range_lock.load();
const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
@@ -166,12 +173,13 @@ namespace vm
u64 addr = begin;
-if (g_shareable[begin >> 16] || lock_bits == range_sharing)
+// Only useful for range_locked, and is reliable in this case
+if (g_shareable[begin >> 16])
{
addr = addr & 0xffff;
}
-if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
{
range_lock->store(begin | (u64{size} << 32));
@@ -180,18 +188,28 @@ namespace vm
if (!new_lock_val && !new_res_val) [[likely]]
{
-return;
+break;
}
if (new_lock_val == lock_val && !new_res_val) [[likely]]
{
-return;
+break;
}
range_lock->release(0);
}
-std::shared_lock lock(g_mutex);
+std::shared_lock lock(g_mutex, std::try_to_lock);
+if (!lock && i < 15)
+{
+busy_wait(200);
+continue;
+}
+else if (!lock)
+{
+lock.lock();
+}
u32 test = 0;
@@ -213,6 +231,14 @@ namespace vm
vm::_ref<atomic_t<u8>>(test) += 0;
continue;
}
+range_lock->release(begin | (u64{size} << 32));
+break;
}
+if (_cpu)
+{
+_cpu->check_state();
+}
}
@@ -252,36 +278,6 @@ namespace vm
return result;
}
-void clear_range_locks(u32 addr, u32 size)
-{
-ASSUME(size);
-const auto range = utils::address_range::start_length(addr, size);
-// Wait for range locks to clear
-while (true)
-{
-const u64 bads = for_all_range_locks([&](u32 addr2, u32 size2)
-{
-ASSUME(size2);
-if (range.overlaps(utils::address_range::start_length(addr2, size2))) [[unlikely]]
-{
-return 1;
-}
-return 0;
-});
-if (!bads)
-{
-return;
-}
-_mm_pause();
-}
-}
static void _lock_shareable_cache(u64 flags, u32 addr, u32 size)
{
// Can't do 512 MiB or more at once
@@ -290,10 +286,8 @@ namespace vm
fmt::throw_exception("Failed to lock range (flags=0x%x, addr=0x%x, size=0x%x)" HERE, flags >> 32, addr, size);
}
-// Block new range locks
+// Block or signal new range locks
g_range_lock = addr | u64{size} << 35 | flags;
-clear_range_locks(addr, size);
}
void passive_lock(cpu_thread& cpu)
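
Besides the flag check, the fallback path in range_lock_internal above now takes g_mutex with std::try_to_lock and busy-waits for up to 15 iterations before blocking. A generic sketch of that bounded-spin pattern in plain standard C++ (g_mtx and the yield call are placeholders for vm::g_mutex and busy_wait(200), not the RPCS3 implementation):

#include <shared_mutex>
#include <thread>

std::shared_mutex g_mtx; // placeholder for vm::g_mutex

void lock_with_bounded_spin()
{
    for (unsigned i = 0;; i++)
    {
        // First try to take the shared lock without blocking.
        std::shared_lock lock(g_mtx, std::try_to_lock);

        if (!lock && i < 15)
        {
            std::this_thread::yield(); // stands in for busy_wait(200)
            continue;                  // retry; the real loop re-reads g_range_lock first
        }

        if (!lock)
        {
            lock.lock(); // spinning exhausted: block on the shared mutex
        }

        // ... protected work ...
        return;
    }
}

Blocking only after a short spin keeps the common, briefly contended case off the kernel path while still making progress under long contention.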

View file

@@ -23,7 +23,7 @@ namespace vm
/* flag combinations with special meaning */
range_normal = 3ull << 32, // R+W
-range_locked = 2ull << 32, // R+W as well but do not
+range_locked = 2ull << 32, // R+W as well, the only range flag that should block by address
range_sharing = 4ull << 32, // Range being registered as shared, flags are unchanged
range_allocation = 0, // Allocation, no safe access
range_deallocation = 6ull << 32, // Deallocation, no safe access
@@ -52,12 +52,13 @@ namespace vm
u64 addr = begin;
-if (g_shareable[begin >> 16] || lock_bits == range_sharing)
+// Only used for range_locked and is reliable in this case
+if (g_shareable[begin >> 16])
{
addr = addr & 0xffff;
}
-if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
{
// Optimistic locking
range_lock->store(begin | (u64{size} << 32));
@@ -82,9 +83,6 @@ namespace vm
range_lock_internal(res, range_lock, begin, size);
}
-// Wait for all range locks to release in specified range
-void clear_range_locks(u32 addr, u32 size);
// Release it
void free_range_lock(atomic_t<u64, 64>*) noexcept;
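
For reference, a small sketch of the g_range_lock word that all of the checks above decode, following the addr | u64{size} << 35 | flags store and the >> 35 unpacking shown in the diff (the pack/unpack helper names are hypothetical):

#include <cstdint>

// Bits 0-31: address, bits 32-34: range flags, bits 35+: size.
constexpr std::uint64_t pack_range_lock(std::uint32_t addr, std::uint32_t size, std::uint64_t flags)
{
    return addr | (std::uint64_t{size} << 35) | flags;
}

constexpr std::uint32_t range_lock_addr(std::uint64_t lock_val)
{
    return static_cast<std::uint32_t>(lock_val);
}

constexpr std::uint32_t range_lock_size(std::uint64_t lock_val)
{
    return static_cast<std::uint32_t>(lock_val >> 35);
}

Note that the per-thread range_lock words in the same diff use a different layout (begin | u64{size} << 32), so the two encodings should not be mixed.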