vm::range_lock cleanup and minor optimization
Removed unused arg. Linearized some branches.
commit 06ecc2ae68 (parent 46d3066c62)
3 changed files with 25 additions and 44 deletions
@@ -2032,7 +2032,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 }
 
 // Obtain range lock as normal store
-vm::range_lock(nullptr, range_lock, eal, size0);
+vm::range_lock(range_lock, eal, size0);
 
 switch (size0)
 {
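With the unused reservation argument gone, a caller only needs its per-thread lock slot, the effective address, and the size. A minimal sketch of the resulting call pattern, assuming the vm_locking.h API shown further down in this diff (the memcpy stand-in and the surrounding variables are illustrative only, not the emulator's exact code):

    // Sketch only: assumes RPCS3's vm_locking.h; dst/src/eal/size0 stand in for DMA state.
    atomic_t<u64, 64>* range_lock = vm::alloc_range_lock(); // registered once per thread

    vm::range_lock(range_lock, eal, size0); // publish the locked range, wait out conflicting ops
    std::memcpy(dst, src, size0);           // perform the store(s) while the range is held
    range_lock->release(0);                 // clear the per-thread lock word to unlock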
@@ -2104,35 +2104,32 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 perf_meter<"DMA_PUT"_u64> perf2;
 
-// Temporarily disabled, may be removed at all
-atomic_t<u64>* res = nullptr;
-
 switch (u32 size = args.size)
 {
 case 1:
 {
-vm::range_lock(res, range_lock, eal, 1);
+vm::range_lock(range_lock, eal, 1);
 *reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
 range_lock->release(0);
 break;
 }
 case 2:
 {
-vm::range_lock(res, range_lock, eal, 2);
+vm::range_lock(range_lock, eal, 2);
 *reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
 range_lock->release(0);
 break;
 }
 case 4:
 {
-vm::range_lock(res, range_lock, eal, 4);
+vm::range_lock(range_lock, eal, 4);
 *reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
 range_lock->release(0);
 break;
 }
 case 8:
 {
-vm::range_lock(res, range_lock, eal, 8);
+vm::range_lock(range_lock, eal, 8);
 *reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
 range_lock->release(0);
 break;
 }
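Each small case locks exactly the bytes it is about to store, performs one scalar copy, and unlocks by writing zero. The per-thread lock word written by range_lock->store(...) (see the vm.cpp and header hunks below) packs the address and size into one u64; a standalone sketch of that encoding, with invented begin/size values:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        using u32 = std::uint32_t;
        using u64 = std::uint64_t;

        // The word published by range_lock->store(begin | (u64{size} << 32)):
        // low 32 bits hold the start address, high 32 bits hold the size.
        const u32 begin = 0x00300180, size = 8; // example values only
        const u64 word = begin | (u64{size} << 32);

        assert(static_cast<u32>(word) == begin);
        assert(static_cast<u32>(word >> 32) == size);

        // range_lock->release(0) publishes 0, meaning "no range held".
    }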
@@ -2141,7 +2138,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 {
 if (((eal & 127) + size) <= 128)
 {
-vm::range_lock(res, range_lock, eal, size);
+vm::range_lock(range_lock, eal, size);
 
 while (size)
 {
@@ -2167,7 +2164,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 size -= size0;
 
 // Split locking + transfer in two parts (before 64K border, and after it)
-vm::range_lock(res, range_lock, range_addr, size0);
+vm::range_lock(range_lock, range_addr, size0);
 
 // Avoid unaligned stores in mov_rdata_avx
 if (reinterpret_cast<u64>(dst) & 0x10)
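The split above exists because the range lock machinery (and the g_shareable table) works at 64K granularity, so a transfer that crosses a 64K border is locked and copied in two parts. A standalone sketch of how such a split can be computed; the concrete addresses and sizes are invented, not the emulator's exact code:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        using u32 = std::uint32_t;

        const u32 range_addr = 0x0001ff80; // example: 128 bytes below a 64K border
        const u32 size       = 0x200;      // example transfer size

        // First part: up to (but not across) the next 64K boundary.
        const u32 size0 = std::min<u32>(size, 0x10000 - (range_addr & 0xffff));
        // Second part: whatever remains, starting exactly at the boundary.
        const u32 size1 = size - size0;

        std::printf("part1: addr=0x%x size=0x%x\n", range_addr, size0);
        std::printf("part2: addr=0x%x size=0x%x\n", range_addr + size0, size1);
    }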
@@ -2201,7 +2198,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 range_addr = nexta;
 }
 
-vm::range_lock(res, range_lock, range_addr, range_end - range_addr);
+vm::range_lock(range_lock, range_addr, range_end - range_addr);
 
 // Avoid unaligned stores in mov_rdata_avx
 if (reinterpret_cast<u64>(dst) & 0x10)
@@ -152,7 +152,7 @@ namespace vm
 return &g_range_lock_set[std::countr_one(bits)];
 }
 
-void range_lock_internal(atomic_t<u64>* res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
+void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
 {
 perf_meter<"RHW_LOCK"_u64> perf0;
 
@@ -168,30 +168,22 @@ namespace vm
 const u64 lock_val = g_range_lock.load();
 const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
 const u32 lock_size = static_cast<u32>(lock_val >> 35);
-const u64 lock_bits = lock_val & range_mask;
-const u64 res_val = res ? res->load() & 127 : 0;
 
 u64 addr = begin;
 
-// Only useful for range_locked, and is reliable in this case
-if (g_shareable[begin >> 16])
+// See range_lock()
+if (g_shareable[begin >> 16] | (((lock_val >> 32) & (range_full_mask >> 32)) ^ (range_locked >> 32)))
 {
 addr = addr & 0xffff;
 }
 
-if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
 {
 range_lock->store(begin | (u64{size} << 32));
 
 const u64 new_lock_val = g_range_lock.load();
-const u64 new_res_val = res ? res->load() & 127 : 0;
 
-if (!new_lock_val && !new_res_val) [[likely]]
-{
-break;
-}
-
-if (new_lock_val == lock_val && !new_res_val) [[likely]]
+if (!(new_lock_val | (new_lock_val != lock_val))) [[likely]]
 {
 break;
 }
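This slow path (and the inlined fast path further down) decodes the same packed g_range_lock word: as the reading code treats it, the locked address sits in the low 32 bits, the size in the bits from 35 up, and the three flag bits covered by range_full_mask in bits 32-34, so the XOR against range_locked >> 32 is non-zero exactly when the published flags are anything other than range_locked. A standalone check of that decoding, using an invented sample value:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        using u32 = std::uint32_t;
        using u64 = std::uint64_t;

        constexpr u64 range_locked    = 2ull << 32;
        constexpr u64 range_full_mask = 7ull << 32;

        // Invented sample: address 0x20000000, flags = range_locked, size 0x80 at bit 35.
        const u64 lock_val = 0x20000000ull | range_locked | (u64{0x80} << 35);

        const u64 lock_addr = static_cast<u32>(lock_val);       // -> 0x20000000
        const u32 lock_size = static_cast<u32>(lock_val >> 35); // -> 0x80
        const u64 flags_xor = ((lock_val >> 32) & (range_full_mask >> 32)) ^ (range_locked >> 32);

        assert(lock_addr == 0x20000000);
        assert(lock_size == 0x80);
        assert(flags_xor == 0); // zero only while a range_locked operation is published
    }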
@@ -820,7 +812,7 @@ namespace vm
 }
 
 // Protect range locks from actual memory protection changes
-_lock_shareable_cache(range_deallocation, addr, size);
+_lock_shareable_cache(range_allocation, addr, size);
 
 if (shm && shm->flags() != 0 && g_shareable[addr >> 16])
 {
@@ -18,15 +18,14 @@ namespace vm
 range_readable = 1ull << 32,
 range_writable = 2ull << 32,
 range_executable = 4ull << 32,
-range_mask = 7ull << 32,
+range_full_mask = 7ull << 32,
 
 /* flag combinations with special meaning */
 
-range_normal = 3ull << 32, // R+W
+range_normal = 3ull << 32, // R+W, testing as mask for zero can check no access
 range_locked = 2ull << 32, // R+W as well, the only range flag that should block by address
-range_sharing = 4ull << 32, // Range being registered as shared, flags are unchanged
+range_sharing = 6ull << 32, // Range being registered as shared, flags are unchanged
 range_allocation = 0, // Allocation, no safe access
 range_deallocation = 6ull << 32, // Deallocation, no safe access
 };
 
 extern atomic_t<u64> g_range_lock;
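The renamed range_full_mask covers the three flag bits, and the new comment on range_normal points out that it doubles as a mask: AND-ing a lock value with it and testing for zero checks that neither the readable nor the writable bit is set. A standalone restatement of those relationships, using a subset of the constants copied from the hunk above:

    #include <cstdint>

    using u64 = std::uint64_t;

    // Constants as listed in the hunk above.
    constexpr u64 range_readable     = 1ull << 32;
    constexpr u64 range_writable     = 2ull << 32;
    constexpr u64 range_executable   = 4ull << 32;
    constexpr u64 range_full_mask    = 7ull << 32;
    constexpr u64 range_normal       = 3ull << 32; // R+W
    constexpr u64 range_locked       = 2ull << 32;
    constexpr u64 range_allocation   = 0;
    constexpr u64 range_deallocation = 6ull << 32;

    static_assert(range_full_mask == (range_readable | range_writable | range_executable));
    static_assert(range_normal == (range_readable | range_writable));
    static_assert((range_locked & range_normal) != 0);         // locked ranges keep R+W access
    static_assert((range_allocation & range_normal) == 0);     // "no safe access": mask test is zero
    static_assert((range_deallocation & range_readable) == 0); // deallocation has the readable bit clear

    int main() {}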
@@ -39,39 +38,32 @@ namespace vm
 // Register range lock for further use
 atomic_t<u64, 64>* alloc_range_lock();
 
-void range_lock_internal(atomic_t<u64>* res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
+void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
 
 // Lock memory range
-FORCE_INLINE void range_lock(atomic_t<u64>* res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
+FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
 {
 const u64 lock_val = g_range_lock.load();
 const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
 const u32 lock_size = static_cast<u32>(lock_val >> 35);
-const u64 lock_bits = lock_val & range_mask;
-const u64 res_val = res ? res->load() & 127 : 0;
 
 u64 addr = begin;
 
-// Only used for range_locked and is reliable in this case
-if (g_shareable[begin >> 16])
+// Optimization: if range_locked is not used, the addr check will always pass
+// Otherwise, g_shareable is unchanged and its value is reliable to read
+if (g_shareable[begin >> 16] | (((lock_val >> 32) & (range_full_mask >> 32)) ^ (range_locked >> 32)))
 {
 addr = addr & 0xffff;
 }
 
-if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
 {
 // Optimistic locking
 range_lock->store(begin | (u64{size} << 32));
 
 const u64 new_lock_val = g_range_lock.load();
-const u64 new_res_val = res ? res->load() & 127 : 0;
 
-if (!new_lock_val && !new_res_val) [[likely]]
-{
-return;
-}
-
-if (new_lock_val == lock_val && !new_res_val) [[likely]]
+if (!(new_lock_val | (new_lock_val != lock_val))) [[likely]]
 {
 return;
 }
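This is where the commit message's "linearized some branches" shows most clearly: the two early-return branches of the old fast path are folded into one test. !(new_lock_val | (new_lock_val != lock_val)) is true precisely when both loads of g_range_lock returned zero, i.e. no global range operation was in flight before or after the optimistic store; every other case, including the "same non-zero value" case the old second branch accepted, now retries via range_lock_internal. A standalone check of that behaviour, with invented sample values:

    #include <cassert>
    #include <cstdint>

    using u64 = std::uint64_t;

    // The linearized condition from the hunk above.
    constexpr bool fast_accept(u64 lock_val, u64 new_lock_val)
    {
        return !(new_lock_val | (new_lock_val != lock_val));
    }

    int main()
    {
        assert(fast_accept(0, 0));                    // no global op before or after the store
        assert(!fast_accept(0, 1ull << 32));          // an op appeared after the optimistic store
        assert(!fast_accept(2ull << 32, 0));          // an op was present before and then vanished
        assert(!fast_accept(2ull << 32, 2ull << 32)); // same non-zero value: previously accepted,
                                                      // now handled by the slow path instead
    }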
@@ -80,7 +72,7 @@ namespace vm
 }
 
 // Fallback to slow path
-range_lock_internal(res, range_lock, begin, size);
+range_lock_internal(range_lock, begin, size);
 }
 
 // Release it