Improve vm::range_lock

Not sure how it ever worked
Clear redundant vm::clear_range_locks usage
Nekotekina 2020-10-31 19:17:38 +03:00
parent a42663b09b
commit 78c986b5dd
4 changed files with 48 additions and 18 deletions
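For context: the heart of the change is the reader side of the range lock. vm::range_lock now receives the 128-byte reservation as a nullable pointer and treats a non-zero value in its low 7 bits as busy, so a reader will not advertise a range whose reservation is currently held. The sketch below is a simplified, self-contained model of that publish-then-recheck protocol. It follows the encoding visible in the diff (low 32 bits of g_range_lock hold the locked address, bits 35 and up the size, and a per-thread slot holds begin | size << 32), but it omits the g_shareable address masking and is an illustration, not the actual RPCS3 code.

#include <atomic>
#include <cstdint>

// Writer-published lock word: low 32 bits = address, bits 35+ = size (as in the diff).
std::atomic<uint64_t> g_range_lock{0};

// Low 7 bits of a 128-byte reservation word act as a busy indicator.
bool reservation_busy(const std::atomic<uint64_t>* res)
{
	return res && (res->load() & 127) != 0;
}

// Try to advertise [begin, begin + size) in the caller's per-thread slot.
// Returns false if a conflicting writer or a busy reservation was observed;
// the caller is then expected to retry (the real code has a g_mutex slow path).
bool try_range_lock(std::atomic<uint64_t>& slot, const std::atomic<uint64_t>* res, uint32_t begin, uint32_t size)
{
	const uint64_t lock_val  = g_range_lock.load();
	const uint64_t lock_addr = static_cast<uint32_t>(lock_val);
	const uint32_t lock_size = static_cast<uint32_t>(lock_val >> 35);

	// First check: no overlap with the currently published writer range,
	// and the reservation for our line is not held.
	if (!(begin + uint64_t{size} <= lock_addr || begin >= lock_addr + lock_size) || reservation_busy(res))
	{
		return false;
	}

	// Optimistically publish our own range...
	slot.store(begin | (uint64_t{size} << 32));

	// ...then re-check: either no writer lock is published now, or it is still the
	// same one we already proved disjoint; any later writer will see our slot.
	const uint64_t new_lock_val = g_range_lock.load();

	if ((new_lock_val == 0 || new_lock_val == lock_val) && !reservation_busy(res))
	{
		return true;
	}

	slot.store(0); // conflict: back off so the writer can proceed, then retry
	return false;
}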


@@ -1760,9 +1760,6 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 	// Align address: we do not need the lower 7 bits anymore
 	addr &= -128;
 
-	// Wait for range locks to clear
-	vm::clear_range_locks(addr, 128);
-
 	// Cache line data
 	auto& cline_data = vm::_ref<spu_rdata_t>(addr);


@@ -2038,7 +2038,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 	}
 
 	// Obtain range lock as normal store
-	vm::range_lock(res, range_lock, eal, size0);
+	vm::range_lock(nullptr, range_lock, eal, size0);
 
 	switch (size0)
 	{
@@ -2111,7 +2111,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 	perf_meter<"DMA_PUT"_u64> perf2;
 
 	// TODO: split range-locked stores in cache lines for consistency
-	auto& res = vm::reservation_acquire(eal, args.size);
+	auto res = &vm::reservation_acquire(eal, args.size);
 
 	switch (u32 size = args.size)
 	{
@@ -2574,9 +2574,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 		return false;
 	}
 
-	// Wait for range locks to clear
-	vm::clear_range_locks(addr, 128);
-
 	vm::_ref<atomic_t<u32>>(addr) += 0;
 
 	auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
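The two spu_thread::do_dma_transfer changes above show the calling convention that follows from the new pointer signature: a plain store path passes nullptr because there is no reservation to respect, while the reservation-aware PUT path takes the address of the reservation once and forwards it so that a held reservation also blocks the range lock. Condensed from the diff above (call shapes only, not the full function):

// Plain store: only writer range locks matter.
vm::range_lock(nullptr, range_lock, eal, size0);

// Reservation-aware store: pass the 128-byte reservation so that a held
// reservation (non-zero low 7 bits) also blocks the range lock.
auto res = &vm::reservation_acquire(eal, args.size);
vm::range_lock(res, range_lock, eal, size0);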


@@ -152,12 +152,44 @@ namespace vm
 		return &g_range_lock_set[std::countr_one(bits)];
 	}
 
-	void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
+	void range_lock_internal(atomic_t<u64>* res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
 	{
 		perf_meter<"RHW_LOCK"_u64> perf0;
 
 		while (true)
 		{
+			const u64 lock_val = g_range_lock.load();
+			const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
+			const u32 lock_size = static_cast<u32>(lock_val >> 35);
+			const u64 res_val = res ? res->load() & 127 : 0;
+
+			u64 addr = begin;
+
+			if (g_shareable[begin >> 16])
+			{
+				addr = addr & 0xffff;
+			}
+
+			if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+			{
+				range_lock->store(begin | (u64{size} << 32));
+
+				const u64 new_lock_val = g_range_lock.load();
+				const u64 new_res_val = res ? res->load() & 127 : 0;
+
+				if (!new_lock_val && !new_res_val) [[likely]]
+				{
+					return;
+				}
+
+				if (new_lock_val == lock_val && !new_res_val) [[likely]]
+				{
+					return;
+				}
+
+				range_lock->release(0);
+			}
+
 			std::shared_lock lock(g_mutex);
 
 			u32 test = 0;
@@ -180,9 +212,6 @@ namespace vm
 				vm::_ref<atomic_t<u8>>(test) += 0;
 				continue;
 			}
 
-			range_lock->release(begin | u64{size} << 32);
-
-			return;
 		}
 	}
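Note that the slow path now repeats the same optimistic attempt at the top of every iteration, which is why the unconditional range lock store and return at the bottom of the loop could be removed. In terms of the sketch near the top of this page, the overall shape is roughly the following (the real range_lock_internal additionally takes g_mutex shared and touches the conflicting page before retrying; this condensed version just yields and is an illustration, not the RPCS3 implementation):

#include <thread>

void range_lock_slow(std::atomic<uint64_t>& slot, const std::atomic<uint64_t>* res, uint32_t begin, uint32_t size)
{
	// Spin until the publish-then-recheck attempt from the earlier sketch succeeds.
	while (!try_range_lock(slot, res, begin, size))
	{
		std::this_thread::yield();
	}
}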


@@ -38,14 +38,15 @@ namespace vm
 	// Register range lock for further use
 	atomic_t<u64, 64>* alloc_range_lock();
 
-	void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
+	void range_lock_internal(atomic_t<u64>* res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
 
 	// Lock memory range
-	FORCE_INLINE void range_lock(atomic_t<u64>& res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
+	FORCE_INLINE void range_lock(atomic_t<u64>* res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
 	{
 		const u64 lock_val = g_range_lock.load();
 		const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
 		const u32 lock_size = static_cast<u32>(lock_val >> 35);
+		const u64 res_val = res ? res->load() & 127 : 0;
 
 		u64 addr = begin;
@@ -54,14 +55,20 @@ namespace vm
 			addr = addr & 0xffff;
 		}
 
-		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !(res.load() & 127)) [[likely]]
+		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
 		{
 			// Optimistic locking
-			range_lock->release(begin | (u64{size} << 32));
+			range_lock->store(begin | (u64{size} << 32));
 
 			const u64 new_lock_val = g_range_lock.load();
+			const u64 new_res_val = res ? res->load() & 127 : 0;
 
-			if ((!new_lock_val || new_lock_val == lock_val) && !(res.load() & 127)) [[likely]]
+			if (!new_lock_val && !new_res_val) [[likely]]
 			{
 				return;
 			}
+
+			if (new_lock_val == lock_val && !new_res_val) [[likely]]
+			{
+				return;
+			}
@@ -70,7 +77,7 @@ namespace vm
 		}
 
 		// Fallback to slow path
-		range_lock_internal(range_lock, begin, size);
+		range_lock_internal(res, range_lock, begin, size);
 	}
 
 	// Wait for all range locks to release in specified range
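The declaration this comment introduces, vm::clear_range_locks, is the wait that the two removed call sites (ppu_store_reservation and spu_thread::do_putllc) performed before writing a cache line; the commit message deems those calls redundant, presumably because range_lock itself now refuses to proceed while the reservation's low bits are held. Below is a hypothetical illustration of what such a wait amounts to, using the slot encoding from the diff (begin | size << 32); the slot count and the flat array are assumptions, not the real g_range_lock_set definition.

#include <atomic>
#include <cstdint>

constexpr unsigned MAX_RANGE_LOCKS = 64; // assumption for illustration only

std::atomic<uint64_t> g_range_lock_set[MAX_RANGE_LOCKS];

// Spin until no published reader range overlaps [addr, addr + size).
void wait_range_locks(uint32_t addr, uint32_t size)
{
	for (unsigned i = 0; i < MAX_RANGE_LOCKS; i++)
	{
		uint64_t v;

		while ((v = g_range_lock_set[i].load()) != 0)
		{
			const uint64_t begin = static_cast<uint32_t>(v);
			const uint64_t end = begin + (v >> 32);

			if (end <= addr || begin >= addr + uint64_t{size})
			{
				break; // this slot does not overlap the line being written
			}
		}
	}
}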