mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
PPU: Implement support for 128-byte reservations coherency
This commit is contained in:
parent
3f7eba19c8
commit
f4ca6f02a1
8 changed files with 539 additions and 144 deletions
|
@ -4,6 +4,7 @@
|
|||
#include "Utilities/JIT.h"
|
||||
#include "Crypto/sha1.h"
|
||||
#include "Emu/Memory/vm_reservation.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "Emu/VFS.h"
|
||||
#include "PPUThread.h"
|
||||
#include "PPUInterpreter.h"
|
||||
|
@ -72,6 +73,15 @@ extern atomic_t<const char*> g_progr;
|
|||
extern atomic_t<u32> g_progr_ptotal;
|
||||
extern atomic_t<u32> g_progr_pdone;
|
||||
|
||||
// Should be of the same type
|
||||
using spu_rdata_t = decltype(ppu_thread::full_rdata);
|
||||
|
||||
extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src);
|
||||
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs);
|
||||
|
||||
// Verify AVX availability for TSX transactions
|
||||
static const bool s_tsx_avx = utils::has_avx();
|
||||
|
||||
template <>
|
||||
void fmt_class_string<ppu_join_status>::format(std::string& out, u64 arg)
|
||||
{
|
||||
|
@ -1095,6 +1105,58 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
|
|||
ppu.raddr = addr;
|
||||
const u64 mask_res = g_use_rtm ? (-128 | vm::dma_lockb) : -1;
|
||||
|
||||
if (const s32 max = g_cfg.core.ppu_128_reservations_loop_max_length)
|
||||
{
|
||||
// If we use it in HLE it means we want the accurate version
|
||||
ppu.use_full_rdata = max < 0 || ppu.current_function || [&]()
|
||||
{
|
||||
const u32 cia = ppu.cia;
|
||||
|
||||
if ((cia & 0xffff) >= 0x10000u - max * 4)
|
||||
{
|
||||
// Do not cross 64k boundary
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto inst = vm::_ptr<const nse_t<u32>>(cia);
|
||||
|
||||
// Search for STWCX or STDCX nearby (LDARX-STWCX and LWARX-STDCX loops will use accurate 128-byte reservations)
|
||||
constexpr u32 store_cond = se_storage<u32>::swap(sizeof(T) == 8 ? 0x7C00012D : 0x7C0001AD);
|
||||
constexpr u32 mask = se_storage<u32>::swap(0xFC0007FF);
|
||||
|
||||
const auto store_vec = v128::from32p(store_cond);
|
||||
const auto mask_vec = v128::from32p(mask);
|
||||
|
||||
s32 i = 2;
|
||||
|
||||
for (const s32 _max = max - 3; i < _max; i += 4)
|
||||
{
|
||||
const auto _inst = v128::loadu(inst + i) & mask_vec;
|
||||
|
||||
if (_mm_movemask_epi8(v128::eq32(_inst, store_vec).vi))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < max; i++)
|
||||
{
|
||||
const u32 val = inst[i] & mask;
|
||||
|
||||
if (val == store_cond)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}();
|
||||
}
|
||||
else
|
||||
{
|
||||
ppu.use_full_rdata = false;
|
||||
}
|
||||
|
||||
for (u64 count = 0;; [&]()
|
||||
{
|
||||
if (ppu.state)
|
||||
|
@ -1120,7 +1182,13 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
|
|||
continue;
|
||||
}
|
||||
|
||||
ppu.rdata = data;
|
||||
const u64 rdata = data;
|
||||
|
||||
if (ppu.use_full_rdata)
|
||||
{
|
||||
mov_rdata(ppu.full_rdata, vm::_ref<spu_rdata_t>(addr & -128));
|
||||
reinterpret_cast<be_t<u64>&>(ppu.full_rdata[addr & 0x78]) = rdata; // Must match with rdata and must be of atomic 64-bits load
|
||||
}
|
||||
|
||||
if ((vm::reservation_acquire(addr, sizeof(T)) & mask_res) == ppu.rtime) [[likely]]
|
||||
{
|
||||
|
@ -1129,6 +1197,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
|
|||
ppu_log.warning("%s took too long: %u", sizeof(T) == 4 ? "LWARX" : "LDARX", count);
|
||||
}
|
||||
|
||||
ppu.rdata = rdata;
|
||||
return static_cast<T>(ppu.rdata << data_off >> size_off);
|
||||
}
|
||||
}
|
||||
|
@ -1190,6 +1259,251 @@ const auto ppu_stcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rda
|
|||
c.ret();
|
||||
});
|
||||
|
||||
const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
Label fall = c.newLabel();
|
||||
Label fail = c.newLabel();
|
||||
Label _ret = c.newLabel();
|
||||
Label skip = c.newLabel();
|
||||
Label next = c.newLabel();
|
||||
|
||||
//if (utils::has_avx() && !s_tsx_avx)
|
||||
//{
|
||||
// c.vzeroupper();
|
||||
//}
|
||||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::r13);
|
||||
c.push(x86::r12);
|
||||
c.push(x86::rbx);
|
||||
c.sub(x86::rsp, 40);
|
||||
#ifdef _WIN32
|
||||
if (!s_tsx_avx)
|
||||
{
|
||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
||||
c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Prepare registers
|
||||
c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
|
||||
c.mov(x86::rbp, x86::qword_ptr(x86::rax));
|
||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
||||
c.and_(x86::rbp, -128);
|
||||
c.movzx(args[0].r32(), args[0].r16());
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
|
||||
c.and_(x86::rbx, -128 / 2);
|
||||
c.xor_(x86::r12d, x86::r12d);
|
||||
c.mov(x86::r13, args[1]);
|
||||
c.bswap(args[3]);
|
||||
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::ymm0, x86::yword_ptr(args[2], 0));
|
||||
c.vmovups(x86::ymm1, x86::yword_ptr(args[2], 32));
|
||||
c.vmovups(x86::ymm2, x86::yword_ptr(args[2], 64));
|
||||
c.vmovups(x86::ymm3, x86::yword_ptr(args[2], 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movaps(x86::xmm0, x86::oword_ptr(args[2], 0));
|
||||
c.movaps(x86::xmm1, x86::oword_ptr(args[2], 16));
|
||||
c.movaps(x86::xmm2, x86::oword_ptr(args[2], 32));
|
||||
c.movaps(x86::xmm3, x86::oword_ptr(args[2], 48));
|
||||
c.movaps(x86::xmm4, x86::oword_ptr(args[2], 64));
|
||||
c.movaps(x86::xmm5, x86::oword_ptr(args[2], 80));
|
||||
c.movaps(x86::xmm6, x86::oword_ptr(args[2], 96));
|
||||
c.movaps(x86::xmm7, x86::oword_ptr(args[2], 112));
|
||||
}
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall, x86::r12, 4);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.and_(x86::rax, -128);
|
||||
c.cmp(x86::rax, x86::r13);
|
||||
c.jne(fail);
|
||||
c.test(x86::qword_ptr(x86::rbx), 127);
|
||||
c.jnz(skip);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
||||
c.vptest(x86::ymm0, x86::ymm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
||||
c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
||||
c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
||||
c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
||||
c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
||||
c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
||||
c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
||||
c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
||||
c.orps(x86::xmm0, x86::xmm1);
|
||||
c.orps(x86::xmm2, x86::xmm3);
|
||||
c.orps(x86::xmm4, x86::xmm5);
|
||||
c.orps(x86::xmm6, x86::xmm7);
|
||||
c.orps(x86::xmm0, x86::xmm2);
|
||||
c.orps(x86::xmm4, x86::xmm6);
|
||||
c.orps(x86::xmm0, x86::xmm4);
|
||||
c.ptest(x86::xmm0, x86::xmm0);
|
||||
}
|
||||
|
||||
c.jnz(fail);
|
||||
|
||||
c.mov(x86::rax, x86::rbp);
|
||||
c.shl(args[0], 1);
|
||||
c.or_(x86::rax, args[0]);
|
||||
c.mov(x86::qword_ptr(x86::rax), args[3]);
|
||||
c.shr(args[0], 1);
|
||||
|
||||
c.sub(x86::qword_ptr(x86::rbx), -128);
|
||||
c.xend();
|
||||
c.mov(x86::eax, 1);
|
||||
c.jmp(_ret);
|
||||
|
||||
c.bind(skip);
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
c.xor_(x86::r12d, x86::r12d);
|
||||
build_transaction_abort(c, 0);
|
||||
//c.jmp(fall);
|
||||
|
||||
c.bind(fall);
|
||||
c.sar(x86::eax, 24);
|
||||
c.js(fail);
|
||||
c.lock().bts(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::full_rdata)), static_cast<u32>(cpu_flag::wait));
|
||||
|
||||
// Touch memory if transaction failed without RETRY flag on the first attempt
|
||||
c.cmp(x86::r12, 1);
|
||||
c.jne(next);
|
||||
c.xor_(x86::rbp, 0xf80);
|
||||
c.lock().add(x86::dword_ptr(x86::rbp), 0);
|
||||
c.xor_(x86::rbp, 0xf80);
|
||||
|
||||
Label fall2 = c.newLabel();
|
||||
Label fail2 = c.newLabel();
|
||||
|
||||
// Lightened transaction: only compare and swap data
|
||||
c.bind(next);
|
||||
|
||||
// Try to "lock" reservation
|
||||
c.mov(x86::rax, x86::r13);
|
||||
c.add(x86::r13, 1);
|
||||
c.lock().cmpxchg(x86::qword_ptr(x86::rbx), x86::r13);
|
||||
c.jne(fail);
|
||||
|
||||
build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
||||
c.vptest(x86::ymm0, x86::ymm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
||||
c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
||||
c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
||||
c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
||||
c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
||||
c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
||||
c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
||||
c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
||||
c.orps(x86::xmm0, x86::xmm1);
|
||||
c.orps(x86::xmm2, x86::xmm3);
|
||||
c.orps(x86::xmm4, x86::xmm5);
|
||||
c.orps(x86::xmm6, x86::xmm7);
|
||||
c.orps(x86::xmm0, x86::xmm2);
|
||||
c.orps(x86::xmm4, x86::xmm6);
|
||||
c.orps(x86::xmm0, x86::xmm4);
|
||||
c.ptest(x86::xmm0, x86::xmm0);
|
||||
}
|
||||
|
||||
c.jnz(fail2);
|
||||
|
||||
c.mov(x86::rax, x86::rbp);
|
||||
c.shl(args[0], 1);
|
||||
c.or_(x86::rax, args[0]);
|
||||
c.mov(x86::qword_ptr(x86::rax), args[3]);
|
||||
c.shr(args[0], 1);
|
||||
|
||||
c.xend();
|
||||
c.lock().add(x86::qword_ptr(x86::rbx), 127);
|
||||
c.mov(x86::eax, 1);
|
||||
c.jmp(_ret);
|
||||
|
||||
c.bind(fall2);
|
||||
c.sar(x86::eax, 24);
|
||||
c.js(fail2);
|
||||
c.mov(x86::eax, 2);
|
||||
c.jmp(_ret);
|
||||
|
||||
c.bind(fail);
|
||||
build_transaction_abort(c, 0xff);
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
c.jmp(_ret);
|
||||
|
||||
c.bind(fail2);
|
||||
build_transaction_abort(c, 0xff);
|
||||
c.lock().sub(x86::qword_ptr(x86::rbx), 1);
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
//c.jmp(_ret);
|
||||
|
||||
c.bind(_ret);
|
||||
|
||||
#ifdef _WIN32
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
||||
c.vmovups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
||||
c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
||||
c.movups(x86::xmm8, x86::oword_ptr(x86::rsp, 32));
|
||||
c.movups(x86::xmm9, x86::oword_ptr(x86::rsp, 48));
|
||||
c.movups(x86::xmm10, x86::oword_ptr(x86::rsp, 64));
|
||||
c.movups(x86::xmm11, x86::oword_ptr(x86::rsp, 80));
|
||||
c.movups(x86::xmm12, x86::oword_ptr(x86::rsp, 96));
|
||||
c.movups(x86::xmm13, x86::oword_ptr(x86::rsp, 112));
|
||||
c.movups(x86::xmm14, x86::oword_ptr(x86::rsp, 128));
|
||||
c.movups(x86::xmm15, x86::oword_ptr(x86::rsp, 144));
|
||||
}
|
||||
#endif
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vzeroupper();
|
||||
}
|
||||
|
||||
c.add(x86::rsp, 40);
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::r12);
|
||||
c.pop(x86::r13);
|
||||
c.pop(x86::rbp);
|
||||
c.ret();
|
||||
});
|
||||
|
||||
template <typename T>
|
||||
static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
{
|
||||
|
@ -1199,11 +1513,9 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
}
|
||||
|
||||
auto& data = vm::_ref<atomic_be_t<u64>>(addr & -8);
|
||||
constexpr u64 size_off = (sizeof(T) * 8) & 63;
|
||||
const u64 old_data = ppu.rdata;
|
||||
|
||||
const T old_data = static_cast<T>(ppu.rdata << ((addr & 7) * 8) >> size_off);
|
||||
auto& res = vm::reservation_acquire(addr, sizeof(T));
|
||||
const u64 old_data = ppu.rdata;
|
||||
const u64 rtime = ppu.rtime;
|
||||
|
||||
if constexpr (sizeof(T) == sizeof(u32))
|
||||
{
|
||||
|
@ -1223,80 +1535,151 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
reg_value = bf.all;
|
||||
}
|
||||
|
||||
if ((std::exchange(ppu.raddr, 0) ^ addr) & -8 || old_data != data || ppu.rtime != (res & -128))
|
||||
// Test if store address is on the same aligned 8-bytes memory as load
|
||||
if (const u32 raddr = std::exchange(ppu.raddr, 0); raddr / 8 != addr / 8)
|
||||
{
|
||||
// Even when the reservation address does not match the target address must be valid
|
||||
if (!vm::check_addr(addr, 1, vm::page_writable))
|
||||
// If not and it is on the same aligned 128-byte memory, proceed only if 128-byte reservations are enabled
|
||||
// In realhw the store address can be at any address of the 128-byte cache line
|
||||
if (raddr / 128 != addr / 128 || !ppu.use_full_rdata)
|
||||
{
|
||||
// Access violate
|
||||
data += 0;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (reg_value == old_data)
|
||||
{
|
||||
if (res.compare_and_swap_test(ppu.rtime, ppu.rtime + 128))
|
||||
{
|
||||
res.notify_all();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
addr &= -8;
|
||||
|
||||
if (g_use_rtm) [[likely]]
|
||||
{
|
||||
switch (ppu_stcx_tx(addr, ppu.rtime, old_data, reg_value))
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
// Reservation lost
|
||||
return false;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
res.notify_all();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (res == ppu.rtime && vm::reservation_trylock(res, ppu.rtime))
|
||||
{
|
||||
if (data.compare_and_swap_test(old_data, reg_value))
|
||||
// Even when the reservation address does not match the target address must be valid
|
||||
if (!vm::check_addr(addr, 1, vm::page_writable))
|
||||
{
|
||||
res += 127;
|
||||
res.notify_all();
|
||||
return true;
|
||||
// Access violate
|
||||
data += 0;
|
||||
}
|
||||
|
||||
res -= 1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (old_data != data || rtime != (res & -128))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if ([&]()
|
||||
{
|
||||
if (ppu.use_full_rdata) [[unlikely]]
|
||||
{
|
||||
if (g_use_rtm) [[likely]]
|
||||
{
|
||||
switch (ppu_stcx_accurate_tx(addr & -8, rtime, ppu.full_rdata, reg_value))
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
// Reservation lost
|
||||
return false;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
cpu_thread::suspend_all cpu_lock(&ppu);
|
||||
|
||||
// Give up if PUTLLUC happened
|
||||
if (res == (rtime | 1) && cmp_rdata(ppu.full_rdata, vm::_ref<spu_rdata_t>(addr & -128)))
|
||||
{
|
||||
data.release(reg_value);
|
||||
res.release(rtime + 128);
|
||||
return true;
|
||||
}
|
||||
|
||||
res.release(rtime);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!vm::reservation_trylock(res, rtime))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Align address: we do not need the lower 7 bits anymore
|
||||
addr &= -128;
|
||||
|
||||
// Cache line data
|
||||
auto& cline_data = vm::_ref<spu_rdata_t>(addr);
|
||||
|
||||
data += 0;
|
||||
|
||||
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
|
||||
|
||||
if (render) render->pause();
|
||||
|
||||
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
||||
const bool success = [&]()
|
||||
{
|
||||
// Full lock (heavyweight)
|
||||
// TODO: vm::check_addr
|
||||
vm::writer_lock lock(addr);
|
||||
|
||||
if (cmp_rdata(ppu.full_rdata, super_data))
|
||||
{
|
||||
data.release(reg_value);
|
||||
res.release(rtime + 128);
|
||||
return true;
|
||||
}
|
||||
|
||||
res.release(rtime);
|
||||
return false;
|
||||
}();
|
||||
|
||||
if (render) render->unpause();
|
||||
return success;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
if (reg_value == old_data)
|
||||
{
|
||||
return res.compare_and_swap_test(rtime, rtime + 128);
|
||||
}
|
||||
|
||||
if (!vm::reservation_trylock(res, ppu.rtime))
|
||||
// Aligned 8-byte reservations will be used here
|
||||
addr &= -8;
|
||||
|
||||
if (g_use_rtm) [[likely]]
|
||||
{
|
||||
switch (ppu_stcx_tx(addr, rtime, old_data, reg_value))
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
// Reservation lost
|
||||
return false;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (res == rtime && vm::reservation_trylock(res, rtime))
|
||||
{
|
||||
const bool ret = data.compare_and_swap_test(old_data, reg_value);
|
||||
res.release(rtime + 128);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!vm::reservation_trylock(res, ppu.rtime))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool ret = data.compare_and_swap_test(old_data, reg_value);
|
||||
res.release(rtime + 128);
|
||||
return ret;
|
||||
}())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool result = data.compare_and_swap_test(old_data, reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
res.release(ppu.rtime + 128);
|
||||
res.notify_all();
|
||||
}
|
||||
else
|
||||
{
|
||||
res.release(ppu.rtime);
|
||||
return true;
|
||||
}
|
||||
|
||||
return result;
|
||||
return false;
|
||||
}
|
||||
|
||||
extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
||||
|
@ -1645,6 +2028,7 @@ extern void ppu_initialize(const ppu_module& info)
|
|||
accurate_ppu_vector_nan,
|
||||
java_mode_handling,
|
||||
accurate_cache_line_stores,
|
||||
reservations_128_byte,
|
||||
|
||||
__bitset_enum_max
|
||||
};
|
||||
|
@ -1670,6 +2054,10 @@ extern void ppu_initialize(const ppu_module& info)
|
|||
{
|
||||
settings += ppu_settings::accurate_cache_line_stores;
|
||||
}
|
||||
if (g_cfg.core.ppu_128_reservations_loop_max_length > 0)
|
||||
{
|
||||
settings += ppu_settings::reservations_128_byte;
|
||||
}
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
fmt::append(obj_name, "v3-tane-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
|
|
|
@ -194,6 +194,8 @@ public:
|
|||
u32 raddr{0}; // Reservation addr
|
||||
u64 rtime{0};
|
||||
u64 rdata{0}; // Reservation data
|
||||
alignas(64) std::byte full_rdata[128]{}; // Full reservation data
|
||||
bool use_full_rdata{};
|
||||
|
||||
atomic_t<s32> prio{0}; // Thread priority (0..3071)
|
||||
const u32 stack_size; // Stack size
|
||||
|
|
|
@ -2527,6 +2527,12 @@ void PPUTranslator::MFOCRF(ppu_opcode_t op)
|
|||
|
||||
void PPUTranslator::LWARX(ppu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.ppu_128_reservations_loop_max_length > 0)
|
||||
{
|
||||
// CIA will be used in lwarx handler
|
||||
m_ir->CreateStore(Trunc(GetAddr(), GetType<u32>()), m_ir->CreateStructGEP(nullptr, m_thread, static_cast<uint>(&m_cia - m_locals)), true);
|
||||
}
|
||||
|
||||
SetGpr(op.rd, Call(GetType<u32>(), "__lwarx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)));
|
||||
}
|
||||
|
||||
|
@ -2663,6 +2669,12 @@ void PPUTranslator::MULHW(ppu_opcode_t op)
|
|||
|
||||
void PPUTranslator::LDARX(ppu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.ppu_128_reservations_loop_max_length > 0)
|
||||
{
|
||||
// CIA will be used in ldarx handler
|
||||
m_ir->CreateStore(Trunc(GetAddr(), GetType<u32>()), m_ir->CreateStructGEP(nullptr, m_thread, static_cast<uint>(&m_cia - m_locals)), true);
|
||||
}
|
||||
|
||||
SetGpr(op.rd, Call(GetType<u64>(), "__ldarx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)));
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
using spu_rdata_t = decltype(spu_thread::rdata);
|
||||
|
||||
template <>
|
||||
void fmt_class_string<mfc_atomic_status>::format(std::string& out, u64 arg)
|
||||
{
|
||||
|
@ -121,15 +123,22 @@ static FORCE_INLINE bool cmp_rdata_avx(const __m256i* lhs, const __m256i* rhs)
|
|||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE bool cmp_rdata(const decltype(spu_thread::rdata)& lhs, const decltype(spu_thread::rdata)& rhs)
|
||||
#ifdef _MSC_VER
|
||||
__forceinline
|
||||
#else
|
||||
__attribute__((always_inline))
|
||||
#endif
|
||||
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs)
|
||||
{
|
||||
#ifndef __AVX__
|
||||
if (s_tsx_avx) [[likely]]
|
||||
#endif
|
||||
{
|
||||
return cmp_rdata_avx(reinterpret_cast<const __m256i*>(&lhs), reinterpret_cast<const __m256i*>(&rhs));
|
||||
return cmp_rdata_avx(reinterpret_cast<const __m256i*>(_lhs), reinterpret_cast<const __m256i*>(_rhs));
|
||||
}
|
||||
|
||||
const auto lhs = reinterpret_cast<const v128*>(_lhs);
|
||||
const auto rhs = reinterpret_cast<const v128*>(_rhs);
|
||||
const v128 a = (lhs[0] ^ rhs[0]) | (lhs[1] ^ rhs[1]);
|
||||
const v128 b = (lhs[2] ^ rhs[2]) | (lhs[3] ^ rhs[3]);
|
||||
const v128 c = (lhs[4] ^ rhs[4]) | (lhs[5] ^ rhs[5]);
|
||||
|
@ -170,60 +179,23 @@ static FORCE_INLINE void mov_rdata_avx(__m256i* dst, const __m256i* src)
|
|||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const decltype(spu_thread::rdata)& src)
|
||||
#ifdef _MSC_VER
|
||||
__forceinline
|
||||
#else
|
||||
__attribute__((always_inline))
|
||||
#endif
|
||||
extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
||||
{
|
||||
#ifndef __AVX__
|
||||
if (s_tsx_avx) [[likely]]
|
||||
#endif
|
||||
{
|
||||
mov_rdata_avx(reinterpret_cast<__m256i*>(&dst), reinterpret_cast<const __m256i*>(&src));
|
||||
mov_rdata_avx(reinterpret_cast<__m256i*>(_dst), reinterpret_cast<const __m256i*>(_src));
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
const v128 data0 = src[0];
|
||||
const v128 data1 = src[1];
|
||||
const v128 data2 = src[2];
|
||||
dst[0] = data0;
|
||||
dst[1] = data1;
|
||||
dst[2] = data2;
|
||||
}
|
||||
|
||||
{
|
||||
const v128 data0 = src[3];
|
||||
const v128 data1 = src[4];
|
||||
const v128 data2 = src[5];
|
||||
dst[3] = data0;
|
||||
dst[4] = data1;
|
||||
dst[5] = data2;
|
||||
}
|
||||
|
||||
{
|
||||
const v128 data0 = src[6];
|
||||
const v128 data1 = src[7];
|
||||
dst[6] = data0;
|
||||
dst[7] = data1;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns nullptr if rsx does not need pausing on reservations op, rsx ptr otherwise
|
||||
static FORCE_INLINE rsx::thread* get_rsx_if_needs_res_pause(u32 addr)
|
||||
{
|
||||
if (!g_cfg.core.rsx_accurate_res_access) [[likely]]
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto render = rsx::get_current_renderer();
|
||||
|
||||
ASSUME(render);
|
||||
|
||||
if (render->iomap_table.io[addr >> 20].load() == umax) [[likely]]
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
return render;
|
||||
// TODO: use std::assume_aligned
|
||||
std::memcpy(reinterpret_cast<v128*>(_dst), reinterpret_cast<const v128*>(_src), 128);
|
||||
}
|
||||
|
||||
extern u64 get_timebased_time();
|
||||
|
@ -1402,7 +1374,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
|||
}
|
||||
case 128:
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
mov_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -1424,7 +1396,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
|||
}
|
||||
}
|
||||
|
||||
if (time0 != vm::reservation_acquire(eal, size0) || (size0 == 128 && !cmp_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src))))
|
||||
if (time0 != vm::reservation_acquire(eal, size0) || (size0 == 128 && !cmp_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src))))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -1496,7 +1468,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
|||
{
|
||||
case 128:
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
mov_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -1572,7 +1544,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
|||
|
||||
while (size0 >= 128)
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
mov_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src));
|
||||
|
||||
dst += 128;
|
||||
src += 128;
|
||||
|
@ -1606,7 +1578,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
|||
|
||||
while (size >= 128)
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
mov_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src));
|
||||
|
||||
dst += 128;
|
||||
src += 128;
|
||||
|
@ -1671,7 +1643,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
|||
|
||||
while (size >= 128)
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
mov_rdata(*reinterpret_cast<spu_rdata_t*>(dst), *reinterpret_cast<const spu_rdata_t*>(src));
|
||||
|
||||
dst += 128;
|
||||
src += 128;
|
||||
|
@ -1849,7 +1821,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
return false;
|
||||
}
|
||||
|
||||
const auto& to_write = _ref<decltype(rdata)>(args.lsa & 0x3ff80);
|
||||
const auto& to_write = _ref<spu_rdata_t>(args.lsa & 0x3ff80);
|
||||
auto& res = vm::reservation_acquire(addr, 128);
|
||||
|
||||
if (!g_use_rtm && rtime != res)
|
||||
|
@ -1860,16 +1832,16 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
if (!g_use_rtm && cmp_rdata(to_write, rdata))
|
||||
{
|
||||
// Writeback of unchanged data. Only check memory change
|
||||
return cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)) && res.compare_and_swap_test(rtime, rtime + 128);
|
||||
return cmp_rdata(rdata, vm::_ref<spu_rdata_t>(addr)) && res.compare_and_swap_test(rtime, rtime + 128);
|
||||
}
|
||||
|
||||
if (g_use_rtm) [[likely]]
|
||||
{
|
||||
switch (spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()))
|
||||
switch (spu_putllc_tx(addr, rtime, rdata, to_write))
|
||||
{
|
||||
case 2:
|
||||
{
|
||||
const auto render = get_rsx_if_needs_res_pause(addr);
|
||||
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
|
||||
|
||||
if (render) render->pause();
|
||||
|
||||
|
@ -1878,7 +1850,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
// Give up if PUTLLUC happened
|
||||
if (res == (rtime | 1))
|
||||
{
|
||||
auto& data = vm::_ref<decltype(rdata)>(addr);
|
||||
auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
|
||||
if (cmp_rdata(rdata, data))
|
||||
{
|
||||
|
@ -1906,11 +1878,11 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
|
||||
vm::_ref<atomic_t<u32>>(addr) += 0;
|
||||
|
||||
const auto render = get_rsx_if_needs_res_pause(addr);
|
||||
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
|
||||
|
||||
if (render) render->pause();
|
||||
|
||||
auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
|
||||
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
||||
const bool success = [&]()
|
||||
{
|
||||
// Full lock (heavyweight)
|
||||
|
@ -1941,7 +1913,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
if (raddr)
|
||||
{
|
||||
// Last check for event before we clear the reservation
|
||||
if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & (-128 | vm::dma_lockb)) || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
|
||||
if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & (-128 | vm::dma_lockb)) || !cmp_rdata(rdata, vm::_ref<spu_rdata_t>(raddr)))
|
||||
{
|
||||
set_events(SPU_EVENT_LR);
|
||||
}
|
||||
|
@ -1954,14 +1926,13 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
|
||||
void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
||||
{
|
||||
using rdata_t = decltype(spu_thread::rdata);
|
||||
const auto cpu = get_current_cpu_thread();
|
||||
|
||||
if (g_use_rtm) [[likely]]
|
||||
{
|
||||
const u32 result = spu_putlluc_tx(addr, to_write, cpu);
|
||||
|
||||
const auto render = result != 1 ? get_rsx_if_needs_res_pause(addr) : nullptr;
|
||||
const auto render = result != 1 ? rsx::get_rsx_if_needs_res_pause(addr) : nullptr;
|
||||
|
||||
if (render) render->pause();
|
||||
|
||||
|
@ -1977,7 +1948,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
busy_wait(100);
|
||||
}
|
||||
|
||||
mov_rdata(vm::_ref<rdata_t>(addr), *static_cast<const rdata_t*>(to_write));
|
||||
mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
|
||||
vm::reservation_acquire(addr, 128) += 64;
|
||||
}
|
||||
}
|
||||
|
@ -1995,7 +1966,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
busy_wait(100);
|
||||
}
|
||||
|
||||
mov_rdata(vm::_ref<rdata_t>(addr), *static_cast<const rdata_t*>(to_write));
|
||||
mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
|
||||
vm::reservation_acquire(addr, 128) += 64;
|
||||
}
|
||||
|
||||
|
@ -2004,21 +1975,21 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
}
|
||||
else
|
||||
{
|
||||
auto& data = vm::_ref<rdata_t>(addr);
|
||||
auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
auto [res, time0] = vm::reservation_lock(addr, 128);
|
||||
|
||||
*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
|
||||
|
||||
const auto render = get_rsx_if_needs_res_pause(addr);
|
||||
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
|
||||
|
||||
if (render) render->pause();
|
||||
|
||||
auto& super_data = *vm::get_super_ptr<rdata_t>(addr);
|
||||
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
||||
{
|
||||
// Full lock (heavyweight)
|
||||
// TODO: vm::check_addr
|
||||
vm::writer_lock lock(addr);
|
||||
mov_rdata(super_data, *static_cast<const rdata_t*>(to_write));
|
||||
mov_rdata(super_data, *static_cast<const spu_rdata_t*>(to_write));
|
||||
res.release(time0 + 128);
|
||||
}
|
||||
|
||||
|
@ -2044,7 +2015,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
|
|||
// Failure, fallback to the main implementation
|
||||
}
|
||||
|
||||
do_cell_atomic_128_store(addr, _ptr<decltype(rdata)>(args.lsa & 0x3ff80));
|
||||
do_cell_atomic_128_store(addr, _ptr<spu_rdata_t>(args.lsa & 0x3ff80));
|
||||
vm::reservation_notifier(addr, 128).notify_all();
|
||||
}
|
||||
|
||||
|
@ -2202,7 +2173,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
case MFC_GETLLAR_CMD:
|
||||
{
|
||||
const u32 addr = ch_mfc_cmd.eal & -128;
|
||||
const auto& data = vm::_ref<decltype(rdata)>(addr);
|
||||
const auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
|
||||
if (addr == raddr && !g_use_rtm && g_cfg.core.spu_getllar_polling_detection && rtime == vm::reservation_acquire(addr, 128) && cmp_rdata(rdata, data))
|
||||
{
|
||||
|
@ -2210,7 +2181,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
auto& dst = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
|
||||
auto& dst = _ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80);
|
||||
u64 ntime;
|
||||
|
||||
for (u64 i = 0;; [&]()
|
||||
|
@ -2269,7 +2240,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
if (raddr && raddr != addr)
|
||||
{
|
||||
// Last check for event before we replace the reservation with a new one
|
||||
if ((vm::reservation_acquire(raddr, 128) & (-128 | vm::dma_lockb)) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
|
||||
if ((vm::reservation_acquire(raddr, 128) & (-128 | vm::dma_lockb)) != rtime || !cmp_rdata(rdata, vm::_ref<spu_rdata_t>(raddr)))
|
||||
{
|
||||
set_events(SPU_EVENT_LR);
|
||||
}
|
||||
|
@ -2443,7 +2414,7 @@ spu_thread::ch_events_t spu_thread::get_events(u32 mask_hint, bool waiting, bool
|
|||
u32 collect = 0;
|
||||
|
||||
// Check reservation status and set SPU_EVENT_LR if lost
|
||||
if (mask_hint & SPU_EVENT_LR && raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr))))
|
||||
if (mask_hint & SPU_EVENT_LR && raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<spu_rdata_t>(raddr))))
|
||||
{
|
||||
collect |= SPU_EVENT_LR;
|
||||
raddr = 0;
|
||||
|
|
|
@ -669,7 +669,7 @@ public:
|
|||
|
||||
// Reservation Data
|
||||
u64 rtime = 0;
|
||||
alignas(64) std::array<v128, 8> rdata{};
|
||||
alignas(64) std::byte rdata[128]{};
|
||||
u32 raddr = 0;
|
||||
|
||||
u32 srr0;
|
||||
|
|
|
@ -273,7 +273,7 @@ namespace vm
|
|||
{
|
||||
if (auto& ptr = g_tls_locked)
|
||||
{
|
||||
*ptr = nullptr;
|
||||
ptr->release(nullptr);
|
||||
ptr = nullptr;
|
||||
|
||||
if (cpu.state & cpu_flag::memory)
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "Emu/system_config.h"
|
||||
|
||||
extern u64 get_guest_system_time();
|
||||
extern u64 get_system_time();
|
||||
|
@ -964,4 +965,24 @@ namespace rsx
|
|||
{
|
||||
return g_fxo->get<rsx::thread>();
|
||||
}
|
||||
|
||||
// Returns nullptr if rsx does not need pausing on reservations op, rsx ptr otherwise
|
||||
inline thread* get_rsx_if_needs_res_pause(u32 addr)
|
||||
{
|
||||
if (!g_cfg.core.rsx_accurate_res_access) [[likely]]
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto render = get_current_renderer();
|
||||
|
||||
ASSUME(render);
|
||||
|
||||
if (render->iomap_table.io[addr >> 20].load() == umax) [[likely]]
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
return render;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
|
||||
cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
|
||||
cfg::_bool llvm_ppu_jm_handling{ this, "PPU LLVM Java Mode Handling", false }; // Respect current Java Mode for alti-vec ops by PPU LLVM
|
||||
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
|
||||
cfg::_bool llvm_ppu_accurate_vector_nan{ this, "PPU LLVM Accurate Vector NaN values", false };
|
||||
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue