Mirror of https://github.com/RPCS3/rpcs3.git, synced 2025-04-20 03:25:16 +00:00

Commit ddf9c672ed: Merge 119589f50d into 8437a5f5ac
18 changed files with 317 additions and 214 deletions
@@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.28)
 project(rpcs3 LANGUAGES C CXX)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 	if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11)
@@ -344,15 +344,7 @@ jit_runtime_base& asmjit::get_global_runtime()
 	{
 		custom_runtime() noexcept
 		{
-			// Search starting in first 2 GiB of memory
-			for (u64 addr = size;; addr += size)
-			{
-				if (auto ptr = utils::memory_reserve(size, reinterpret_cast<void*>(addr)))
-				{
-					m_pos.raw() = static_cast<uchar*>(ptr);
-					break;
-				}
-			}
+			ensure(m_pos.raw() = static_cast<uchar*>(utils::memory_reserve(size)));
 
 			// Initialize "end" pointer
 			m_max = m_pos + size;
@@ -2490,7 +2490,7 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 	if (alert)
 	{
 		list.set<0>(_this->m_sync, 0);
-		list.set<1>(utils::bless<atomic_t<u32>>(&_this->m_taskq)[1], 0);
+		list.template set<1>(_this->m_taskq);
 	}
 	else
 	{
@@ -49,7 +49,7 @@ public:
 
 		if (!next)
 		{
 			// Do not allow access beyond many element more at a time
 			ensure(!installed && index - i < N * 2);
 
 			installed = true;
@@ -384,17 +384,26 @@ public:
 template <typename T>
 class lf_queue final
 {
-	atomic_t<u64> m_head{0};
+public:
+	struct fat_ptr
+	{
+		u64 ptr{};
+		u32 is_non_null{};
+		u32 reserved{};
+	};
+
+private:
+	atomic_t<fat_ptr> m_head{fat_ptr{}};
 
-	lf_queue_item<T>* load(u64 value) const noexcept
+	lf_queue_item<T>* load(fat_ptr value) const noexcept
 	{
-		return reinterpret_cast<lf_queue_item<T>*>(value >> 16);
+		return reinterpret_cast<lf_queue_item<T>*>(value.ptr);
 	}
 
 	// Extract all elements and reverse element order (FILO to FIFO)
 	lf_queue_item<T>* reverse() noexcept
 	{
-		if (auto* head = load(m_head) ? load(m_head.exchange(0)) : nullptr)
+		if (auto* head = load(m_head) ? load(m_head.exchange(fat_ptr{})) : nullptr)
 		{
 			if (auto* prev = head->m_link)
 			{
@@ -420,7 +429,7 @@ public:
 
 	lf_queue(lf_queue&& other) noexcept
 	{
-		m_head.release(other.m_head.exchange(0));
+		m_head.release(other.m_head.exchange(fat_ptr{}));
 	}
 
 	lf_queue& operator=(lf_queue&& other) noexcept
@@ -431,7 +440,7 @@ public:
 		}
 
 		delete load(m_head);
-		m_head.release(other.m_head.exchange(0));
+		m_head.release(other.m_head.exchange(fat_ptr{}));
 		return *this;
 	}
 
@@ -442,9 +451,9 @@ public:
 
 	void wait(std::nullptr_t /*null*/ = nullptr) noexcept
 	{
-		if (m_head == 0)
+		if (!operator bool())
 		{
-			utils::bless<atomic_t<u32>>(&m_head)[1].wait(0);
+			utils::bless<atomic_t<u32>>(&m_head.raw().is_non_null)->wait(0);
 		}
 	}
 
@@ -455,7 +464,7 @@ public:
 
 	explicit operator bool() const noexcept
 	{
-		return m_head != 0;
+		return observe() != nullptr;
 	}
 
 	template <bool Notify = true, typename... Args>
@@ -464,25 +473,25 @@ public:
 		auto oldv = m_head.load();
 		auto item = new lf_queue_item<T>(load(oldv), std::forward<Args>(args)...);
 
-		while (!m_head.compare_exchange(oldv, reinterpret_cast<u64>(item) << 16))
+		while (!m_head.compare_exchange(oldv, fat_ptr{reinterpret_cast<u64>(item), item != nullptr, 0}))
 		{
 			item->m_link = load(oldv);
 		}
 
-		if (!oldv && Notify)
+		if (!oldv.ptr && Notify)
 		{
 			// Notify only if queue was empty
 			notify(true);
 		}
 
-		return !oldv;
+		return !oldv.ptr;
 	}
 
 	void notify(bool force = false)
 	{
 		if (force || operator bool())
 		{
-			utils::bless<atomic_t<u32>>(&m_head)[1].notify_one();
+			utils::bless<atomic_t<u32>>(&m_head.raw().is_non_null)->notify_one();
 		}
 	}
 
@@ -498,7 +507,7 @@ public:
 	lf_queue_slice<T> pop_all_reversed()
 	{
 		lf_queue_slice<T> result;
-		result.m_head = load(m_head.exchange(0));
+		result.m_head = load(m_head.exchange(fat_ptr{}));
 		return result;
 	}
 
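Note: this lf_queue rewrite is the template for the whole patch set. The old representation packed a 48-bit pointer into a single u64 (shifted left by 16, with the low bits reused), which silently assumed userspace pointers never need more than 48 significant bits; the new fat_ptr carries the full 64-bit pointer plus explicit 32-bit fields, and the separate is_non_null word doubles as the futex target for wait()/notify(). A minimal sketch of the two layouts (illustrative types, not the exact RPCS3 declarations):

```cpp
#include <cstdint>

// Old encoding: pointer << 16, unpacked with a right shift.
// Breaks once real pointers need more than 48 significant bits.
inline std::uint64_t pack48(void* p) { return reinterpret_cast<std::uint64_t>(p) << 16; }
inline void* unpack48(std::uint64_t v) { return reinterpret_cast<void*>(v >> 16); }

// New encoding: untruncated pointer plus explicit flag words.
struct fat_ptr
{
	std::uint64_t ptr{};         // full pointer, no shifting or masking
	std::uint32_t is_non_null{}; // 0 or 1; waitable as a standalone 32-bit word
	std::uint32_t reserved{};
};
static_assert(sizeof(fat_ptr) == 16); // updates now need a 128-bit CAS
```

This is also why the wait/notify calls change shape: `utils::bless<atomic_t<u32>>(&m_head)[1]` aliased the upper 32 bits of the packed u64, while `&m_head.raw().is_non_null` names the waitable word directly.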
@@ -5,13 +5,12 @@ if(MSVC)
 	add_compile_definitions(
 		_CRT_SECURE_NO_DEPRECATE=1 _CRT_NON_CONFORMING_SWPRINTFS=1 _SCL_SECURE_NO_WARNINGS=1
 		NOMINMAX _ENABLE_EXTENDED_ALIGNED_STORAGE=1 _HAS_EXCEPTIONS=0)
-	add_link_options(/DYNAMICBASE:NO /BASE:0x10000 /FIXED)
+	add_link_options(/DYNAMICBASE:YES)
 
 	#TODO: Some of these could be cleaned up
 	add_compile_options(/wd4805) # Comparing boolean and int
 	add_compile_options(/wd4804) # Using integer operators with booleans
 	add_compile_options(/wd4200) # Zero-sized array in struct/union
-	add_link_options(/ignore:4281) # Undesirable base address 0x10000
 
 	# MSVC 2017 uses iterator as base class internally, causing a lot of warning spam
 	add_compile_definitions(_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING=1)
@@ -19,8 +18,6 @@ if(MSVC)
 	# Increase stack limit to 8 MB
 	add_link_options(/STACK:8388608,1048576)
 else()
-	# Some distros have the compilers set to use PIE by default, but RPCS3 doesn't work with PIE, so we need to disable it.
-	check_cxx_compiler_flag("-no-pie" HAS_NO_PIE)
 	check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
 	check_cxx_compiler_flag("-msse -msse2 -mcx16" COMPILER_X86)
 	if (APPLE)
@@ -96,15 +93,6 @@ else()
 	if(NOT APPLE AND NOT WIN32)
 		# This hides our LLVM from mesa's LLVM, otherwise we get some unresolvable conflicts.
 		add_link_options(-Wl,--exclude-libs,ALL)
-
-		if(HAS_NO_PIE)
-			add_link_options(-no-pie)
-		endif()
 	elseif(APPLE)
-		if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
-			add_link_options(-Wl,-image_base,0x10000 -Wl,-pagezero_size,0x10000)
-			add_link_options(-Wl,-no_pie)
-		endif()
 	elseif(WIN32)
 		add_compile_definitions(__STDC_FORMAT_MACROS=1)
 
@@ -113,11 +101,6 @@ else()
 
 		# Increase stack limit to 8 MB
 		add_link_options(-Wl,--stack -Wl,8388608)
-
-		# For arm64 windows, the image base cannot be below 4GB or the OS rejects the binary without much explanation.
-		if(COMPILER_X86)
-			add_link_options(-Wl,--image-base,0x10000)
-		endif()
 	endif()
 
 	# Specify C++ library to use as standard C++ when using clang (not required on linux due to GNU)
@@ -1902,8 +1902,9 @@ auto gen_ghc_cpp_trampoline(ppu_intrp_func_t fn_target)
 		// Take second ghc arg
 		c.mov(args[0], x86::rbp);
 		c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
-		c.add(args[2], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
-		c.jmp(fn_target);
+		c.movabs(args[1], reinterpret_cast<u64>(&vm::g_base_addr));
+		c.add(args[2], x86::qword_ptr(args[1]));
+		c.jmp(Imm(fn_target));
 	};
 }
 
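Note: the movabs rewrites here and in the hunks below all address the same x86-64 encoding limit. A plain memory operand's displacement is a sign-extended 32-bit field, so `x86::qword_ptr(reinterpret_cast<u64>(&global))` only encodes when the global lives in the low 2 GiB. With ASLR enabled, the image and its globals can be based anywhere in the 64-bit space, so the absolute address must first be materialized with a movabs and then dereferenced through the register. A hedged asmjit sketch of the pattern (the helper name is ours, not RPCS3's):

```cpp
#include <asmjit/x86.h>
#include <cstdint>

// Minimal sketch (assumed helper): load the 64-bit value stored at 'global'
// without assuming its address fits a 32-bit displacement.
void emit_load_global_qword(asmjit::x86::Assembler& c, const asmjit::x86::Gp& dst, const void* global)
{
	c.movabs(dst, reinterpret_cast<std::uint64_t>(global)); // 64-bit absolute address
	c.mov(dst, asmjit::x86::qword_ptr(dst));                // dereference through the register
}
```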
@@ -220,19 +220,21 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
 	c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);
 
 	// Initialize args
-	c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
+	c.movabs(x86::r13, reinterpret_cast<u64>(&vm::g_exec_addr));
+	c.mov(x86::r13, x86::qword_ptr(x86::r13));
 	c.mov(x86::rbp, args[0]);
 	c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC
 
-	c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target
-	c.mov(x86::rdx, x86::rax);
-	c.shl(x86::rax, 16);
-	c.shr(x86::rax, 16);
-	c.shr(x86::rdx, 48);
-	c.mov(x86::r12d, x86::edx); // Load relocation base
+	c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::rdx, 1, 0)); // Load call target
+	c.movabs(x86::r12, vm::g_exec_addr_seg_offset);
+	c.add(x86::r12, x86::r13);
+	c.shr(x86::edx, 1);
+	c.mov(x86::edx, x86::word_ptr(x86::r12, x86::edx)); // Load relocation base
+	c.shl(x86::edx, 13);
+	c.mov(x86::r12d, x86::edx); // Set relocation base
 
-	c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
+	c.movabs(x86::rbx, reinterpret_cast<u64>(&vm::g_base_addr));
+	c.mov(x86::rbx, x86::qword_ptr(x86::rbx));
 	c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers
 	c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1)));
 	c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2)));
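Note: this hunk also replaces the old trick of smuggling the relocation base (REG_Hp) into the top 16 bits of each jumptable entry. Entries are now full, unmasked function pointers, and the 8 KiB-aligned segment base moves to a parallel table of u16 values placed g_exec_addr_seg_offset bytes above the jumptable, one entry per 4-byte PPU instruction. A plain C++ sketch of the two lookups the generated code performs (u8/u16/u32/u64 and read_from_ptr are RPCS3's aliases and helper):

```cpp
// Jumptable: one u64 entry per 4-byte instruction, at byte offset addr * 2.
u64 load_call_target(const u8* exec, u32 addr)
{
	return read_from_ptr<u64>(exec + u64{addr} * 2); // full pointer, no unmasking
}

// Segment table: one u16 entry per instruction, i.e. byte offset addr >> 1
// past the seg_offset boundary; the stored value is seg_base >> 13.
u32 load_relocation_base(const u8* exec, u64 seg_offset, u32 addr)
{
	return u32{read_from_ptr<u16>(exec + seg_offset + (addr >> 1))} << 13;
}
```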
@@ -346,14 +348,11 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
 	c.ldr(call_target, arm::Mem(a64::x19, pc));
 	// Compute REG_Hp
 	const arm::GpX reg_hp = a64::x21;
-	c.mov(reg_hp, call_target);
-	c.lsr(reg_hp, reg_hp, 48);
-
-	// Zero top 16 bits of call target
-	c.lsl(call_target, call_target, Imm(16));
-	c.lsr(call_target, call_target, Imm(16));
+	c.mov(reg_hp, Imm(vm::g_exec_addr_seg_offset));
+	c.add(reg_hp, reg_hp, pc, arm::Shift(arm::ShiftOp::kLSR, 2));
+	c.ldrh(reg_hp.w(), arm::Mem(a64::x19, reg_hp));
+	c.lsl(reg_hp.w(), reg_hp.w(), 13);
 
 	// Load registers
 	c.mov(a64::x22, Imm(reinterpret_cast<u64>(&vm::g_base_addr)));
 	c.ldr(a64::x22, arm::Mem(a64::x22));
@@ -473,6 +472,11 @@ static inline u8* ppu_ptr(u32 addr)
 	return vm::g_exec_addr + u64{addr} * 2;
 }
 
+static inline u8* ppu_seg_ptr(u32 addr)
+{
+	return vm::g_exec_addr + vm::g_exec_addr_seg_offset + (addr >> 1);
+}
+
 static inline ppu_intrp_func_t ppu_read(u32 addr)
 {
 	return read_from_ptr<ppu_intrp_func_t>(ppu_ptr(addr));
@@ -518,7 +522,7 @@ void ppu_recompiler_fallback(ppu_thread& ppu)
 
 	while (true)
 	{
-		if (uptr func = uptr(ppu_read(ppu.cia)); (func << 16 >> 16) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
+		if (uptr func = uptr(ppu_read(ppu.cia)); func != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
 		{
 			// We found a recompiler function at cia, return
 			break;
@@ -773,6 +777,9 @@ extern void ppu_register_range(u32 addr, u32 size)
 	utils::memory_commit(ppu_ptr(addr), u64{size} * 2, utils::protection::rw);
 	ensure(vm::page_protect(addr, size, 0, vm::page_executable));
 
+	// Segment data
+	utils::memory_commit(ppu_seg_ptr(addr), size >> 1, utils::protection::rw);
+
 	if (g_cfg.core.ppu_debug)
 	{
 		utils::memory_commit(vm::g_stat_addr + addr, size);
@@ -785,12 +792,13 @@ extern void ppu_register_range(u32 addr, u32 size)
 		if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
 		{
 			// Assume addr is the start of first segment of PRX
-			const uptr entry_value = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
-			write_to_ptr<uptr>(ppu_ptr(addr), entry_value);
+			write_to_ptr<uptr>(ppu_ptr(addr), std::bit_cast<uptr>(ppu_recompiler_fallback_ghc));
+			write_to_ptr<u16>(ppu_seg_ptr(addr), static_cast<u16>(seg_base >> 13));
 		}
 		else
 		{
 			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_fallback);
+			write_to_ptr<u16>(ppu_seg_ptr(addr), 0);
 		}
 
 		addr += 4;
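Note: the size >> 1 commit follows from the table geometry. A size-byte PPU range holds size / 4 instructions, each needing one u16 segment entry (2 bytes), i.e. size / 2 bytes, versus size * 2 bytes for the u64 jumptable. In numbers (a sketch using this diff's layout):

```cpp
#include <cstdint>

constexpr std::uint64_t jumptable_bytes(std::uint64_t size) { return size * 2; }  // 8 bytes per instruction
constexpr std::uint64_t seg_table_bytes(std::uint64_t size) { return size >> 1; } // 2 bytes per instruction
static_assert(jumptable_bytes(0x10000) == 0x20000); // 64 KiB of code -> 128 KiB of pointers
static_assert(seg_table_bytes(0x10000) == 0x8000);  // 64 KiB of code -> 32 KiB of segment bases
```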
@@ -805,7 +813,7 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr =
 	// Initialize specific function
 	if (ptr)
 	{
-		write_to_ptr<uptr>(ppu_ptr(addr), (reinterpret_cast<uptr>(ptr) & 0xffff'ffff'ffffu) | (uptr(ppu_read(addr)) & ~0xffff'ffff'ffffu));
+		write_to_ptr<uptr>(ppu_ptr(addr), std::bit_cast<uptr>(ptr));
 		return;
 	}
 
@@ -3164,8 +3172,9 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
 
 	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
 	c.push(x86::rbp);
+	c.push(x86::r13);
 	c.push(x86::r14);
-	c.sub(x86::rsp, 40);
+	c.sub(x86::rsp, 48);
 #ifdef _WIN32
 	if (!s_tsx_avx)
 	{
@@ -3176,14 +3185,16 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
 
 	// Prepare registers
 	build_swap_rdx_with(c, args, x86::r10);
-	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
+	c.movabs(x86::rbp, reinterpret_cast<u64>(&vm::g_sudo_addr));
+	c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
 	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
 	c.and_(x86::rbp, -128);
 	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
 	c.prefetchw(x86::byte_ptr(x86::rbp, 64));
 	c.movzx(args[0].r32(), args[0].r16());
 	c.shr(args[0].r32(), 1);
-	c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
+	c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
+	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 	c.and_(x86::r11, -128 / 2);
 	c.and_(args[0].r32(), 63);
 
@@ -3217,7 +3228,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
 	{
 		build_get_tsc(c);
 		c.sub(x86::rax, stamp0);
-		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
+		c.movabs(x86::r13, reinterpret_cast<u64>(&g_rtm_tx_limit2));
+		c.cmp(x86::rax, x86::qword_ptr(x86::r13));
 		c.jae(fall);
 	});
 
@@ -3342,8 +3354,9 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
 		c.vzeroupper();
 	}
 
-	c.add(x86::rsp, 40);
+	c.add(x86::rsp, 48);
 	c.pop(x86::r14);
+	c.pop(x86::r13);
 	c.pop(x86::rbp);
 
 	maybe_flush_lbr(c);
@@ -4179,7 +4192,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 	// 2 7MB overlay files -> 14GB
 	// The growth in memory requirements of LLVM is not linear with file size of course
 	// But these estimates should hopefully protect RPCS3 in the coming years
 	// Especially when thread count is on the rise with each CPU generation
 	atomic_t<u32> file_size_limit = static_cast<u32>(std::clamp<u64>(utils::aligned_div<u64>(utils::get_total_memory(), 2000), 65536, u32{umax}));
 
 	const u32 software_thread_limit = std::min<u32>(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
@@ -4301,8 +4314,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 			if (!src && !Emu.klic.empty() && src.open(path))
 			{
 				src = decrypt_self(src, reinterpret_cast<u8*>(&Emu.klic[0]));
 
 				if (src)
 				{
 					ppu_log.error("Possible missed KLIC for precompilation of '%s', please report to developers.", path);
 
@@ -4333,7 +4346,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 	{
 		if (value)
 		{
 			// Allow at least one file, make 0 the "memory unavailable" sign value for atomic waiting efficiency
 			const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, file_size));
 			restore_mem = value - new_val;
 			value = new_val;
@@ -4506,8 +4519,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 			if (!src && !Emu.klic.empty() && src.open(path))
 			{
 				src = decrypt_self(src, reinterpret_cast<u8*>(&Emu.klic[0]));
 
 				if (src)
 				{
 					ppu_log.error("Possible missed KLIC for precompilation of '%s', please report to developers.", path);
 				}
@@ -5079,17 +5092,18 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 	code_size_until_jump = buf_end - buf_start;
 
 	c.add(x86::edx, seg0);
-	c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
+	c.movabs(x86::rax, reinterpret_cast<u64>(&vm::g_exec_addr));
+	c.mov(x86::rax, x86::qword_ptr(x86::rax));
 	c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
 
-	c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
-	c.mov(x86::rdx, x86::rax);
-	c.shl(x86::rax, 16);
-	c.shr(x86::rax, 16);
-	c.shr(x86::rdx, 48);
-	c.mov(x86::r12d, x86::edx); // Load relocation base
-	c.jmp(x86::rax);
+	c.mov(x86::rcx, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
+	c.movabs(x86::r12, vm::g_exec_addr_seg_offset);
+	c.add(x86::rax, x86::r12);
+	c.shr(x86::edx, 1);
+	c.mov(x86::edx, x86::word_ptr(x86::rax, x86::edx)); // Load relocation base
+	c.shl(x86::edx, 13);
+	c.mov(x86::r12d, x86::edx); // Set relocation base
+	c.jmp(x86::rcx);
 #else
 	// Load REG_Base - use absolute jump target to bypass rel jmp range limits
 	// X19 contains vm::g_exec_addr
@@ -5125,14 +5139,11 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 
 	// Compute REG_Hp
 	const arm::GpX reg_hp = a64::x21;
-	c.mov(reg_hp, call_target);
-	c.lsr(reg_hp, reg_hp, 48);
-
-	// Zero top 16 bits of call target
-	c.lsl(call_target, call_target, 16);
-	c.lsr(call_target, call_target, 16);
+	c.mov(reg_hp, Imm(vm::g_exec_addr_seg_offset));
+	c.add(reg_hp, reg_hp, pc, arm::Shift(arm::ShiftOp::kLSR, 2));
+	c.ldrh(reg_hp.w(), arm::Mem(exec_addr, reg_hp));
+	c.lsl(reg_hp.w(), reg_hp.w(), 13);
 
 	// Execute LLE call
 	c.br(call_target);
 #endif
@@ -5340,7 +5351,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
 			}
 
 			part.jit_bounds = std::move(local_jit_bounds);
 			local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
 		}
 
@@ -5400,7 +5411,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 		settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose
 
 		// Write version, hash, CPU, settings
-		fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
+		fmt::append(obj_name, "v7-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
 	}
 
 	if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
@@ -5712,7 +5723,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 
 	for (u32 addr = info.segs[0].addr; addr < info.segs[0].addr + info.segs[0].size; addr += 4, inst_ptr++)
 	{
-		if (*inst_ptr == ppu_instructions::BLR() && (reinterpret_cast<uptr>(ppu_read(addr)) << 16 >> 16) == reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
+		if (*inst_ptr == ppu_instructions::BLR() && reinterpret_cast<uptr>(ppu_read(addr)) == reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
 		{
 			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), BLR_func);
 		}
@@ -411,12 +411,19 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
 
 	const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst);
 	const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type<uptr>());
-	const auto fval = m_ir->CreateOr(m_ir->CreateShl(m_seg0, 32 + 3), faddr_int);
-	const auto pos = m_ir->CreateShl(m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc, 1);
+	const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc;
+	const auto pos = m_ir->CreateShl(pos_32, 1);
 	const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), m_exec, pos));
 
+	const auto seg_base_ptr = m_ir->CreateIntToPtr(m_ir->CreateAdd(
+		m_ir->CreatePtrToInt(m_exec, get_type<u64>()), m_ir->getInt64(vm::g_exec_addr_seg_offset)), m_exec->getType());
+	const auto seg_pos = m_ir->CreateLShr(pos_32, 1);
+	const auto seg_ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), seg_base_ptr, seg_pos));
+	const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type<u16>());
+
 	// Store to jumptable
-	m_ir->CreateStore(fval, ptr);
+	m_ir->CreateStore(faddr_int, ptr);
+	m_ir->CreateStore(seg_val, seg_ptr);
 
 	// Increment index and branch back to loop
 	const auto post_add = m_ir->CreateAdd(index_value, m_ir->getInt64(1));
@@ -605,10 +612,15 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
 		const auto pos = m_ir->CreateShl(indirect, 1);
 		const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), m_exec, pos));
 		const auto val = m_ir->CreateLoad(get_type<u64>(), ptr);
-		callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0xffff'ffff'ffff), type->getPointerTo()));
+		callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, type->getPointerTo()));
 
 		// Load new segment address
-		seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 48), 13);
+		const auto seg_base_ptr = m_ir->CreateIntToPtr(m_ir->CreateAdd(
+			m_ir->CreatePtrToInt(m_exec, get_type<u64>()), m_ir->getInt64(vm::g_exec_addr_seg_offset)), m_exec->getType());
+		const auto seg_pos = m_ir->CreateLShr(indirect, 1);
+		const auto seg_ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), seg_base_ptr, seg_pos));
+		const auto seg_val = m_ir->CreateZExt(m_ir->CreateLoad(get_type<u16>(), seg_ptr), get_type<u64>());
+		seg0 = m_ir->CreateShl(seg_val, 13);
 	}
 
 	m_ir->SetInsertPoint(block);
@@ -2770,14 +2770,17 @@ void spu_recompiler::FREST(spu_opcode_t op)
 	const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frest_fraction_lut);
 	const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frest_exponent_lut);
 
+	c->movabs(*arg0, fraction_lut_addr);
+	c->movabs(*arg1, exponent_lut_addr);
+
 	for (u32 index = 0; index < 4; index++)
 	{
 		c->pextrd(*qw0, v_fraction, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg0, *qw0, 2));
 		c->pinsrd(v_fraction, *qw1, index);
 
 		c->pextrd(*qw0, v_exponent, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg1, *qw0, 2));
 		c->pinsrd(v_exponent, *qw1, index);
 	}
 
@@ -2810,14 +2813,17 @@ void spu_recompiler::FRSQEST(spu_opcode_t op)
 	const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frsqest_fraction_lut);
 	const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frsqest_exponent_lut);
 
+	c->movabs(*arg0, fraction_lut_addr);
+	c->movabs(*arg1, exponent_lut_addr);
+
 	for (u32 index = 0; index < 4; index++)
 	{
 		c->pextrd(*qw0, v_fraction, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg0, *qw0, 2));
 		c->pinsrd(v_fraction, *qw1, index);
 
 		c->pextrd(*qw0, v_exponent, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg1, *qw0, 2));
 		c->pinsrd(v_exponent, *qw1, index);
 	}
 
|
|||
// Initialize compiler instances for parallel compilation
|
||||
std::unique_ptr<spu_recompiler_base> compiler;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
|
||||
{
|
||||
compiler = spu_recompiler_base::make_asmjit_recompiler();
|
||||
|
@ -850,6 +851,22 @@ void spu_cache::initialize(bool build_existing_cache)
|
|||
{
|
||||
compiler = spu_recompiler_base::make_llvm_recompiler();
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::throw_exception("Unsupported spu decoder '%s'", g_cfg.core.spu_decoder);
|
||||
}
|
||||
#elif defined(ARCH_ARM64)
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||
{
|
||||
compiler = spu_recompiler_base::make_llvm_recompiler();
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::throw_exception("Unsupported spu decoder '%s'", g_cfg.core.spu_decoder);
|
||||
}
|
||||
#else
|
||||
#error "Unimplemented"
|
||||
#endif
|
||||
|
||||
compiler->init();
|
||||
|
||||
|
@ -2545,7 +2562,7 @@ bool reg_state_t::is_const() const
|
|||
|
||||
bool reg_state_t::compare_tags(const reg_state_t& rhs) const
|
||||
{
|
||||
// Compare by tag, address of instruction origin
|
||||
// Compare by tag, address of instruction origin
|
||||
return tag == rhs.tag && origin == rhs.origin && is_instruction == rhs.is_instruction;
|
||||
}
|
||||
|
||||
|
@ -6066,7 +6083,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
else if (atomic16->ls_offs.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128) && atomic16->ls.is_less_than(128 - (atomic16->ls_offs.value & 127)))
|
||||
{
|
||||
// Relative memory access with offset less than 128 bytes
|
||||
// Common around SPU utilities which have less strict restrictions about memory alignment
|
||||
// Common around SPU utilities which have less strict restrictions about memory alignment
|
||||
ok = true;
|
||||
}
|
||||
}
|
||||
|
@ -6340,7 +6357,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
atomic16->mem_count++;
|
||||
|
||||
// Do not clear lower 16 bytes addressing because the program can move on 4-byte basis
|
||||
// Do not clear lower 16 bytes addressing because the program can move on 4-byte basis
|
||||
const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16);
|
||||
|
||||
if (atomic16->lsa.is_const() && [&]()
|
||||
|
@ -8142,7 +8159,7 @@ std::array<reg_state_t, s_reg_max>& block_reg_info::evaluate_start_state(const s
|
|||
// Check if the node is resolved
|
||||
if (!node->has_true_state)
|
||||
{
|
||||
// Assume this block cannot be resolved at the moment
|
||||
// Assume this block cannot be resolved at the moment
|
||||
is_all_resolved = false;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -628,6 +628,8 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
//}
|
||||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::rbx);
|
||||
#ifdef _WIN32
|
||||
c.sub(x86::rsp, 168);
|
||||
if (s_tsx_avx)
|
||||
|
@ -648,17 +650,21 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
c.movups(x86::oword_ptr(x86::rsp, 128), x86::xmm14);
|
||||
c.movups(x86::oword_ptr(x86::rsp, 144), x86::xmm15);
|
||||
}
|
||||
#else
|
||||
c.sub(x86::rsp, 40);
|
||||
#endif
|
||||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(args[1], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.movabs(args[1], reinterpret_cast<u64>(&vm::g_sudo_addr));
|
||||
c.mov(args[1], x86::qword_ptr(args[1]));
|
||||
c.lea(args[1], x86::qword_ptr(args[1], args[0]));
|
||||
c.prefetchw(x86::byte_ptr(args[1], 0));
|
||||
c.prefetchw(x86::byte_ptr(args[1], 64));
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
|
@ -703,7 +709,8 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
|
||||
build_get_tsc(c);
|
||||
c.sub(x86::rax, stamp0);
|
||||
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
|
||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.jae(fall);
|
||||
});
|
||||
|
||||
|
@ -853,8 +860,13 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
c.movups(x86::xmm15, x86::oword_ptr(x86::rsp, 144));
|
||||
}
|
||||
c.add(x86::rsp, 168);
|
||||
#else
|
||||
c.add(x86::rsp, 40);
|
||||
#endif
|
||||
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::rbp);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vzeroupper();
|
||||
|
@@ -884,8 +896,10 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
 	//}
 
 	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
-#ifdef _WIN32
+	c.push(x86::rbp);
+	c.push(x86::rbx);
 	c.sub(x86::rsp, 40);
+#ifdef _WIN32
 	if (!s_tsx_avx)
 	{
 		c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
@@ -894,7 +908,8 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
 #endif
 	// Prepare registers
 	build_swap_rdx_with(c, args, x86::r10);
-	c.mov(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
+	c.movabs(x86::r11, reinterpret_cast<u64>(&vm::g_sudo_addr));
+	c.mov(x86::r11, x86::qword_ptr(x86::r11));
 	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 	c.prefetchw(x86::byte_ptr(x86::r11, 0));
 	c.prefetchw(x86::byte_ptr(x86::r11, 64));
@@ -921,7 +936,8 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
 
 	c.and_(args[0].r32(), 0xff80);
 	c.shr(args[0].r32(), 1);
-	c.lea(args[1], x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
+	c.movabs(args[1], reinterpret_cast<u64>(+vm::g_reservations));
+	c.lea(args[1], x86::qword_ptr(args[1], args[0]));
 
 	// Alloc args[0] to stamp0
 	const auto stamp0 = args[0];
@@ -933,7 +949,8 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
 		c.add(x86::qword_ptr(args[3]), 1);
 		build_get_tsc(c);
 		c.sub(x86::rax, stamp0);
-		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
+		c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit2));
+		c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
 		c.jae(fall);
 	});
 
@@ -986,6 +1003,10 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
 		c.vzeroupper();
 	}
 
+	c.add(x86::rsp, 40);
+	c.pop(x86::rbx);
+	c.pop(x86::rbp);
+
 	maybe_flush_lbr(c);
 	c.ret();
 #else
@@ -1023,11 +1044,13 @@ const auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cp
 
 	// Prepare registers
 	build_swap_rdx_with(c, args, x86::r10);
-	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
+	c.movabs(x86::rbp, reinterpret_cast<u64>(&vm::g_sudo_addr));
+	c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
 	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
 	c.and_(args[0].r32(), 0xff80);
 	c.shr(args[0].r32(), 1);
-	c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
+	c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
+	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 
 	// Alloc args[0] to stamp0
 	const auto stamp0 = args[0];
@@ -1039,7 +1062,8 @@ const auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cp
 		c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
 		build_get_tsc(c);
 		c.sub(x86::rax, stamp0);
-		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit1)));
+		c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit1));
+		c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
 		c.jae(fall);
 	});
 
@@ -2118,20 +2142,31 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u
 	, lv2_id(lv2_id)
 	, spu_tname(make_single<std::string>(name))
 {
+#if defined(ARCH_X64)
 	if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
 	{
 		jit = spu_recompiler_base::make_asmjit_recompiler();
 	}
 	else if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
 	{
-#if defined(ARCH_X64)
 		jit = spu_recompiler_base::make_fast_llvm_recompiler();
 	}
+	else
+	{
+		fmt::throw_exception("Unsupported spu decoder '%s'", g_cfg.core.spu_decoder);
+	}
+#elif defined(ARCH_ARM64)
+	if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
+	{
+		jit = spu_recompiler_base::make_llvm_recompiler();
+	}
+	else
+	{
+		fmt::throw_exception("Unsupported spu decoder '%s'", g_cfg.core.spu_decoder);
+	}
+#else
+#error "Unimplemented"
+#endif
 	}
 
 	if (g_cfg.core.mfc_debug)
 	{
@@ -2193,20 +2228,31 @@ spu_thread::spu_thread(utils::serial& ar, lv2_spu_group* group)
 	, lv2_id(ar)
 	, spu_tname(make_single<std::string>(ar.operator std::string()))
 {
+#if defined(ARCH_X64)
 	if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
 	{
 		jit = spu_recompiler_base::make_asmjit_recompiler();
 	}
 	else if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
 	{
-#if defined(ARCH_X64)
 		jit = spu_recompiler_base::make_fast_llvm_recompiler();
 	}
+	else
+	{
+		fmt::throw_exception("Unsupported spu decoder '%s'", g_cfg.core.spu_decoder);
+	}
+#elif defined(ARCH_ARM64)
+	if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
+	{
+		jit = spu_recompiler_base::make_llvm_recompiler();
+	}
+	else
+	{
+		fmt::throw_exception("Unsupported spu decoder '%s'", g_cfg.core.spu_decoder);
+	}
+#else
+#error "Unimplemented"
+#endif
 	}
 
 	if (g_cfg.core.mfc_debug)
 	{
@@ -4445,7 +4491,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
 		// Detect "invalid" relative branches
 		// Branch offsets that, although are the only way to get X code address using relative address
 		// Rely on overflow/underflow of SPU memory bounds
 		// Thus they would behave differently if SPU LS memory size was to increase (evolving the CELL architecture was the original plan)
 		// Making them highly unlikely to be valid code
 
 		if (rel < 0)
@@ -4666,7 +4712,7 @@ bool spu_thread::process_mfc_cmd()
 
 	// Add to chance if previous wait was long enough
 	const u32 add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
 		: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
 		: zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40
 		: zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40
 		: 0;
@@ -5004,7 +5050,7 @@ bool spu_thread::process_mfc_cmd()
 
 			if (group->spurs_running == max_run - 1)
 			{
 				// Try to let another thread slip in and take over execution
 				thread_ctrl::wait_for(300);
 
 				// Update value
@@ -5029,7 +5075,7 @@ bool spu_thread::process_mfc_cmd()
 				if (spurs_last_task_timestamp)
 				{
 					const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
 					spurs_average_task_duration -= avg_entry;
 					spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
 					spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
 					spurs_last_task_timestamp = 0;
@@ -5050,7 +5096,7 @@ bool spu_thread::process_mfc_cmd()
 			}
 
 			max_run = group->max_run;
 
 			prev_running = group->spurs_running.fetch_op([max_run](u32& x)
 			{
 				if (x < max_run)
@@ -5115,7 +5161,7 @@ bool spu_thread::process_mfc_cmd()
 				if (spurs_last_task_timestamp)
 				{
 					const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
 					spurs_average_task_duration -= avg_entry;
 					spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
 					spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
 					spurs_last_task_timestamp = 0;
|
|||
u8* const g_sudo_addr = g_base_addr + 0x1'0000'0000;
|
||||
|
||||
// Auxiliary virtual memory for executable areas
|
||||
u8* const g_exec_addr = memory_reserve_4GiB(g_sudo_addr, 0x200000000);
|
||||
u8* const g_exec_addr = memory_reserve_4GiB(g_sudo_addr, 0x300000000);
|
||||
|
||||
// Hooks for memory R/W interception (default: zero offset to some function with only ret instructions)
|
||||
u8* const g_hook_addr = memory_reserve_4GiB(g_exec_addr, 0x800000000);
|
||||
|
|
|
@ -34,6 +34,8 @@ namespace vm
|
|||
extern u8* const g_free_addr;
|
||||
extern u8 g_reservations[65536 / 128 * 64];
|
||||
|
||||
static constexpr u64 g_exec_addr_seg_offset = 0x2'0000'0000ULL;
|
||||
|
||||
struct writer_lock;
|
||||
|
||||
enum memory_location_t : uint
|
||||
|
|
|
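Note: the numbers line up with the vm.cpp hunk above. The jumptable spans 4 GiB x 2 = 8 GiB, which is exactly why the segment table starts at offset 0x2'0000'0000; the table itself needs 4 GiB / 2 = 2 GiB, so the exec reservation grows from 0x200000000 to 0x300000000 (8 GiB to 12 GiB) with headroom to spare. A sketch of the arithmetic, using this diff's constants:

```cpp
#include <cstdint>
using u64 = std::uint64_t;

constexpr u64 vm_space       = 0x1'0000'0000; // 4 GiB of PS3 address space
constexpr u64 jumptable_size = vm_space * 2;  // one u64 entry per 4-byte instruction
constexpr u64 seg_offset     = 0x2'0000'0000; // matches g_exec_addr_seg_offset
constexpr u64 seg_table_size = vm_space / 2;  // one u16 entry per 4-byte instruction
static_assert(seg_offset == jumptable_size);  // seg table begins right after the jumptable
static_assert(seg_offset + seg_table_size <= 0x300000000); // fits the enlarged reservation
```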
@@ -97,10 +97,9 @@
       <IgnoreImportLibrary>true</IgnoreImportLibrary>
       <LinkIncremental>false</LinkIncremental>
       <OutputFile>$(OutDir)\rpcs3.exe</OutputFile>
-      <RandomizedBaseAddress>false</RandomizedBaseAddress>
+      <RandomizedBaseAddress>true</RandomizedBaseAddress>
       <SubSystem>Windows</SubSystem>
       <SuppressStartupBanner>true</SuppressStartupBanner>
-      <BaseAddress>0x10000</BaseAddress>
      <EntryPointSymbol>mainCRTStartup</EntryPointSymbol>
     </Link>
     <Midl>
@@ -148,10 +147,11 @@
       <GenerateDebugInformation>Debug</GenerateDebugInformation>
       <IgnoreImportLibrary>true</IgnoreImportLibrary>
       <OutputFile>$(OutDir)\rpcs3d.exe</OutputFile>
-      <RandomizedBaseAddress>false</RandomizedBaseAddress>
+      <RandomizedBaseAddress>true</RandomizedBaseAddress>
       <SubSystem>Windows</SubSystem>
       <SuppressStartupBanner>true</SuppressStartupBanner>
-      <BaseAddress>0x10000</BaseAddress>
+      <BaseAddress>
+      </BaseAddress>
       <EntryPointSymbol>mainCRTStartup</EntryPointSymbol>
     </Link>
     <Midl>
@@ -2123,4 +2123,4 @@
     <UserProperties MocDir=".\QTGeneratedFiles\$(ConfigurationName)" Qt5Version_x0020_x64="$(DefaultQtVersion)" RccDir=".\QTGeneratedFiles" UicDir=".\QTGeneratedFiles" />
   </VisualStudio>
 </ProjectExtensions>
 </Project>
@@ -398,6 +398,10 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
 	spu_bg->addButton(ui->spu_asmjit, static_cast<int>(spu_decoder_type::asmjit));
 	spu_bg->addButton(ui->spu_llvm, static_cast<int>(spu_decoder_type::llvm));
 
+#ifndef ARCH_X64
+	ui->spu_asmjit->setEnabled(false);
+#endif
+
 	connect(spu_bg, &QButtonGroup::idToggled, [this](int id, bool checked)
 	{
 		if (!checked) return;
@@ -57,8 +57,8 @@ static bool has_waitv()
 // Total number of entries.
 static constexpr usz s_hashtable_size = 1u << 17;
 
-// Reference counter combined with shifted pointer (which is assumed to be 48 bit)
-static constexpr uptr s_ref_mask = 0xffff;
+// Reference counter mask
+static constexpr uptr s_ref_mask = 0xffff'ffff;
 
 // Fix for silly on-first-use initializer
 static bool s_null_wait_cb(const void*, u64, u64){ return true; };
@@ -153,8 +153,16 @@ namespace
 	// Essentially a fat semaphore
 	struct alignas(64) cond_handle
 	{
-		// Combined pointer (most significant 48 bits) and ref counter (16 least significant bits)
-		atomic_t<u64> ptr_ref;
+		struct fat_ptr
+		{
+			u64 ptr{};
+			u32 reserved{};
+			u32 ref_ctr{};
+
+			auto operator<=>(const fat_ptr& other) const = default;
+		};
+
+		atomic_t<fat_ptr> ptr_ref;
 		u64 tid;
 		u32 oldv;
 
@@ -183,7 +191,7 @@ namespace
 		mtx.init(mtx);
 #endif
 
-		ensure(!ptr_ref.exchange((iptr << 16) | 1));
+		ensure(ptr_ref.exchange(fat_ptr{iptr, 0, 1}) == fat_ptr{});
 	}
 
 	void destroy()
@@ -370,7 +378,7 @@ namespace
 		if (cond_id)
 		{
 			// Set fake refctr
-			s_cond_list[cond_id].ptr_ref.release(1);
+			s_cond_list[cond_id].ptr_ref.release(cond_handle::fat_ptr{0, 0, 1});
 			cond_free(cond_id, -1);
 		}
 	}
@@ -390,7 +398,7 @@ static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
 	{
 		// Fast reinitialize
 		const u32 id = std::exchange(*ptls, 0);
-		s_cond_list[id].ptr_ref.release((iptr << 16) | 1);
+		s_cond_list[id].ptr_ref.release(cond_handle::fat_ptr{iptr, 0, 1});
 		return id;
 	}
 
@@ -461,15 +469,15 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
 	const auto cond = s_cond_list + cond_id;
 
 	// Dereference, destroy on last ref
-	const bool last = cond->ptr_ref.atomic_op([](u64& val)
+	const bool last = cond->ptr_ref.atomic_op([](cond_handle::fat_ptr& val)
 	{
-		ensure(val & s_ref_mask);
+		ensure(val.ref_ctr);
 
-		val--;
+		val.ref_ctr--;
 
-		if ((val & s_ref_mask) == 0)
+		if (val.ref_ctr == 0)
 		{
-			val = 0;
+			val = cond_handle::fat_ptr{};
 			return true;
 		}
 
@@ -525,15 +533,15 @@ static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
 
 	while (true)
 	{
-		const auto [old, ok] = cond->ptr_ref.fetch_op([&](u64& val)
+		const auto [old, ok] = cond->ptr_ref.fetch_op([&](cond_handle::fat_ptr& val)
 		{
-			if (!val || (val & s_ref_mask) == s_ref_mask)
+			if (val == cond_handle::fat_ptr{} || val.ref_ctr == s_ref_mask)
 			{
 				// Don't reference already deallocated semaphore
 				return false;
 			}
 
-			if (iptr && (val >> 16) != iptr)
+			if (iptr && val.ptr != iptr)
 			{
 				// Pointer mismatch
 				return false;
@@ -548,7 +556,7 @@ static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
 
 			if (!did_ref)
 			{
-				val++;
+				val.ref_ctr++;
 			}
 
 			return true;
@@ -566,7 +574,7 @@ static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
 			return cond;
 		}
 
-		if ((old & s_ref_mask) == s_ref_mask)
+		if (old.ref_ctr == s_ref_mask)
 		{
 			fmt::throw_exception("Reference count limit (%u) reached in an atomic notifier.", s_ref_mask);
 		}
@@ -589,12 +597,14 @@ namespace
 		u64 maxc: 5; // Collision counter
 		u64 maxd: 11; // Distance counter
 		u64 bits: 24; // Allocated bits
-		u64 prio: 24; // Reserved
+		u64 prio: 8; // Reserved
+
 		u64 ref : 16; // Ref counter
-		u64 iptr: 48; // First pointer to use slot (to count used slots)
+		u64 iptr: 64; // First pointer to use slot (to count used slots)
 	};
 
 	static_assert(sizeof(slot_allocator) == 16);
 
 	// Need to spare 16 bits for ref counter
 	static constexpr u64 max_threads = 24;
@@ -935,7 +945,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 
 	const auto stamp0 = utils::get_unique_tsc();
 
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+	const uptr iptr = reinterpret_cast<uptr>(data);
 
 	uptr iptr_ext[atomic_wait::max_list - 1]{};
 
@@ -956,7 +966,7 @@ atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wa
 			}
 		}
 
-		iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data) & (~s_ref_mask >> 16);
+		iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data);
 		ext_size++;
 	}
 }
@@ -1266,7 +1276,7 @@ void atomic_wait_engine::notify_one(const void* data)
 		return;
 	}
 #endif
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+	const uptr iptr = reinterpret_cast<uptr>(data);
 
 	root_info::slot_search(iptr, [&](u32 cond_id)
 	{
@@ -1289,7 +1299,7 @@ atomic_wait_engine::notify_all(const void* data)
 		return;
 	}
 #endif
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+	const uptr iptr = reinterpret_cast<uptr>(data);
 
 	// Array count for batch notification
 	u32 count = 0;
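Note: ptr_ref and the other fat_ptr atomics in this set are 16 bytes, so each atomic_op/fetch_op above compiles to a 128-bit compare-and-swap loop (cmpxchg16b on x86-64, CASP or LDXP/STXP on AArch64) rather than a single 64-bit RMW; the defaulted operator<=> is what allows whole-value comparisons like val == cond_handle::fat_ptr{}. A standalone sketch of the same idea with std::atomic (RPCS3 uses its own atomic_t, but the mechanics match):

```cpp
#include <atomic>
#include <cstdint>

struct fat_ptr
{
	std::uint64_t ptr{};
	std::uint32_t reserved{};
	std::uint32_t ref_ctr{};
	bool operator==(const fat_ptr&) const = default;
};
static_assert(sizeof(fat_ptr) == 16);

std::atomic<fat_ptr> g_slot{};

// Bump the embedded refcount without disturbing the pointer: a 128-bit CAS loop.
// (May need -mcx16 on x86-64 for a lock-free implementation.)
void add_ref()
{
	fat_ptr expected = g_slot.load();
	fat_ptr desired;
	do
	{
		desired = expected;
		++desired.ref_ctr;
	}
	while (!g_slot.compare_exchange_weak(expected, desired));
}
```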
@@ -205,9 +205,9 @@ namespace atomic_wait
 		constexpr void set(lf_queue<T2>& var, std::nullptr_t = nullptr)
 		{
 			static_assert(Index < Max);
-			static_assert(sizeof(var) == sizeof(uptr));
+			static_assert(sizeof(var) == sizeof(uptr) * 2);
 
-			m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
+			m_info[Index].data = reinterpret_cast<char*>(&var) + offsetof(typename lf_queue<T2>::fat_ptr, is_non_null);
 			m_info[Index].old = 0;
 		}
 
@@ -215,9 +215,9 @@ namespace atomic_wait
 		constexpr void set(stx::atomic_ptr<T2>& var, std::nullptr_t = nullptr)
 		{
 			static_assert(Index < Max);
-			static_assert(sizeof(var) == sizeof(uptr));
+			static_assert(sizeof(var) == sizeof(uptr) * 2);
 
-			m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
+			m_info[Index].data = reinterpret_cast<char*>(&var) + offsetof(typename stx::atomic_ptr<T2>::fat_ptr, is_non_null);
 			m_info[Index].old = 0;
 		}
 
@@ -19,14 +19,8 @@ namespace stx
 	template <typename T>
 	class atomic_ptr;
 
-	// Basic assumption of userspace pointer size
-	constexpr uint c_ptr_size = 48;
-
-	// Use lower 16 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
-	constexpr uint c_ref_mask = 0xffff, c_ref_size = 16;
-
-	// Remaining pointer bits
-	constexpr uptr c_ptr_mask = static_cast<uptr>(-1) << c_ref_size;
+	// Use 16 bits as atomic_ptr internal counter of borrowed refs
+	constexpr uint c_ref_mask = 0xffff;
 
 	struct shared_counter
 	{
@@ -574,7 +568,6 @@ namespace stx
 		}
 
 		// Random checks which may fail on invalid pointer
-		ensure((reinterpret_cast<u64>(r.d()->destroy.load()) - 0x10000) >> 47 == 0);
 		ensure((r.d()->refs++ - 1) >> 58 == 0);
 		return r;
 	}
@@ -583,11 +576,21 @@ namespace stx
 	template <typename T>
 	class atomic_ptr
 	{
-		mutable atomic_t<uptr> m_val{0};
-
-		static shared_counter* d(uptr val) noexcept
+	public:
+		struct fat_ptr
+		{
+			uptr ptr{};
+			u32 is_non_null{};
+			u32 ref_ctr{};
+		};
+
+	private:
+		mutable atomic_t<fat_ptr> m_val{fat_ptr{}};
+
+		static shared_counter* d(fat_ptr val) noexcept
 		{
-			return std::launder(reinterpret_cast<shared_counter*>((val >> c_ref_size) - sizeof(shared_counter)));
+			return std::launder(reinterpret_cast<shared_counter*>(val.ptr - sizeof(shared_counter)));
 		}
 
 		shared_counter* d() const noexcept
@@ -595,14 +598,19 @@ namespace stx
 			return d(m_val);
 		}
 
-		static uptr to_val(const volatile std::remove_extent_t<T>* ptr) noexcept
+		static fat_ptr to_val(const volatile std::remove_extent_t<T>* ptr) noexcept
 		{
-			return (reinterpret_cast<uptr>(ptr) << c_ref_size);
+			return fat_ptr{reinterpret_cast<uptr>(ptr), ptr != nullptr, 0};
 		}
 
-		static std::remove_extent_t<T>* ptr_to(uptr val) noexcept
+		static fat_ptr to_val(uptr ptr) noexcept
+		{
+			return fat_ptr{ptr, ptr != 0, 0};
+		}
+
+		static std::remove_extent_t<T>* ptr_to(fat_ptr val) noexcept
 		{
-			return reinterpret_cast<std::remove_extent_t<T>*>(val >> c_ref_size);
+			return reinterpret_cast<std::remove_extent_t<T>*>(val.ptr);
 		}
 
 		template <typename U>
@@ -645,7 +653,7 @@ namespace stx
 		atomic_ptr(const shared_ptr<U>& r) noexcept
 		{
 			// Obtain a ref + as many refs as an atomic_ptr can additionally reference
-			if (uptr rval = to_val(r.m_ptr))
+			if (fat_ptr rval = to_val(r.m_ptr); rval.ptr != 0)
 			{
 				m_val.raw() = rval;
 				d(rval)->refs += c_ref_mask + 1;
@@ -655,7 +663,7 @@ namespace stx
 		template <typename U> requires same_ptr_implicit_v<T, U>
 		atomic_ptr(shared_ptr<U>&& r) noexcept
 		{
-			if (uptr rval = to_val(r.m_ptr))
+			if (fat_ptr rval = to_val(r.m_ptr); rval.ptr != 0)
 			{
 				m_val.raw() = rval;
 				d(rval)->refs += c_ref_mask;
@@ -667,7 +675,7 @@ namespace stx
 		template <typename U> requires same_ptr_implicit_v<T, U>
 		atomic_ptr(single_ptr<U>&& r) noexcept
 		{
-			if (uptr rval = to_val(r.m_ptr))
+			if (fat_ptr rval = to_val(r.m_ptr); rval.ptr != 0)
 			{
 				m_val.raw() = rval;
 				d(rval)->refs += c_ref_mask;
@@ -678,13 +686,13 @@ namespace stx
 
 		~atomic_ptr() noexcept
 		{
-			const uptr v = m_val.raw();
+			const fat_ptr v = m_val.raw();
 
-			if (v >> c_ref_size)
+			if (v.ptr)
 			{
 				const auto o = d(v);
 
-				if (!o->refs.sub_fetch(c_ref_mask + 1 - (v & c_ref_mask)))
+				if (!o->refs.sub_fetch(c_ref_mask + 1 - (v.ref_ctr & c_ref_mask)))
 				{
 					o->destroy.load()(o);
 				}
@@ -733,11 +741,11 @@ namespace stx
 			shared_type r;
 
 			// Add reference
-			const auto [prev, did_ref] = m_val.fetch_op([](uptr& val)
+			const auto [prev, did_ref] = m_val.fetch_op([](fat_ptr& val)
 			{
-				if (val >> c_ref_size)
+				if (val.ptr)
 				{
-					val++;
+					val.ref_ctr++;
 					return true;
 				}
 
@@ -755,11 +763,11 @@ namespace stx
 			r.d()->refs++;
 
 			// Dereference if still the same pointer
-			const auto [_, did_deref] = m_val.fetch_op([prev = prev](uptr& val)
+			const auto [_, did_deref] = m_val.fetch_op([prev = prev](fat_ptr& val)
 			{
-				if (val >> c_ref_size == prev >> c_ref_size)
+				if (val.ptr == prev.ptr)
 				{
-					val--;
+					val.ref_ctr--;
 					return true;
 				}
 
@@ -782,11 +790,11 @@ namespace stx
 			shared_type r;
 
 			// Add reference
-			const auto [prev, did_ref] = m_val.fetch_op([](uptr& val)
+			const auto [prev, did_ref] = m_val.fetch_op([](fat_ptr& val)
 			{
-				if (val >> c_ref_size)
+				if (val.ptr)
 				{
-					val++;
+					val.ref_ctr++;
 					return true;
 				}
 
@@ -823,11 +831,11 @@ namespace stx
 			}
 
 			// Dereference if still the same pointer
-			const auto [_, did_deref] = m_val.fetch_op([prev = prev](uptr& val)
+			const auto [_, did_deref] = m_val.fetch_op([prev = prev](fat_ptr& val)
 			{
-				if (val >> c_ref_size == prev >> c_ref_size)
+				if (val.ptr == prev.ptr)
 				{
-					val--;
+					val.ref_ctr--;
 					return true;
 				}
 
@@ -888,7 +896,7 @@ namespace stx
 
 			atomic_ptr old;
 			old.m_val.raw() = m_val.exchange(to_val(r.m_ptr));
-			old.m_val.raw() += 1;
+			old.m_val.raw().ref_ctr += 1;
 
 			r.m_ptr = std::launder(ptr_to(old.m_val));
 			return r;
@@ -904,7 +912,7 @@ namespace stx
 
 			atomic_ptr old;
 			old.m_val.raw() = m_val.exchange(to_val(value.m_ptr));
-			old.m_val.raw() += 1;
+			old.m_val.raw().ref_ctr += 1;
 
 			value.m_ptr = std::launder(ptr_to(old.m_val));
 			return value;
@@ -923,21 +931,21 @@ namespace stx
 
 			atomic_ptr old;
 
-			const uptr _val = m_val.fetch_op([&](uptr& val)
+			const fat_ptr _val = m_val.fetch_op([&](fat_ptr& val)
 			{
-				if (val >> c_ref_size == _old)
+				if (val.ptr == _old)
 				{
 					// Set new value
-					val = _new << c_ref_size;
+					val = to_val(_new);
 				}
-				else if (val)
+				else if (val.ptr != 0)
 				{
 					// Reference previous value
-					val++;
+					val.ref_ctr++;
 				}
 			});
 
-			if (_val >> c_ref_size == _old)
+			if (_val.ptr == _old)
 			{
 				// Success (exch is consumed, cmp_and_old is unchanged)
 				if (exch.m_ptr)
@@ -954,9 +962,10 @@ namespace stx
 			old_exch.m_val.raw() = to_val(std::exchange(exch.m_ptr, nullptr));
 
 			// Set to reset old cmp_and_old value
-			old.m_val.raw() = to_val(cmp_and_old.m_ptr) | c_ref_mask;
+			old.m_val.raw() = to_val(cmp_and_old.m_ptr);
+			old.m_val.raw().ref_ctr |= c_ref_mask;
 
-			if (!_val)
+			if (!_val.ptr)
 			{
 				return false;
 			}
@@ -966,11 +975,11 @@ namespace stx
 			cmp_and_old.d()->refs++;
 
 			// Dereference if still the same pointer
-			const auto [_, did_deref] = m_val.fetch_op([_val](uptr& val)
+			const auto [_, did_deref] = m_val.fetch_op([_val](fat_ptr& val)
 			{
-				if (val >> c_ref_size == _val >> c_ref_size)
+				if (val.ptr == _val.ptr)
 				{
-					val--;
+					val.ref_ctr--;
 					return true;
 				}
 
@@ -1009,12 +1018,12 @@ namespace stx
 
 			atomic_ptr old;
 
-			const auto [_val, ok] = m_val.fetch_op([&](uptr& val)
+			const auto [_val, ok] = m_val.fetch_op([&](fat_ptr& val)
 			{
-				if (val >> c_ref_size == _old)
+				if (val.ptr == _old)
 				{
 					// Set new value
-					val = _new << c_ref_size;
+					val = to_val(_new);
 					return true;
 				}
 
@@ -1081,7 +1090,7 @@ namespace stx
 			if (next.m_ptr)
 			{
 				// Compensation for `next` assignment
-				old.m_val.raw() += 1;
+				old.m_val.raw().ref_ctr += 1;
 			}
 		}
 
@@ -1093,7 +1102,7 @@ namespace stx
 
 		explicit constexpr operator bool() const noexcept
 		{
-			return m_val != 0;
+			return observe() != nullptr;
 		}
 
 		template <typename U> requires same_ptr_implicit_v<T, U>
@@ -1110,17 +1119,17 @@ namespace stx
 
 		void wait(std::nullptr_t, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
 		{
-			utils::bless<atomic_t<u32>>(&m_val)[1].wait(0, timeout);
+			utils::bless<atomic_t<u32>>(&m_val.raw().is_non_null)->wait(0, timeout);
 		}
 
 		void notify_one()
 		{
-			utils::bless<atomic_t<u32>>(&m_val)[1].notify_one();
+			utils::bless<atomic_t<u32>>(&m_val.raw().is_non_null)->notify_one();
 		}
 
 		void notify_all()
 		{
-			utils::bless<atomic_t<u32>>(&m_val)[1].notify_all();
+			utils::bless<atomic_t<u32>>(&m_val.raw().is_non_null)->notify_all();
 		}
 	};
 
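Note: none of these atomic_ptr hunks change the borrowed-reference protocol, only its representation: `val >> c_ref_size` becomes `val.ptr`, and `val++`/`val--` become `val.ref_ctr++`/`val.ref_ctr--`. The protocol itself: every stored pointer pre-pays c_ref_mask + 1 references on its control block; a reader borrows one of them in the same 128-bit CAS that observes the pointer, converts it into a real control-block reference, then hands the borrow back if the slot still holds the same pointer (if not, whoever swapped the pointer out inherited the borrow). A condensed sketch of that load path, assuming this diff's fat_ptr and RPCS3's atomic_t::fetch_op, which returns {previous value, lambda result}; make_shared_from is a hypothetical helper:

```cpp
// Condensed load() sketch, not the full RPCS3 implementation.
shared_type load_sketch(atomic_t<fat_ptr>& m_val)
{
	// 1. Borrow: read the pointer and take one embedded ref in a single CAS.
	const auto [prev, did_ref] = m_val.fetch_op([](fat_ptr& val)
	{
		if (val.ptr)
		{
			val.ref_ctr++;
			return true;
		}

		return false;
	});

	if (!did_ref)
	{
		return {}; // slot was empty
	}

	shared_type r = make_shared_from(prev.ptr); // hypothetical: takes a real control-block ref

	// 2. Hand the borrow back if the same pointer is still installed.
	m_val.fetch_op([&](fat_ptr& val)
	{
		if (val.ptr == prev.ptr)
		{
			val.ref_ctr--;
			return true;
		}

		return false; // pointer changed; the exchanger consumed our borrow
	});

	return r;
}
```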