mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-19 19:15:26 +00:00
Enable ASLR
This commit is contained in:
parent
04d833d3e6
commit
06e2324809
8 changed files with 84 additions and 69 deletions
|
@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.28)
|
|||
project(rpcs3 LANGUAGES C CXX)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11)
|
||||
|
|
|
@ -344,15 +344,7 @@ jit_runtime_base& asmjit::get_global_runtime()
|
|||
{
|
||||
custom_runtime() noexcept
|
||||
{
|
||||
// Search starting in first 2 GiB of memory
|
||||
for (u64 addr = size;; addr += size)
|
||||
{
|
||||
if (auto ptr = utils::memory_reserve(size, reinterpret_cast<void*>(addr)))
|
||||
{
|
||||
m_pos.raw() = static_cast<uchar*>(ptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
ensure(m_pos.raw() = static_cast<uchar*>(utils::memory_reserve(size)));
|
||||
|
||||
// Initialize "end" pointer
|
||||
m_max = m_pos + size;
|
||||
|
|
|
@ -5,13 +5,12 @@ if(MSVC)
|
|||
add_compile_definitions(
|
||||
_CRT_SECURE_NO_DEPRECATE=1 _CRT_NON_CONFORMING_SWPRINTFS=1 _SCL_SECURE_NO_WARNINGS=1
|
||||
NOMINMAX _ENABLE_EXTENDED_ALIGNED_STORAGE=1 _HAS_EXCEPTIONS=0)
|
||||
add_link_options(/DYNAMICBASE:NO /BASE:0x10000 /FIXED)
|
||||
add_link_options(/DYNAMICBASE:YES)
|
||||
|
||||
#TODO: Some of these could be cleaned up
|
||||
add_compile_options(/wd4805) # Comparing boolean and int
|
||||
add_compile_options(/wd4804) # Using integer operators with booleans
|
||||
add_compile_options(/wd4200) # Zero-sized array in struct/union
|
||||
add_link_options(/ignore:4281) # Undesirable base address 0x10000
|
||||
|
||||
# MSVC 2017 uses iterator as base class internally, causing a lot of warning spam
|
||||
add_compile_definitions(_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING=1)
|
||||
|
@ -19,8 +18,6 @@ if(MSVC)
|
|||
# Increase stack limit to 8 MB
|
||||
add_link_options(/STACK:8388608,1048576)
|
||||
else()
|
||||
# Some distros have the compilers set to use PIE by default, but RPCS3 doesn't work with PIE, so we need to disable it.
|
||||
check_cxx_compiler_flag("-no-pie" HAS_NO_PIE)
|
||||
check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
|
||||
check_cxx_compiler_flag("-msse -msse2 -mcx16" COMPILER_X86)
|
||||
if (APPLE)
|
||||
|
@ -96,15 +93,6 @@ else()
|
|||
if(NOT APPLE AND NOT WIN32)
|
||||
# This hides our LLVM from mesa's LLVM, otherwise we get some unresolvable conflicts.
|
||||
add_link_options(-Wl,--exclude-libs,ALL)
|
||||
|
||||
if(HAS_NO_PIE)
|
||||
add_link_options(-no-pie)
|
||||
endif()
|
||||
elseif(APPLE)
|
||||
if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
|
||||
add_link_options(-Wl,-image_base,0x10000 -Wl,-pagezero_size,0x10000)
|
||||
add_link_options(-Wl,-no_pie)
|
||||
endif()
|
||||
elseif(WIN32)
|
||||
add_compile_definitions(__STDC_FORMAT_MACROS=1)
|
||||
|
||||
|
@ -113,11 +101,6 @@ else()
|
|||
|
||||
# Increase stack limit to 8 MB
|
||||
add_link_options(-Wl,--stack -Wl,8388608)
|
||||
|
||||
# For arm64 windows, the image base cannot be below 4GB or the OS rejects the binary without much explanation.
|
||||
if(COMPILER_X86)
|
||||
add_link_options(-Wl,--image-base,0x10000)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Specify C++ library to use as standard C++ when using clang (not required on linux due to GNU)
|
||||
|
|
|
@ -1902,8 +1902,9 @@ auto gen_ghc_cpp_trampoline(ppu_intrp_func_t fn_target)
|
|||
// Take second ghc arg
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
|
||||
c.add(args[2], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
c.jmp(fn_target);
|
||||
c.movabs(args[1], reinterpret_cast<u64>(&vm::g_base_addr));
|
||||
c.add(args[2], x86::qword_ptr(args[1]));
|
||||
c.jmp(Imm(fn_target));
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -220,7 +220,8 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);
|
||||
|
||||
// Initialize args
|
||||
c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
|
||||
c.movabs(x86::r13, reinterpret_cast<u64>(&vm::g_exec_addr));
|
||||
c.mov(x86::r13, x86::qword_ptr(x86::r13));
|
||||
c.mov(x86::rbp, args[0]);
|
||||
c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC
|
||||
|
||||
|
@ -232,7 +233,8 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
c.shl(x86::edx, 13);
|
||||
c.mov(x86::r12d, x86::edx); // Load relocation base
|
||||
|
||||
c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&vm::g_base_addr));
|
||||
c.mov(x86::rbx, x86::qword_ptr(x86::rbx));
|
||||
c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers
|
||||
c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1)));
|
||||
c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2)));
|
||||
|
@ -3164,8 +3166,9 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
|
|||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::r13);
|
||||
c.push(x86::r14);
|
||||
c.sub(x86::rsp, 40);
|
||||
c.sub(x86::rsp, 48);
|
||||
#ifdef _WIN32
|
||||
if (!s_tsx_avx)
|
||||
{
|
||||
|
@ -3176,14 +3179,16 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
|
|||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.movabs(x86::rbp, reinterpret_cast<u64>(&vm::g_sudo_addr));
|
||||
c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
|
||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
||||
c.and_(x86::rbp, -128);
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||
c.movzx(args[0].r32(), args[0].r16());
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
c.and_(x86::r11, -128 / 2);
|
||||
c.and_(args[0].r32(), 63);
|
||||
|
||||
|
@ -3217,7 +3222,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
|
|||
{
|
||||
build_get_tsc(c);
|
||||
c.sub(x86::rax, stamp0);
|
||||
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
|
||||
c.movabs(x86::r13, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
||||
c.cmp(x86::rax, x86::qword_ptr(x86::r13));
|
||||
c.jae(fall);
|
||||
});
|
||||
|
||||
|
@ -3342,8 +3348,9 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime
|
|||
c.vzeroupper();
|
||||
}
|
||||
|
||||
c.add(x86::rsp, 40);
|
||||
c.add(x86::rsp, 48);
|
||||
c.pop(x86::r14);
|
||||
c.pop(x86::r13);
|
||||
c.pop(x86::rbp);
|
||||
|
||||
maybe_flush_lbr(c);
|
||||
|
@ -4176,7 +4183,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
|||
// 2 7MB overlay files -> 14GB
|
||||
// The growth in memory requirements of LLVM is not linear with file size of course
|
||||
// But these estimates should hopefully protect RPCS3 in the coming years
|
||||
// Especially when thread count is on the rise with each CPU generation
|
||||
// Especially when thread count is on the rise with each CPU generation
|
||||
atomic_t<u32> file_size_limit = static_cast<u32>(std::clamp<u64>(utils::aligned_div<u64>(utils::get_total_memory(), 2000), 65536, u32{umax}));
|
||||
|
||||
const u32 software_thread_limit = std::min<u32>(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
|
||||
|
@ -4298,8 +4305,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
|||
if (!src && !Emu.klic.empty() && src.open(path))
|
||||
{
|
||||
src = decrypt_self(src, reinterpret_cast<u8*>(&Emu.klic[0]));
|
||||
|
||||
if (src)
|
||||
|
||||
if (src)
|
||||
{
|
||||
ppu_log.error("Possible missed KLIC for precompilation of '%s', please report to developers.", path);
|
||||
|
||||
|
@ -4330,7 +4337,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
|||
{
|
||||
if (value)
|
||||
{
|
||||
// Allow at least one file, make 0 the "memory unavailable" sign value for atomic waiting efficiency
|
||||
// Allow at least one file, make 0 the "memory unavailable" sign value for atomic waiting efficiency
|
||||
const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, file_size));
|
||||
restore_mem = value - new_val;
|
||||
value = new_val;
|
||||
|
@ -4503,8 +4510,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
|||
if (!src && !Emu.klic.empty() && src.open(path))
|
||||
{
|
||||
src = decrypt_self(src, reinterpret_cast<u8*>(&Emu.klic[0]));
|
||||
|
||||
if (src)
|
||||
|
||||
if (src)
|
||||
{
|
||||
ppu_log.error("Possible missed KLIC for precompilation of '%s', please report to developers.", path);
|
||||
}
|
||||
|
@ -5076,7 +5083,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
code_size_until_jump = buf_end - buf_start;
|
||||
|
||||
c.add(x86::edx, seg0);
|
||||
c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
|
||||
c.movabs(x86::rax, reinterpret_cast<u64>(&vm::g_exec_addr));
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rax));
|
||||
c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
|
||||
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
|
||||
|
@ -5337,7 +5345,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
|
||||
}
|
||||
|
||||
part.jit_bounds = std::move(local_jit_bounds);
|
||||
part.jit_bounds = std::move(local_jit_bounds);
|
||||
local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -2770,14 +2770,17 @@ void spu_recompiler::FREST(spu_opcode_t op)
|
|||
const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frest_fraction_lut);
|
||||
const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frest_exponent_lut);
|
||||
|
||||
c->movabs(*arg0, fraction_lut_addr);
|
||||
c->movabs(*arg1, exponent_lut_addr);
|
||||
|
||||
for (u32 index = 0; index < 4; index++)
|
||||
{
|
||||
c->pextrd(*qw0, v_fraction, index);
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(*arg0, *qw0, 2));
|
||||
c->pinsrd(v_fraction, *qw1, index);
|
||||
|
||||
c->pextrd(*qw0, v_exponent, index);
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(*arg1, *qw0, 2));
|
||||
c->pinsrd(v_exponent, *qw1, index);
|
||||
}
|
||||
|
||||
|
@ -2810,14 +2813,17 @@ void spu_recompiler::FRSQEST(spu_opcode_t op)
|
|||
const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frsqest_fraction_lut);
|
||||
const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frsqest_exponent_lut);
|
||||
|
||||
c->movabs(*arg0, fraction_lut_addr);
|
||||
c->movabs(*arg1, exponent_lut_addr);
|
||||
|
||||
for (u32 index = 0; index < 4; index++)
|
||||
{
|
||||
c->pextrd(*qw0, v_fraction, index);
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(*arg0, *qw0, 2));
|
||||
c->pinsrd(v_fraction, *qw1, index);
|
||||
|
||||
c->pextrd(*qw0, v_exponent, index);
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
|
||||
c->mov(*qw1, asmjit::x86::dword_ptr(*arg1, *qw0, 2));
|
||||
c->pinsrd(v_exponent, *qw1, index);
|
||||
}
|
||||
|
||||
|
|
|
@ -628,6 +628,8 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
//}
|
||||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::rbx);
|
||||
#ifdef _WIN32
|
||||
c.sub(x86::rsp, 168);
|
||||
if (s_tsx_avx)
|
||||
|
@ -648,17 +650,21 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
c.movups(x86::oword_ptr(x86::rsp, 128), x86::xmm14);
|
||||
c.movups(x86::oword_ptr(x86::rsp, 144), x86::xmm15);
|
||||
}
|
||||
#else
|
||||
c.sub(x86::rsp, 40);
|
||||
#endif
|
||||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(args[1], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.movabs(args[1], reinterpret_cast<u64>(&vm::g_sudo_addr));
|
||||
c.mov(args[1], x86::qword_ptr(args[1]));
|
||||
c.lea(args[1], x86::qword_ptr(args[1], args[0]));
|
||||
c.prefetchw(x86::byte_ptr(args[1], 0));
|
||||
c.prefetchw(x86::byte_ptr(args[1], 64));
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
|
@ -703,7 +709,8 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
|
||||
build_get_tsc(c);
|
||||
c.sub(x86::rax, stamp0);
|
||||
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
|
||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.jae(fall);
|
||||
});
|
||||
|
||||
|
@ -853,8 +860,13 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
|
|||
c.movups(x86::xmm15, x86::oword_ptr(x86::rsp, 144));
|
||||
}
|
||||
c.add(x86::rsp, 168);
|
||||
#else
|
||||
c.add(x86::rsp, 40);
|
||||
#endif
|
||||
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::rbp);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vzeroupper();
|
||||
|
@ -884,8 +896,10 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
|
|||
//}
|
||||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
#ifdef _WIN32
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::rbx);
|
||||
c.sub(x86::rsp, 40);
|
||||
#ifdef _WIN32
|
||||
if (!s_tsx_avx)
|
||||
{
|
||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
||||
|
@ -894,7 +908,8 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
|
|||
#endif
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.movabs(x86::r11, reinterpret_cast<u64>(&vm::g_sudo_addr));
|
||||
c.mov(x86::r11, x86::qword_ptr(x86::r11));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
c.prefetchw(x86::byte_ptr(x86::r11, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::r11, 64));
|
||||
|
@ -921,7 +936,8 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
|
|||
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(args[1], x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.movabs(args[1], reinterpret_cast<u64>(+vm::g_reservations));
|
||||
c.lea(args[1], x86::qword_ptr(args[1], args[0]));
|
||||
|
||||
// Alloc args[0] to stamp0
|
||||
const auto stamp0 = args[0];
|
||||
|
@ -933,7 +949,8 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
|
|||
c.add(x86::qword_ptr(args[3]), 1);
|
||||
build_get_tsc(c);
|
||||
c.sub(x86::rax, stamp0);
|
||||
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
|
||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.jae(fall);
|
||||
});
|
||||
|
||||
|
@ -986,6 +1003,10 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
|
|||
c.vzeroupper();
|
||||
}
|
||||
|
||||
c.add(x86::rsp, 40);
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::rbp);
|
||||
|
||||
maybe_flush_lbr(c);
|
||||
c.ret();
|
||||
#else
|
||||
|
@ -1023,11 +1044,13 @@ const auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cp
|
|||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.movabs(x86::rbp, reinterpret_cast<u64>(&vm::g_sudo_addr));
|
||||
c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
|
||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
|
||||
// Alloc args[0] to stamp0
|
||||
const auto stamp0 = args[0];
|
||||
|
@ -1039,7 +1062,8 @@ const auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cp
|
|||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
|
||||
build_get_tsc(c);
|
||||
c.sub(x86::rax, stamp0);
|
||||
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit1)));
|
||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit1));
|
||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.jae(fall);
|
||||
});
|
||||
|
||||
|
@ -4445,7 +4469,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
|
|||
// Detect "invalid" relative branches
|
||||
// Branch offsets that, although are the only way to get X code address using relative address
|
||||
// Rely on overflow/underflow of SPU memory bounds
|
||||
// Thus they would behave differently if SPU LS memory size was to increase (evolving the CELL architecture was the original plan)
|
||||
// Thus they would behave differently if SPU LS memory size was to increase (evolving the CELL architecture was the original plan)
|
||||
// Making them highly unlikely to be valid code
|
||||
|
||||
if (rel < 0)
|
||||
|
@ -4666,7 +4690,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
|
||||
// Add to chance if previous wait was long enough
|
||||
const u32 add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
|
||||
: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
|
||||
: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
|
||||
: zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40
|
||||
: zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40
|
||||
: 0;
|
||||
|
@ -5004,7 +5028,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
|
||||
if (group->spurs_running == max_run - 1)
|
||||
{
|
||||
// Try to let another thread slip in and take over execution
|
||||
// Try to let another thread slip in and take over execution
|
||||
thread_ctrl::wait_for(300);
|
||||
|
||||
// Update value
|
||||
|
@ -5029,7 +5053,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
if (spurs_last_task_timestamp)
|
||||
{
|
||||
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
|
||||
spurs_average_task_duration -= avg_entry;
|
||||
spurs_average_task_duration -= avg_entry;
|
||||
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
|
||||
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
|
||||
spurs_last_task_timestamp = 0;
|
||||
|
@ -5050,7 +5074,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
}
|
||||
|
||||
max_run = group->max_run;
|
||||
|
||||
|
||||
prev_running = group->spurs_running.fetch_op([max_run](u32& x)
|
||||
{
|
||||
if (x < max_run)
|
||||
|
@ -5115,7 +5139,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
if (spurs_last_task_timestamp)
|
||||
{
|
||||
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
|
||||
spurs_average_task_duration -= avg_entry;
|
||||
spurs_average_task_duration -= avg_entry;
|
||||
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
|
||||
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
|
||||
spurs_last_task_timestamp = 0;
|
||||
|
|
|
@ -97,10 +97,9 @@
|
|||
<IgnoreImportLibrary>true</IgnoreImportLibrary>
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<OutputFile>$(OutDir)\rpcs3.exe</OutputFile>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<RandomizedBaseAddress>true</RandomizedBaseAddress>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<BaseAddress>0x10000</BaseAddress>
|
||||
<EntryPointSymbol>mainCRTStartup</EntryPointSymbol>
|
||||
</Link>
|
||||
<Midl>
|
||||
|
@ -148,10 +147,11 @@
|
|||
<GenerateDebugInformation>Debug</GenerateDebugInformation>
|
||||
<IgnoreImportLibrary>true</IgnoreImportLibrary>
|
||||
<OutputFile>$(OutDir)\rpcs3d.exe</OutputFile>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<RandomizedBaseAddress>true</RandomizedBaseAddress>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<BaseAddress>0x10000</BaseAddress>
|
||||
<BaseAddress>
|
||||
</BaseAddress>
|
||||
<EntryPointSymbol>mainCRTStartup</EntryPointSymbol>
|
||||
</Link>
|
||||
<Midl>
|
||||
|
@ -2123,4 +2123,4 @@
|
|||
<UserProperties MocDir=".\QTGeneratedFiles\$(ConfigurationName)" Qt5Version_x0020_x64="$(DefaultQtVersion)" RccDir=".\QTGeneratedFiles" UicDir=".\QTGeneratedFiles" />
|
||||
</VisualStudio>
|
||||
</ProjectExtensions>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
Loading…
Add table
Reference in a new issue