From f6e5448b43f74fffab22eca2f96b281ef0f662d3 Mon Sep 17 00:00:00 2001 From: Martino Fontana Date: Sun, 5 Oct 2025 21:46:41 +0200 Subject: [PATCH] Jit64: Dynamic length of regular jump instruction (for known addresses) Conditional jumps already do that, so let's be consistent. --- Source/Core/Common/x64Emitter.cpp | 32 +++++++++++-------- Source/Core/Common/x64Emitter.h | 4 ++- Source/Core/Core/DSP/Jit/x64/DSPEmitter.cpp | 8 ++--- Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp | 4 +-- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 21 ++++++------ Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 2 +- .../Core/PowerPC/Jit64Common/BlockCache.cpp | 10 ++++-- .../PowerPC/Jit64Common/TrampolineCache.cpp | 4 +-- Source/UnitTests/Common/x64EmitterTest.cpp | 10 +++--- 9 files changed, 55 insertions(+), 40 deletions(-) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 3db669e9b2..46b117a87f 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -412,26 +412,30 @@ void XEmitter::Rex(int w, int r, int x, int b) Write8(rx); } -void XEmitter::JMP(const u8* addr, const Jump jump) +void XEmitter::JMP(const u8* addr, bool force_near_padding) { u64 fn = (u64)addr; - if (jump == Jump::Short) + s64 distance = (s64)(fn - ((u64)code + SHORT_JMP_LEN)); + if (distance < -0x80 || distance >= 0x80) { - s64 distance = (s64)(fn - ((u64)code + 2)); - ASSERT_MSG(DYNA_REC, distance >= -0x80 && distance < 0x80, - "Jump::Short target too far away ({}), needs Jump::Near", distance); - // 8 bits will do - Write8(0xEB); - Write8((u8)(s8)distance); + distance = (s64)(fn - ((u64)code + NEAR_JMP_LEN)); + ASSERT_MSG(DYNA_REC, distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away ({}), needs indirect register", distance); + Write8(0xE9); + Write32((u32)(s32)distance); } else { - s64 distance = (s64)(fn - ((u64)code + 5)); - - ASSERT_MSG(DYNA_REC, distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump::Near 
target too far away ({}), needs indirect register", distance); - Write8(0xE9); - Write32((u32)(s32)distance); + Write8(0xEB); + Write8((u8)(s8)distance); + if (force_near_padding) + { + for (int i = 0; i < NEAR_JMP_LEN - SHORT_JMP_LEN; i++) + { + // INT3 is more efficient than NOP if never executed, as it stops CPU speculation. + INT3(); + } + } } } diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 3c8a411159..71dcea30f9 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -444,6 +444,8 @@ public: Short, Near, }; + static const int SHORT_JMP_LEN = 2; + static const int NEAR_JMP_LEN = 5; // Flow control void RET(); @@ -451,7 +453,7 @@ public: void UD2(); [[nodiscard]] FixupBranch J(Jump jump = Jump::Short); - void JMP(const u8* addr, Jump jump = Jump::Short); + void JMP(const u8* addr, bool force_near_padding = false); void JMPptr(const OpArg& arg); void JMPself(); // infinite loop! #ifdef CALL diff --git a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.cpp b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.cpp index f1b57ea504..f84a85b067 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.cpp @@ -119,7 +119,7 @@ void DSPEmitter::checkExceptions(u32 retval) TEST(32, R(ABI_RETURN), R(ABI_RETURN)); FixupBranch skip_return = J_CC(CC_Z, Jump::Short); MOV(32, R(EAX), Imm32(retval)); - JMP(m_return_dispatcher, Jump::Near); + JMP(m_return_dispatcher); SetJumpTarget(skip_return); m_gpr.LoadRegs(false); m_gpr.FlushRegs(c, false); @@ -293,7 +293,7 @@ void DSPEmitter::Compile(u16 start_addr) { MOV(16, R(EAX), Imm16(m_block_size[start_addr])); } - JMP(m_return_dispatcher, Jump::Near); + JMP(m_return_dispatcher); m_gpr.LoadRegs(false); m_gpr.FlushRegs(c, false); @@ -329,7 +329,7 @@ void DSPEmitter::Compile(u16 start_addr) { MOV(16, R(EAX), Imm16(m_block_size[start_addr])); } - JMP(m_return_dispatcher, Jump::Near); + JMP(m_return_dispatcher); m_gpr.LoadRegs(false); 
m_gpr.FlushRegs(c, false); @@ -392,7 +392,7 @@ void DSPEmitter::Compile(u16 start_addr) { MOV(16, R(EAX), Imm16(m_block_size[start_addr])); } - JMP(m_return_dispatcher, Jump::Near); + JMP(m_return_dispatcher); } void DSPEmitter::CompileCurrent(DSPEmitter& emitter) diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp index 4490061493..70239c95d1 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp @@ -109,7 +109,7 @@ void DSPEmitter::WriteBranchExit() { MOV(16, R(EAX), Imm16(m_block_size[m_start_address])); } - JMP(m_return_dispatcher, Jump::Near); + JMP(m_return_dispatcher); m_gpr.LoadRegs(false); m_gpr.FlushRegs(c, false); } @@ -130,7 +130,7 @@ void DSPEmitter::WriteBlockLink(u16 dest) SUB(16, R(ECX), Imm16(m_block_size[m_start_address])); MOV(16, MatR(RAX), R(ECX)); - JMP(m_block_links[dest], Jump::Near); + JMP(m_block_links[dest]); SetJumpTarget(notEnoughCycles); } else diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index ef4660b2fd..ae37f2b45d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -207,7 +207,7 @@ bool Jit64::BackPatch(SContext* ctx) // Patch the original memory operation. 
XEmitter emitter(start, start + info.len); - emitter.JMP(trampoline, Jump::Near); + emitter.JMP(trampoline); // NOPs become dead code const u8* end = info.start + info.len; for (const u8* i = emitter.GetCodePtr(); i < end; ++i) @@ -594,7 +594,10 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after) J_CC(CC_LE, asm_routines.do_timing); linkData.exitPtrs = GetWritableCodePtr(); - JMP(asm_routines.dispatcher_no_timing_check, Jump::Near); + // Padding required for correctness, as the JMP length might differ between dispatcher and + // linked block: if this wrote a Short JMP but then JitBlockCache::WriteLinkBlock wrote a Near + // JMP, the latter would overwrite other instructions. + JMP(asm_routines.dispatcher_no_timing_check, true); } b->linkData.push_back(linkData); @@ -622,7 +625,7 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) } else { - JMP(asm_routines.dispatcher, Jump::Near); + JMP(asm_routines.dispatcher); } } @@ -660,7 +663,7 @@ void Jit64::WriteRfiExitDestInRSCRATCH() ABI_PopRegistersAndAdjustStack({}, 0); EmitUpdateMembase(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, Jump::Near); + JMP(asm_routines.dispatcher); } void Jit64::WriteIdleExit(u32 destination) @@ -682,7 +685,7 @@ void Jit64::WriteExceptionExit() ABI_PopRegistersAndAdjustStack({}, 0); EmitUpdateMembase(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, Jump::Near); + JMP(asm_routines.dispatcher); } void Jit64::WriteExternalExceptionExit() @@ -695,7 +698,7 @@ void Jit64::WriteExternalExceptionExit() ABI_PopRegistersAndAdjustStack({}, 0); EmitUpdateMembase(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, Jump::Near); + JMP(asm_routines.dispatcher); } void Jit64::Run() @@ -936,7 +939,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) ABI_CallFunctionPC(JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(), 
static_cast<u32>(JitInterface::ExceptionType::PairedQuantize)); ABI_PopRegistersAndAdjustStack({}, 0); - JMP(asm_routines.dispatcher_no_check, Jump::Near); + JMP(asm_routines.dispatcher_no_check); SwitchToNearCode(); // Insert a check that the GQRs are still the value we expect at @@ -1064,7 +1067,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) Cleanup(); MOV(32, PPCSTATE(npc), Imm32(op.address)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher_exit, Jump::Near); + JMP(asm_routines.dispatcher_exit); SetJumpTarget(noBreakpoint); } @@ -1284,7 +1287,7 @@ void Jit64::IntializeSpeculativeConstants() ABI_CallFunctionPC(JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(), static_cast<u32>(JitInterface::ExceptionType::SpeculativeConstants)); ABI_PopRegistersAndAdjustStack({}, 0); - JMP(asm_routines.dispatcher_no_check, Jump::Near); + JMP(asm_routines.dispatcher_no_check); SwitchToNearCode(); } CMP(32, PPCSTATE_GPR(i), Imm32(compileTimeValue)); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 29841d1ada..eea2e75b0c 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -218,7 +218,7 @@ void Jit64AsmRoutineManager::Generate() // If jitting triggered an ISI exception, MSR.DR may have changed MOV(64, R(RMEM), PPCSTATE(mem_ptr)); - JMP(dispatcher_no_check, Jump::Near); + JMP(dispatcher_no_check); SetJumpTarget(bail); do_timing = GetCodePtr(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/BlockCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/BlockCache.cpp index 351c6e1c09..34c327df28 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/BlockCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/BlockCache.cpp @@ -26,15 +26,19 @@ void JitBlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBl // to emit JMP. So just NOP out the gap to the next block. 
// Support up to 3 additional bytes because of alignment. s64 offset = address - location; - if (offset > 0 && offset <= 5 + 3) + if (offset > 0 && offset <= Gen::XEmitter::NEAR_JMP_LEN + 3) { Gen::XEmitter emit(location, location + offset); emit.NOP(offset); } else { - Gen::XEmitter emit(location, location + 5); - emit.JMP(address, Gen::XEmitter::Jump::Near); + // Length forced to Near because JMP length might differ between dispatcher and linked block. + // Technically this isn't necessary, as this is executed after Jit64::JustWriteExit (which + // also pads), and a Short JMP written on top of a Near JMP isn't incorrect since the garbage + // bytes are skipped. But they confuse the disassembler, and probably the CPU speculation too. + Gen::XEmitter emit(location, location + Gen::XEmitter::NEAR_JMP_LEN); + emit.JMP(address, true); } } } diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp index 186868f7a8..f43535ab7d 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp @@ -46,7 +46,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info) SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse, info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); - JMP(info.start + info.len, Jump::Near); + JMP(info.start + info.len); Common::JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline_{:x}", info.pc); return trampoline; @@ -65,7 +65,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info) SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset, info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); - JMP(info.start + info.len, Jump::Near); + JMP(info.start + info.len); Common::JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_{:x}", info.pc); return 
trampoline; diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp index ae69c3f712..f38bb7c1b9 100644 --- a/Source/UnitTests/Common/x64EmitterTest.cpp +++ b/Source/UnitTests/Common/x64EmitterTest.cpp @@ -298,12 +298,14 @@ TEST_F(x64EmitterTest, POP_Register) TEST_F(x64EmitterTest, JMP) { emitter->NOP(1); - emitter->JMP(code_buffer, XEmitter::Jump::Short); + emitter->JMP(code_buffer); ExpectBytes({/* nop */ 0x90, /* short jmp */ 0xeb, /* offset -3 */ 0xfd}); - emitter->NOP(1); - emitter->JMP(code_buffer, XEmitter::Jump::Near); - ExpectBytes({/* nop */ 0x90, /* near jmp */ 0xe9, /* offset -6 */ 0xfa, 0xff, 0xff, 0xff}); + emitter->NOP(0x90); + const u8* after_nops = emitter->GetCodePtr(); + ResetCodeBuffer(); + emitter->JMP(after_nops); + ExpectBytes({/* near jmp */ 0xe9, /* offset */ 0x8B, 0, 0, 0}); } TEST_F(x64EmitterTest, JMPptr_Register)