From 3b0c0e2500c5ed45ee95d58d0f3e9e7322d08213 Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 29 Sep 2013 22:00:13 -0400 Subject: [PATCH 1/6] Trap to the debugger properly after BackPatch failure. --- Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp | 9 ++++++++- Source/Core/Core/Src/x64MemTools.cpp | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp index da7f7c1c27..4e9bb512de 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp @@ -177,15 +177,23 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) InstructionInfo info; if (!DisassembleMov(codePtr, &info)) { BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress); + return 0; } if (info.otherReg != RBX) + { PanicAlert("BackPatch : Base reg not RBX." "\n\nAttempted to access %08x.", emAddress); + return 0; + } auto it = registersInUseAtLoc.find(codePtr); if (it == registersInUseAtLoc.end()) + { PanicAlert("BackPatch: no register use entry for address %p", codePtr); + return 0; + } + u32 registersInUse = it->second; if (!info.isMemoryWrite) @@ -235,7 +243,6 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr()); return start; } - return 0; #else return 0; #endif diff --git a/Source/Core/Core/Src/x64MemTools.cpp b/Source/Core/Core/Src/x64MemTools.cpp index a0c79aaed5..62ca755385 100644 --- a/Source/Core/Core/Src/x64MemTools.cpp +++ b/Source/Core/Core/Src/x64MemTools.cpp @@ -65,6 +65,11 @@ bool DoFault(u64 bad_address, SContext *ctx) { ctx->CTX_PC = (u64) new_pc; } + else + { + // there was an error, give the debugger a chance + return false; + } return true; } From a53dc6f9811960e8b27efa7db45b28f2567738ac Mon Sep 17 00:00:00 2001 From: comex Date: Thu, 3 
Oct 2013 02:41:52 -0400 Subject: [PATCH 2/6] Remove profiled re-JIT support in JitIL. It's extremely unsafe, unused (not exposed in the GUI and not present in any gameconfigs), and mostly obviated by fastmem. Although this type of thing could theoretically be useful someday for fastmem support with MMU, it's probably not the best way to do it, the existing implementation is way too simplistic, and it can always be dug up to provide support for a new implementation if needed. Not like it's a big deal to keep it working, but it really seems pointless. --- Source/Core/Core/Src/ConfigManager.cpp | 1 - Source/Core/Core/Src/CoreParameter.cpp | 2 +- Source/Core/Core/Src/CoreParameter.h | 1 - Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 - .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 158 +----------------- Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h | 2 - .../Core/Src/PowerPC/Jit64IL/JitILAsm.cpp | 7 - .../Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h | 2 - 8 files changed, 6 insertions(+), 169 deletions(-) diff --git a/Source/Core/Core/Src/ConfigManager.cpp b/Source/Core/Core/Src/ConfigManager.cpp index 84afc54a6d..fd328e12e2 100644 --- a/Source/Core/Core/Src/ConfigManager.cpp +++ b/Source/Core/Core/Src/ConfigManager.cpp @@ -409,7 +409,6 @@ void SConfig::LoadSettings() ini.Get("Core", "SlotB", (int*)&m_EXIDevice[1], EXIDEVICE_NONE); ini.Get("Core", "SerialPort1", (int*)&m_EXIDevice[2], EXIDEVICE_NONE); ini.Get("Core", "BBA_MAC", &m_bba_mac); - ini.Get("Core", "ProfiledReJIT",&m_LocalCoreStartupParameter.bJITProfiledReJIT, false); ini.Get("Core", "TimeProfiling",&m_LocalCoreStartupParameter.bJITILTimeProfiling, false); ini.Get("Core", "OutputIR", &m_LocalCoreStartupParameter.bJITILOutputIR, false); char sidevicenum[16]; diff --git a/Source/Core/Core/Src/CoreParameter.cpp b/Source/Core/Core/Src/CoreParameter.cpp index c22f5a15ba..4cba012ea4 100644 --- a/Source/Core/Core/Src/CoreParameter.cpp +++ b/Source/Core/Core/Src/CoreParameter.cpp @@ -28,7 +28,7 @@ 
SCoreStartupParameter::SCoreStartupParameter() bJITLoadStoreFloatingOff(false), bJITLoadStorePairedOff(false), bJITFloatingPointOff(false), bJITIntegerOff(false), bJITPairedOff(false), bJITSystemRegistersOff(false), - bJITBranchOff(false), bJITProfiledReJIT(false), + bJITBranchOff(false), bJITILTimeProfiling(false), bJITILOutputIR(false), bEnableFPRF(false), bCPUThread(true), bDSPThread(false), bDSPHLE(true), diff --git a/Source/Core/Core/Src/CoreParameter.h b/Source/Core/Core/Src/CoreParameter.h index 9821580c84..caef1d4bbb 100644 --- a/Source/Core/Core/Src/CoreParameter.h +++ b/Source/Core/Core/Src/CoreParameter.h @@ -111,7 +111,6 @@ struct SCoreStartupParameter bool bJITPairedOff; bool bJITSystemRegistersOff; bool bJITBranchOff; - bool bJITProfiledReJIT; bool bJITILTimeProfiling; bool bJITILOutputIR; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index bb1c3a4a19..13ec88e0e1 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -234,6 +234,4 @@ public: void icbi(UGeckoInstruction inst); }; -void ProfiledReJit(); - #endif // _JIT64_H diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index ecd44134fa..a9051996d9 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -56,9 +56,6 @@ struct RegInfo { InstLoc fregs[MAX_NUMBER_OF_REGS]; unsigned numSpills; unsigned numFSpills; - bool MakeProfile; - bool UseProfile; - unsigned numProfiledLoads; unsigned exitNumber; RegInfo(JitIL* j, InstLoc f, unsigned insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts) { @@ -68,9 +65,7 @@ struct RegInfo { } numSpills = 0; numFSpills = 0; - numProfiledLoads = 0; exitNumber = 0; - MakeProfile = UseProfile = false; } private: @@ -106,7 +101,6 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) { } static unsigned SlotSet[1000]; -static unsigned 
ProfiledLoads[1000]; static u8 GC_ALIGNED16(FSlotSet[16*1000]); static OpArg regLocForSlot(RegInfo& RI, unsigned slot) { @@ -440,47 +434,14 @@ static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum regMarkUse(RI, I, AI, OpNum); } -static void regClearDeadMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) { - if (!(RI.IInfo[I - RI.FirstI] & (2 << OpNum))) - return; - if (isImm(*AI)) { - unsigned addr = RI.Build->GetImmValue(AI); - if (Memory::IsRAMAddress(addr)) { - return; - } - } - InstLoc AddrBase; - if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) { - AddrBase = getOp1(AI); - } else { - AddrBase = AI; - } - regClearInst(RI, AddrBase); -} - // in 64-bit build, this returns a completely bizarre address sometimes! static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, - unsigned OpNum, unsigned Size, X64Reg* dest, - bool Profiled, - unsigned ProfileOffset = 0) { + unsigned OpNum, unsigned Size, X64Reg* dest) { if (isImm(*AI)) { unsigned addr = RI.Build->GetImmValue(AI); if (Memory::IsRAMAddress(addr)) { if (dest) *dest = regFindFreeReg(RI); -#ifdef _M_IX86 - // 32-bit - if (Profiled) - return M((void*)((u8*)Memory::base + (addr & Memory::MEMVIEW32_MASK))); - return M((void*)addr); -#else - // 64-bit - if (Profiled) { - RI.Jit->LEA(32, EAX, M((void*)(u64)addr)); - return MComplex(RBX, EAX, SCALE_1, 0); - } - return M((void*)(u64)addr); -#endif } } unsigned offset; @@ -512,44 +473,12 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, } else { baseReg = regEnsureInReg(RI, AddrBase); } - - if (Profiled) { - // (Profiled mode isn't the default, at least for the moment) -#ifdef _M_IX86 - return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset); -#else - RI.Jit->LEA(32, EAX, MDisp(baseReg, offset)); - return MComplex(RBX, EAX, SCALE_1, 0); -#endif - } return MDisp(baseReg, offset); } static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) { - if (RI.UseProfile) { - unsigned curLoad = 
ProfiledLoads[RI.numProfiledLoads++]; - if (!(curLoad & 0x0C000000)) { - X64Reg reg; - OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, - Size, ®, true, - -(curLoad & 0xC0000000)); - RI.Jit->MOVZX(32, Size, reg, addr); - RI.Jit->BSWAP(Size, reg); - if (regReadUse(RI, I)) - RI.regs[reg] = I; - return; - } - } X64Reg reg; - OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, Size, ®, false); - RI.Jit->LEA(32, ECX, addr); - if (RI.MakeProfile) { - RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX)); - } - u32 mem_mask = 0; - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU || SConfig::GetInstance().m_LocalCoreStartupParameter.bTLBHack) - mem_mask = 0x20000000; + OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, Size, ®); RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000 | mem_mask)); FixupBranch argh = RI.Jit->J_CC(CC_Z); @@ -580,19 +509,6 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) { RI.regs[reg] = I; } -static OpArg regSwappedImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { - unsigned imm = RI.Build->GetImmValue(I); - if (Size == 32) { - imm = Common::swap32(imm); - return Imm32(imm); - } else if (Size == 16) { - imm = Common::swap16(imm); - return Imm16(imm); - } else { - return Imm8(imm); - } -} - static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { unsigned imm = RI.Build->GetImmValue(I); if (Size == 32) { @@ -605,42 +521,7 @@ static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { } static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { - if (RI.UseProfile) { - unsigned curStore = ProfiledLoads[RI.numProfiledLoads++]; - if (!(curStore & 0x0C000000)) { - OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, - Size, 0, true, - -(curStore & 0xC0000000)); - if (isImm(*getOp1(I))) { - RI.Jit->MOV(Size, addr, regSwappedImmForConst(RI, getOp1(I), Size)); - } else { - RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - RI.Jit->BSWAP(Size, ECX); - RI.Jit->MOV(Size, 
addr, R(ECX)); - } - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - return; - } else if ((curStore & 0xFFFFF000) == 0xCC008000) { - regSpill(RI, EAX); - if (isImm(*getOp1(I))) { - RI.Jit->MOV(Size, R(ECX), regSwappedImmForConst(RI, getOp1(I), Size)); - } else { - RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - RI.Jit->BSWAP(Size, ECX); - } - RI.Jit->MOV(32, R(EAX), M(&GPFifo::m_gatherPipeCount)); - RI.Jit->MOV(Size, MDisp(EAX, (u32)(u64)GPFifo::m_gatherPipe), R(ECX)); - RI.Jit->ADD(32, R(EAX), Imm8(Size >> 3)); - RI.Jit->MOV(32, M(&GPFifo::m_gatherPipeCount), R(EAX)); - RI.Jit->js.fifoBytesThisBlock += Size >> 3; - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - regClearDeadMemAddress(RI, I, getOp2(I), 2); - return; - } - } - OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0, false); + OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0); RI.Jit->LEA(32, ECX, addr); regSpill(RI, EAX); if (isImm(*getOp1(I))) { @@ -648,9 +529,6 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { } else { RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); } - if (RI.MakeProfile) { - RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX)); - } RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI)); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(I)); @@ -704,18 +582,6 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { } static void regWriteExit(RegInfo& RI, InstLoc dest) { - if (RI.MakeProfile) { - if (isImm(*dest)) { - RI.Jit->MOV(32, M(&PC), Imm32(RI.Build->GetImmValue(dest))); - } else { - RI.Jit->MOV(32, R(EAX), regLocForInst(RI, dest)); - RI.Jit->MOV(32, M(&PC), R(EAX)); - } - RI.Jit->Cleanup(); - RI.Jit->SUB(32, M(&CoreTiming::downcount), Imm32(RI.Jit->js.downcountAmount)); - RI.Jit->JMP(((JitIL *)jit)->asm_routines.doReJit, true); - return; - } if (isImm(*dest)) { RI.Jit->WriteExit(RI.Build->GetImmValue(dest), RI.exitNumber++); } else { @@ -729,12 
+595,10 @@ static bool checkIsSNAN() { return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]); } -static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) { +static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit) { //printf("Writing block: %x\n", js.blockStart); RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); RI.Build = ibuild; - RI.UseProfile = UseProfile; - RI.MakeProfile = MakeProfile; // Pass to compute liveness ibuild->StartBackPass(); for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) { @@ -1997,22 +1861,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak } } - //if (!RI.MakeProfile && RI.numSpills) - // printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills); - Jit->WriteExit(jit->js.curBlock->exitAddress[0], 0); Jit->UD2(); } void JitIL::WriteCode() { - DoWriteCode(&ibuild, this, false, SConfig::GetInstance().m_LocalCoreStartupParameter.bJITProfiledReJIT); -} - -void ProfiledReJit() { - JitIL *jitil = (JitIL *)jit; - jitil->SetCodePtr(jitil->js.rewriteStart); - DoWriteCode(&jitil->ibuild, jitil, true, false); - jitil->js.curBlock->codeSize = (int)(jitil->GetCodePtr() - jitil->js.rewriteStart); - jitil->GetBlockCache()->FinalizeBlock(jitil->js.curBlock->blockNum, jitil->jo.enableBlocklink, - jitil->js.curBlock->normalEntry); + DoWriteCode(&ibuild, this); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h index 30371311a3..770b26120c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h @@ -221,6 +221,4 @@ public: void Jit(u32 em_address); -void ProfiledReJit(); - #endif // _JITIL_H diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp index 9750a0e690..8e8cbe6bd7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp +++ 
b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -233,13 +233,6 @@ void JitILAsmRoutineManager::GenerateCommon() fifoDirectWriteXmm64 = AlignCode4(); GenFifoXmm64Write(); - doReJit = AlignCode4(); - ABI_AlignStack(0); - CALL(reinterpret_cast(&ProfiledReJit)); - ABI_RestoreStack(0); - SUB(32, M(&CoreTiming::downcount), Imm8(0)); - JMP(dispatcher, true); - GenQuantizedLoads(); GenQuantizedStores(); GenQuantizedSingleStores(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h index 8222e897c3..cf4dd6e155 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h @@ -38,8 +38,6 @@ public: void Shutdown() { FreeCodeSpace(); } - - const u8 *doReJit; }; extern JitILAsmRoutineManager jitil_asm_routines; From 5e4665301bd5c1bbcd5e375cdbcf9ab99e4d8de6 Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 29 Sep 2013 22:51:07 -0400 Subject: [PATCH 3/6] Finish replacing ThunkManager with ABI_PushRegistersAndAdjustStack. As part of that, change SafeLoadToEAX to SafeLoadToReg, and have JitIL use that, which should fix fastmem on JitIL. This should also fix a potential stack corruption issue with x86. 
--- Source/Core/Common/CMakeLists.txt | 1 - Source/Core/Common/Common.vcxproj | 2 - Source/Core/Common/Common.vcxproj.filters | 2 - Source/Core/Common/Src/Thunk.h | 46 ------- Source/Core/Common/Src/x64ABI.cpp | 80 ++++++++++++ Source/Core/Common/Src/x64Emitter.cpp | 68 ---------- Source/Core/Common/Src/x64Emitter.h | 7 +- Source/Core/Common/Src/x64Thunk.cpp | 121 ------------------ Source/Core/Core/Src/HW/HW.cpp | 1 - Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 6 +- .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp | 1 - .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 69 +++++----- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 4 +- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 8 +- .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 1 - Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp | 1 - .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 48 ++----- .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp | 1 - .../Core/Src/PowerPC/Jit64IL/JitILAsm.cpp | 1 - .../Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp | 1 - .../Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp | 1 - .../PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp | 1 - .../PowerPC/Jit64IL/JitIL_SystemRegisters.cpp | 1 - .../Src/PowerPC/JitArm32/JitArm_Branch.cpp | 1 - .../PowerPC/JitArm32/JitArm_FloatingPoint.cpp | 1 - .../Src/PowerPC/JitArm32/JitArm_Integer.cpp | 1 - .../Src/PowerPC/JitArm32/JitArm_LoadStore.cpp | 1 - .../JitArm32/JitArm_LoadStoreFloating.cpp | 1 - .../JitArm32/JitArm_LoadStorePaired.cpp | 1 - .../Src/PowerPC/JitArm32/JitArm_Paired.cpp | 1 - .../JitArm32/JitArm_SystemRegisters.cpp | 1 - .../Src/PowerPC/JitCommon/JitAsmCommon.cpp | 7 +- .../Core/Src/PowerPC/JitCommon/JitAsmCommon.h | 4 - .../Src/PowerPC/JitCommon/JitBackpatch.cpp | 9 +- .../Core/Src/PowerPC/JitCommon/JitBackpatch.h | 3 - .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 117 ++++++++++------- .../Core/Src/PowerPC/JitCommon/Jit_Util.h | 6 +- 37 files changed, 220 insertions(+), 406 deletions(-) delete mode 100644 Source/Core/Common/Src/Thunk.h delete mode 100644 
Source/Core/Common/Src/x64Thunk.cpp diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index 6e230c7fa7..0302ee78c8 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -37,7 +37,6 @@ else() if(NOT _M_GENERIC) #X86 set(SRCS ${SRCS} Src/x64FPURoundMode.cpp - Src/x64Thunk.cpp ) endif() set(SRCS ${SRCS} Src/x64CPUDetect.cpp) diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 4066febcca..20cf92dfc3 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -217,7 +217,6 @@ - @@ -263,7 +262,6 @@ - diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters index a912cc3d8a..05b9edcb14 100644 --- a/Source/Core/Common/Common.vcxproj.filters +++ b/Source/Core/Common/Common.vcxproj.filters @@ -47,7 +47,6 @@ - @@ -84,7 +83,6 @@ - diff --git a/Source/Core/Common/Src/Thunk.h b/Source/Core/Common/Src/Thunk.h deleted file mode 100644 index b1487badf8..0000000000 --- a/Source/Core/Common/Src/Thunk.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#ifndef _THUNK_H_ -#define _THUNK_H_ - -#include - -#include "Common.h" -#include "x64Emitter.h" - -// This simple class creates a wrapper around a C/C++ function that saves all fp state -// before entering it, and restores it upon exit. This is required to be able to selectively -// call functions from generated code, without inflicting the performance hit and increase -// of complexity that it means to protect the generated code from this problem. - -// This process is called thunking. - -// There will only ever be one level of thunking on the stack, plus, -// we don't want to pollute the stack, so we store away regs somewhere global. -// NOT THREAD SAFE. This may only be used from the CPU thread. -// Any other thread using this stuff will be FATAL. 
- -class ThunkManager : public Gen::XCodeBlock -{ - std::map thunks; - - const u8 *save_regs; - const u8 *load_regs; - -public: - ThunkManager() { - Init(); - } - ~ThunkManager() { - Shutdown(); - } - void *ProtectFunction(void *function, int num_params); -private: - void Init(); - void Shutdown(); - void Reset(); -}; - -#endif // _THUNK_H_ diff --git a/Source/Core/Common/Src/x64ABI.cpp b/Source/Core/Common/Src/x64ABI.cpp index 54d19c7775..3750636c3e 100644 --- a/Source/Core/Common/Src/x64ABI.cpp +++ b/Source/Core/Common/Src/x64ABI.cpp @@ -57,6 +57,86 @@ void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) { } } +void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog) +{ + int regSize = +#ifdef _M_X64 + 8; +#else + 4; +#endif + int shadow = 0; +#if defined(_WIN32) && defined(_M_X64) + shadow = 0x20; +#endif + int count = 0; + for (int r = 0; r < 16; r++) + { + if (mask & (1 << r)) + { + PUSH((X64Reg) r); + count++; + } + } + int size = ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + size += 16; + } + size += shadow; + if (size) + SUB(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); + int offset = shadow; + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + { + MOVAPD(MDisp(RSP, offset), (X64Reg) x); + offset += 16; + } + } +} + +void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog) +{ + int regSize = +#ifdef _M_X64 + 8; +#else + 4; +#endif + int size = 0; +#if defined(_WIN32) && defined(_M_X64) + size += 0x20; +#endif + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + { + MOVAPD((X64Reg) x, MDisp(RSP, size)); + size += 16; + } + } + int count = 0; + for (int r = 0; r < 16; r++) + { + if (mask & (1 << r)) + count++; + } + size += ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; + + if (size) + ADD(regSize * 8, R(RSP), size >= 0x80 ? 
Imm32(size) : Imm8(size)); + for (int r = 15; r >= 0; r--) + { + if (mask & (1 << r)) + { + POP((X64Reg) r); + } + } +} + #ifdef _M_IX86 // All32 // Shared code between Win32 and Unix32 diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 9b6731856f..a5d7cb2f0f 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1634,74 +1634,6 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u CALLptr(M(impptr)); } -void XEmitter::PushRegistersAndAlignStack(u32 mask) -{ - int shadow = 0; -#ifdef _WIN32 - shadow = 0x20; -#endif - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - { - PUSH((X64Reg) r); - count++; - } - } - int size = (count & 1) ? 0 : 8; - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - size += 16; - } - size += shadow; - if (size) - SUB(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); - int offset = shadow; - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - { - MOVAPD(MDisp(RSP, offset), (X64Reg) x); - offset += 16; - } - } -} - -void XEmitter::PopRegistersAndAlignStack(u32 mask) -{ - int size = 0; -#ifdef _WIN32 - size += 0x20; -#endif - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - { - MOVAPD((X64Reg) x, MDisp(RSP, size)); - size += 16; - } - } - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - count++; - } - size += (count & 1) ? 0 : 8; - - if (size) - ADD(64, R(RSP), size >= 0x80 ? 
Imm32(size) : Imm8(size)); - for (int r = 15; r >= 0; r--) - { - if (mask & (1 << r)) - { - POP((X64Reg) r); - } - } -} - #endif } diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 87724a8092..94938b290a 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -646,6 +646,10 @@ public: void ABI_PushAllCalleeSavedRegsAndAdjustStack(); void ABI_PopAllCalleeSavedRegsAndAdjustStack(); + // A more flexible version of the above. + void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog); + void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog); + unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); @@ -691,9 +695,6 @@ public: #define DECLARE_IMPORT(x) extern "C" void *__imp_##x - void PushRegistersAndAlignStack(u32 mask); - void PopRegistersAndAlignStack(u32 mask); - #endif }; // class XEmitter diff --git a/Source/Core/Common/Src/x64Thunk.cpp b/Source/Core/Common/Src/x64Thunk.cpp deleted file mode 100644 index d77d78e40e..0000000000 --- a/Source/Core/Common/Src/x64Thunk.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. 
- -#include - -#include "Common.h" -#include "MemoryUtil.h" -#include "x64ABI.h" -#include "Thunk.h" - -#define THUNK_ARENA_SIZE 1024*1024*1 - -namespace -{ - -static u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]); -static u8 GC_ALIGNED32(saved_gpr_state[16 * 8]); -static u16 saved_mxcsr; - -} // namespace - -using namespace Gen; - -void ThunkManager::Init() -{ - AllocCodeSpace(THUNK_ARENA_SIZE); - save_regs = GetCodePtr(); - for (int i = 2; i < ABI_GetNumXMMRegs(); i++) - MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i)); - STMXCSR(M(&saved_mxcsr)); -#ifdef _M_X64 - MOV(64, M(saved_gpr_state + 0 ), R(RCX)); - MOV(64, M(saved_gpr_state + 8 ), R(RDX)); - MOV(64, M(saved_gpr_state + 16), R(R8) ); - MOV(64, M(saved_gpr_state + 24), R(R9) ); - MOV(64, M(saved_gpr_state + 32), R(R10)); - MOV(64, M(saved_gpr_state + 40), R(R11)); -#ifndef _WIN32 - MOV(64, M(saved_gpr_state + 48), R(RSI)); - MOV(64, M(saved_gpr_state + 56), R(RDI)); -#endif - MOV(64, M(saved_gpr_state + 64), R(RBX)); -#else - MOV(32, M(saved_gpr_state + 0 ), R(RCX)); - MOV(32, M(saved_gpr_state + 4 ), R(RDX)); -#endif - RET(); - load_regs = GetCodePtr(); - LDMXCSR(M(&saved_mxcsr)); - for (int i = 2; i < ABI_GetNumXMMRegs(); i++) - MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16)); -#ifdef _M_X64 - MOV(64, R(RCX), M(saved_gpr_state + 0 )); - MOV(64, R(RDX), M(saved_gpr_state + 8 )); - MOV(64, R(R8) , M(saved_gpr_state + 16)); - MOV(64, R(R9) , M(saved_gpr_state + 24)); - MOV(64, R(R10), M(saved_gpr_state + 32)); - MOV(64, R(R11), M(saved_gpr_state + 40)); -#ifndef _WIN32 - MOV(64, R(RSI), M(saved_gpr_state + 48)); - MOV(64, R(RDI), M(saved_gpr_state + 56)); -#endif - MOV(64, R(RBX), M(saved_gpr_state + 64)); -#else - MOV(32, R(RCX), M(saved_gpr_state + 0 )); - MOV(32, R(RDX), M(saved_gpr_state + 4 )); -#endif - RET(); -} - -void ThunkManager::Reset() -{ - thunks.clear(); - ResetCodePtr(); -} - -void ThunkManager::Shutdown() -{ - Reset(); - FreeCodeSpace(); -} - -void 
*ThunkManager::ProtectFunction(void *function, int num_params) -{ - std::map::iterator iter; - iter = thunks.find(function); - if (iter != thunks.end()) - return (void *)iter->second; - if (!region) - PanicAlert("Trying to protect functions before the emu is started. Bad bad bad."); - - const u8 *call_point = GetCodePtr(); -#ifdef _M_X64 - // Make sure to align stack. - ABI_AlignStack(0, true); - CALL((void*)save_regs); - CALL((void*)function); - CALL((void*)load_regs); - ABI_RestoreStack(0, true); - RET(); -#else - CALL((void*)save_regs); - // Since parameters are in the previous stack frame, not in registers, this takes some - // trickery : we simply re-push the parameters. might not be optimal, but that doesn't really - // matter. - ABI_AlignStack(num_params * 4, true); - unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4, true); - for (int i = 0; i < num_params; i++) { - // ESP is changing, so we do not need i - PUSH(32, MDisp(ESP, alignedSize)); - } - CALL(function); - ABI_RestoreStack(num_params * 4, true); - CALL((void*)load_regs); - RET(); -#endif - - thunks[function] = call_point; - return (void *)call_point; -} diff --git a/Source/Core/Core/Src/HW/HW.cpp b/Source/Core/Core/Src/HW/HW.cpp index cef3666d59..586344ecc6 100644 --- a/Source/Core/Core/Src/HW/HW.cpp +++ b/Source/Core/Core/Src/HW/HW.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. 
#include "Common.h" -#include "Thunk.h" #include "../Core.h" #include "HW.h" #include "../PowerPC/PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index f2bff458ae..2b14810b3e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -12,7 +12,6 @@ #include "Common.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "../../HLE/HLE.h" #include "../../Core.h" #include "../../PatchEngine.h" @@ -552,7 +551,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { js.fifoBytesThisBlock -= 32; MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write - ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); + ABI_PopRegistersAndAdjustStack(registersInUse, false); } u32 function = HLE::GetFunctionIndex(ops[i].address); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 310deaa9c2..2dba16cff3 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index a94b6977b1..d63aee55e7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -6,7 +6,6 @@ // Should give a very noticable speed boost to paired single heavy code. 
#include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" @@ -120,11 +119,8 @@ void Jit64::lXXx(UGeckoInstruction inst) // do our job at first s32 offset = (s32)(s16)inst.SIMM_16; - gpr.Lock(d); - SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend); - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); + gpr.BindToRegister(d, false, true); + SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, RegistersInUse(), signExtend); gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); @@ -174,18 +170,32 @@ void Jit64::lXXx(UGeckoInstruction inst) { if ((inst.OPCD != 31) && gpr.R(a).IsImm()) { - opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16); + u32 val = (u32)gpr.R(a).offset + (s32)inst.SIMM_16; + opAddress = Imm32(val); + if (update) + gpr.SetImmediate32(a, val); } else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm()) { - opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset); + u32 val = (u32)gpr.R(a).offset + (u32)gpr.R(b).offset; + opAddress = Imm32(val); + if (update) + gpr.SetImmediate32(a, val); } else { - gpr.FlushLockX(ABI_PARAM1); - opAddress = R(ABI_PARAM1); - MOV(32, opAddress, gpr.R(a)); - + if (update || (inst.OPCD != 31 && inst.SIMM_16 == 0)) + { + gpr.BindToRegister(a, true, update); + opAddress = gpr.R(a); + } + else + { + gpr.FlushLockX(ABI_PARAM1); + opAddress = R(ABI_PARAM1); + MOV(32, opAddress, gpr.R(a)); + } + if (inst.OPCD == 31) ADD(32, opAddress, gpr.R(b)); else @@ -193,29 +203,9 @@ void Jit64::lXXx(UGeckoInstruction inst) } } - SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend); - - // We must flush immediate values from the following registers because - // they may change at runtime if no MMU exception has been raised - gpr.KillImmediate(d, true, true); - if (update) - { - gpr.Lock(a); - gpr.BindToRegister(a, true, true); - } - - MEMCHECK_START - - if (update) - { - if (inst.OPCD == 31) - ADD(32, gpr.R(a), gpr.R(b)); - 
else - ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); - } - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END + gpr.Lock(a, b, d); + gpr.BindToRegister(d, false, true); + SafeLoadToReg(gpr.RX(d), opAddress, accessSize, 0, RegistersInUse(), signExtend); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -318,12 +308,15 @@ void Jit64::stX(UGeckoInstruction inst) else { MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break; - case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break; - case 8: ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break; + case 32: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); break; + case 16: ABI_CallFunctionAC(true ? 
((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); break; + case 8: ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (update) gpr.SetImmediate32(a, addr); return; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index e8d4465ebb..9d84e6222d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst) } s32 offset = (s32)(s16)inst.SIMM_16; - SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false); + SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false); MEMCHECK_START @@ -338,7 +338,7 @@ void Jit64::lfsx(UGeckoInstruction inst) MEMCHECK_END } else { - SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false); + SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false); MEMCHECK_START diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 00066f4746..876268b90a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -7,7 +7,6 @@ #include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" #include "../../HW/GPFifo.h" @@ -102,20 +101,19 @@ void Jit64::psq_st(UGeckoInstruction inst) #else int addr_scale = SCALE_8; #endif + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); if (inst.W) { // One value XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. 
CVTSD2SS(XMM0, fpr.R(s)); - ABI_AlignStack(0); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized)); - ABI_RestoreStack(0); } else { // Pair of values CVTPD2PS(XMM0, fpr.R(s)); - ABI_AlignStack(0); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.pairedStoreQuantized)); - ABI_RestoreStack(0); } + ABI_PopRegistersAndAdjustStack(registersInUse, false); gpr.UnlockAll(); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index e316ae212c..92804a304c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -11,7 +11,6 @@ #include "../PPCTables.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "Jit.h" #include "JitRegCache.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index 82beace052..6ffdbad7ad 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -123,7 +123,6 @@ Fix profiled loads/stores to work safely. On 32-bit, one solution is to #include "IR.h" #include "../PPCTables.h" #include "../../CoreTiming.h" -#include "Thunk.h" #include "../../HW/Memmap.h" #include "JitILAsm.h" #include "JitIL.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index a9051996d9..6eff9056eb 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -27,7 +27,6 @@ The register allocation is linear scan allocation. #include "IR.h" #include "../PPCTables.h" #include "../../CoreTiming.h" -#include "Thunk.h" #include "../../HW/Memmap.h" #include "JitILAsm.h" #include "JitIL.h" @@ -39,8 +38,6 @@ The register allocation is linear scan allocation. 
#include "../../Core.h" #include "HW/ProcessorInterface.h" -static ThunkManager thunks; - using namespace IREmitter; using namespace Gen; @@ -435,13 +432,14 @@ static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum } // in 64-bit build, this returns a completely bizarre address sometimes! -static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, - unsigned OpNum, unsigned Size, X64Reg* dest) { +static std::pair regBuildMemAddress(RegInfo& RI, InstLoc I, + InstLoc AI, unsigned OpNum, unsigned Size, X64Reg* dest) { if (isImm(*AI)) { - unsigned addr = RI.Build->GetImmValue(AI); + unsigned addr = RI.Build->GetImmValue(AI); if (Memory::IsRAMAddress(addr)) { if (dest) *dest = regFindFreeReg(RI); + return std::make_pair(Imm32(addr), 0); } } unsigned offset; @@ -473,38 +471,15 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, } else { baseReg = regEnsureInReg(RI, AddrBase); } - return MDisp(baseReg, offset); + + return std::make_pair(R(baseReg), offset); } static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) { X64Reg reg; - OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, Size, ®); + auto info = regBuildMemAddress(RI, I, getOp1(I), 1, Size, ®); - RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000 | mem_mask)); - FixupBranch argh = RI.Jit->J_CC(CC_Z); - - // Slow safe read using Memory::Read_Ux routines -#ifdef _M_IX86 // we don't allocate EAX on x64 so no reason to save it. 
- if (reg != EAX) { - RI.Jit->PUSH(32, R(EAX)); - } -#endif - switch (Size) - { - case 32: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), ECX); break; - case 16: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), ECX); break; - case 8: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), ECX); break; - } - if (reg != EAX) { - RI.Jit->MOV(32, R(reg), R(EAX)); -#ifdef _M_IX86 - RI.Jit->POP(32, R(EAX)); -#endif - } - FixupBranch arg2 = RI.Jit->J(); - RI.Jit->SetJumpTarget(argh); - RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false); - RI.Jit->SetJumpTarget(arg2); + RI.Jit->SafeLoadToReg(reg, info.first, Size, info.second, regsInUse(RI), false); if (regReadUse(RI, I)) RI.regs[reg] = I; } @@ -521,8 +496,11 @@ static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { } static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { - OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0); - RI.Jit->LEA(32, ECX, addr); + auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0); + if (info.first.IsImm()) + RI.Jit->MOV(32, R(ECX), info.first); + else + RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second)); regSpill(RI, EAX); if (isImm(*getOp1(I))) { RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 4b04415add..acd8baa002 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -7,7 +7,6 @@ #include "Common.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "../../HLE/HLE.h" #include "../../Core.h" #include "../../PatchEngine.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp index 8e8cbe6bd7..57c5212b33 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp 
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -13,7 +13,6 @@ #include "CPUDetect.h" #include "x64ABI.h" -#include "Thunk.h" #include "../../HW/GPFifo.h" #include "../../Core.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp index d0ea86225d..55bed551a8 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "../../ConfigManager.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp index 98b6726227..b8f561e05f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp @@ -6,7 +6,6 @@ // Should give a very noticable speed boost to paired single heavy code. 
#include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp index 33b81623c1..e371fa6a39 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp @@ -4,7 +4,6 @@ #include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" #include "../../HW/GPFifo.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp index cb3ab91b32..38f55db982 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp @@ -11,7 +11,6 @@ #include "../PPCTables.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "JitIL.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp index b6bde154af..dc24366df7 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index cdf1bd89af..e27bb90e69 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -16,7 +16,6 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git 
a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp index 3b57351568..0df1cd965e 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp index f5296a17d3..1c5c55a369 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -16,7 +16,6 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp index c8a773e2c9..c2f2f626ac 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp @@ -16,7 +16,6 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp index ab90f41655..d694fbd79b 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 // Refer to the license.txt file included. 
#include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp index 185af91a9d..744fbc1940 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index 3dcf5fe099..13f90fb684 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index da210e2606..8d1f6fa06b 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. 
#include "x64ABI.h" -#include "Thunk.h" #include "CPUDetect.h" #include "x64Emitter.h" @@ -167,7 +166,7 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); FixupBranch skip_complex = J(); SetJumpTarget(too_complex); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX, /* noProlog = */ true); + ABI_CallFunctionRR((void *)&WriteDual32, RAX, RCX, /* noProlog = */ true); SetJumpTarget(skip_complex); RET(); #else @@ -184,10 +183,10 @@ void CommonAsmRoutines::GenQuantizedStores() { FixupBranch arg2 = J(); SetJumpTarget(argh); MOV(32, R(EAX), M(((char*)&psTemp))); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true); + ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, ECX, /* noProlog = */ true); MOV(32, R(EAX), M(((char*)&psTemp)+4)); ADD(32, R(ECX), Imm32(4)); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true); + ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, ECX, /* noProlog = */ true); SetJumpTarget(arg2); RET(); #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h index 58d7c5fe6e..67398f6f6b 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h @@ -6,7 +6,6 @@ #define _JITASMCOMMON_H #include "../JitCommon/Jit_Util.h" -#include "Thunk.h" class CommonAsmRoutinesBase { public: @@ -65,9 +64,6 @@ public: void GenFifoXmm64Write(); void GenFifoFloatWrite(); -private: - ThunkManager thunks; - }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp index 4e9bb512de..a325ca1884 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp @@ -13,7 +13,6 @@ #include "x64Emitter.h" #include 
"x64ABI.h" -#include "Thunk.h" #include "x64Analyzer.h" #include "StringUtil.h" @@ -76,7 +75,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (info.displacement) { ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); } - PushRegistersAndAlignStack(registersInUse); + ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 4: @@ -96,7 +95,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re MOV(32, R(dataReg), R(EAX)); } - PopRegistersAndAlignStack(registersInUse); + ABI_PopRegistersAndAdjustStack(registersInUse, true); RET(); #endif return trampoline; @@ -137,7 +136,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); } - PushRegistersAndAlignStack(registersInUse); + ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 8: @@ -154,7 +153,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r break; } - PopRegistersAndAlignStack(registersInUse); + ABI_PopRegistersAndAdjustStack(registersInUse, true); RET(); #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h index 59310b6ecf..3dc48dd05d 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h @@ -8,7 +8,6 @@ #include "Common.h" #include "x64Emitter.h" #include "x64Analyzer.h" -#include "Thunk.h" // meh. 
#if defined(_WIN32) @@ -234,8 +233,6 @@ public: const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse); -private: - ThunkManager thunks; }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 79833e883c..7fbc565c23 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "CPUDetect.h" #include "../PowerPC.h" @@ -58,34 +57,46 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i #endif } -u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend) +u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend) { u8 *result; #ifdef _M_X64 if (opAddress.IsSimpleReg()) { + // Deal with potential wraparound. (This is just a heuristic, and it would + // be more correct to actually mirror the first page at the end, but the + // only case where it probably actually matters is JitIL turning adds into + // offsets with the wrong sign, so whatever. Since the original code + // *could* try to wrap an address around, however, this is the correct + // place to address the issue.) 
+ if ((u32) offset >= 0x1000) { + LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset)); + opAddress = R(reg_value); + offset = 0; + } + result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset)); } else { - MOV(32, R(EAX), opAddress); + MOV(32, R(reg_value), opAddress); result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset)); } #else if (opAddress.IsImm()) { result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK))); + MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK))); } else { - if (!opAddress.IsSimpleReg(EAX)) - MOV(32, R(EAX), opAddress); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + if (!opAddress.IsSimpleReg(reg_value)) + MOV(32, R(reg_value), opAddress); + AND(32, R(reg_value), Imm32(Memory::MEMVIEW32_MASK)); result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset)); + MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset)); } #endif @@ -95,26 +106,27 @@ u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, if (accessSize == 32) { - BSWAP(32, EAX); + BSWAP(32, reg_value); } else if (accessSize == 16) { - BSWAP(32, EAX); + BSWAP(32, reg_value); if (signExtend) - SAR(32, R(EAX), Imm8(16)); + SAR(32, R(reg_value), Imm8(16)); else - SHR(32, R(EAX), Imm8(16)); + SHR(32, R(reg_value), Imm8(16)); } else if (signExtend) { // TODO: bake 8-bit into the original load. 
- MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(reg_value)); } return result; } -void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend) +void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend) { + registersInUse &= ~(1 << RAX | 1 << reg_value); #if defined(_M_X64) #ifdef ENABLE_MEM_CHECK if (!Core::g_CoreStartupParameter.bMMU && !Core::g_CoreStartupParameter.bEnableDebugging && Core::g_CoreStartupParameter.bFastmem) @@ -122,10 +134,8 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem) #endif { - u8 *mov = UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); - // XXX: are these dead anyway? - registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX)); registersInUseAtLoc[mov] = registersInUse; } else @@ -149,20 +159,26 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s u32 address = (u32)opAddress.offset + offset; if ((address & mem_mask) == 0) { - UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); } else { + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break; - case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break; - case 8: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break; + case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break; + case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break; + case 8: 
ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(EAX)); + } + else if (reg_value != EAX) + { + MOVZX(32, accessSize, reg_value, R(EAX)); } } } @@ -173,45 +189,57 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s MOV(32, R(EAX), opAddress); ADD(32, R(EAX), Imm32(offset)); TEST(32, R(EAX), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); + FixupBranch fast = J_CC(CC_Z, true); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break; - case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break; - case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break; + case 32: ABI_CallFunctionR((void *)&Memory::Read_U32, EAX); break; + case 16: ABI_CallFunctionR((void *)&Memory::Read_U16_ZX, EAX); break; + case 8: ABI_CallFunctionR((void *)&Memory::Read_U8_ZX, EAX); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. 
- MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(EAX)); + } + else if (reg_value != EAX) + { + MOVZX(32, accessSize, reg_value, R(EAX)); } FixupBranch exit = J(); SetJumpTarget(fast); - UnsafeLoadToEAX(R(EAX), accessSize, 0, signExtend); + UnsafeLoadToReg(reg_value, R(EAX), accessSize, 0, signExtend); SetJumpTarget(exit); } else { TEST(32, opAddress, Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); + FixupBranch fast = J_CC(CC_Z, true); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break; - case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break; - case 8: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break; + case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, opAddress); break; + case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, opAddress); break; + case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, opAddress); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. 
- MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(EAX)); + } + else if (reg_value != EAX) + { + MOVZX(32, accessSize, reg_value, R(EAX)); } FixupBranch exit = J(); SetJumpTarget(fast); - UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); SetJumpTarget(exit); } } @@ -239,6 +267,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc // Destroys both arg registers void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) { + registersInUse &= ~(1 << RAX); #if defined(_M_X64) if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem && @@ -255,8 +284,6 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce NOP(1); } - // XXX: are these dead anyway? - registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX)); registersInUseAtLoc[mov] = registersInUse; return; } @@ -280,16 +307,18 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce #endif TEST(32, R(reg_addr), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); + FixupBranch fast = J_CC(CC_Z, true); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write bool noProlog = flags & SAFE_WRITE_NO_PROLOG; bool swap = !(flags & SAFE_WRITE_NO_SWAP); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break; - case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? 
((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr, noProlog); break; - case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr, noProlog); break; + case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, noProlog); break; + case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, noProlog); break; + case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, noProlog); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); FixupBranch exit = J(); SetJumpTarget(fast); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); @@ -315,7 +344,9 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re MOV(32, R(EAX), M(&float_buffer)); BSWAP(32, EAX); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write - ABI_CallFunctionRR(thunks.ProtectFunction(((void *)&Memory::Write_U32), 2), EAX, reg_addr); + ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr); + ABI_PopRegistersAndAdjustStack(registersInUse, false); FixupBranch arg2 = J(); SetJumpTarget(argh); PSHUFB(xmm_value, M((void *)pbswapShuffle1x4)); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h index 0fd5db0380..63a0fed0a5 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h @@ -6,7 +6,6 @@ #define _JITUTIL_H #include "x64Emitter.h" -#include "Thunk.h" #include // Like XCodeBlock but has some utilities for memory access. 
@@ -16,8 +15,8 @@ public: void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset); // these return the address of the MOV, for backpatching u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); - u8 *UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); - void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend); + u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend); + void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend); enum SafeWriteFlags { SAFE_WRITE_NO_SWAP = 1, @@ -38,7 +37,6 @@ public: void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm); protected: - ThunkManager thunks; std::unordered_map registersInUseAtLoc; }; From a91469ffa5e9cf1e3a9b966a4c3b8ddee96966a5 Mon Sep 17 00:00:00 2001 From: comex Date: Thu, 3 Oct 2013 18:17:58 -0400 Subject: [PATCH 4/6] Fix stfd, which was broken in the fastmem writes commit. 
--- Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 9d84e6222d..ab8e417792 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -209,6 +209,7 @@ void Jit64::stfd(UGeckoInstruction inst) MOVD_xmm(R(EAX), XMM0); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0))); + MOVAPD(XMM0, fpr.R(s)); MOVD_xmm(R(EAX), XMM0); LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset)); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse()); From 3679f9ba60b6d833b710faf41bc9feef607429d2 Mon Sep 17 00:00:00 2001 From: comex Date: Fri, 4 Oct 2013 15:26:20 -0400 Subject: [PATCH 5/6] Don't push registers before pairedStoreQuantized, that's dumb. And fix some stuff up. It would probably be good to unify the stack handling some more rather than having ABI_PushRegistersAndAdjustStack do part of it and ABI_AlignStack the rest, causing unnecessary subtract instructions on Linux x86 (only). --- Source/Core/Common/Src/x64ABI.h | 13 ++++- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 3 - .../Src/PowerPC/JitCommon/JitAsmCommon.cpp | 57 +++++++++---------- .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 10 ++-- 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/Source/Core/Common/Src/x64ABI.h b/Source/Core/Common/Src/x64ABI.h index 837e4ec3d8..4b10d11e54 100644 --- a/Source/Core/Common/Src/x64ABI.h +++ b/Source/Core/Common/Src/x64ABI.h @@ -43,6 +43,8 @@ // 32-bit bog standard cdecl, shared between linux and windows // MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about. 
+#define ABI_ALL_CALLEE_SAVED ((1 << EAX) | (1 << ECX) | (1 << EDX)) + #else // 64 bit calling convention #ifdef _WIN32 // 64-bit Windows - the really exotic calling convention @@ -52,7 +54,12 @@ #define ABI_PARAM3 R8 #define ABI_PARAM4 R9 -#else //64-bit Unix (hopefully MacOSX too) +#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \ + (1 << R9) | (1 << R10) | (1 << R11) | \ + (1 << (16 + XMM0)) | (1 << (16 + XMM1)) | (1 << (16 + XMM2)) | (1 << (16 + XMM3)) | \ + (1 << (16 + XMM4)) | (1 << (16 + XMM5))) + +#else //64-bit Unix / OS X #define ABI_PARAM1 RDI #define ABI_PARAM2 RSI @@ -61,6 +68,10 @@ #define ABI_PARAM5 R8 #define ABI_PARAM6 R9 +#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \ + (1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \ + 0xffff0000 /* xmm0..15 */) + #endif // WIN32 #endif // X86 diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 876268b90a..4548890e2c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -101,8 +101,6 @@ void Jit64::psq_st(UGeckoInstruction inst) #else int addr_scale = SCALE_8; #endif - u32 registersInUse = RegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); if (inst.W) { // One value XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. 
@@ -113,7 +111,6 @@ void Jit64::psq_st(UGeckoInstruction inst) CVTPD2PS(XMM0, fpr.R(s)); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.pairedStoreQuantized)); } - ABI_PopRegistersAndAdjustStack(registersInUse, false); gpr.UnlockAll(); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index 8d1f6fa06b..cb763db179 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -20,6 +20,9 @@ #include "JitAsmCommon.h" #include "JitBase.h" +#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLEE_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ + (1 << (16 + XMM0)) | (1 << (16 + XMM1)))) + using namespace Gen; static int temp32; @@ -141,14 +144,10 @@ static const float GC_ALIGNED16(m_one[]) = {1.0f, 0.0f, 0.0f, 0.0f}; // I don't know whether the overflow actually happens in any games // but it potentially can cause problems, so we need some clamping -#ifdef _M_X64 -// TODO(ector): Improve 64-bit version -static void WriteDual32(u64 value, u32 address) +static void WriteDual32(u32 address) { - Memory::Write_U32((u32)(value >> 32), address); - Memory::Write_U32((u32)value, address + 4); + Memory::Write_U64(*(u64 *) psTemp, address); } -#endif // See comment in header for in/outs. 
void CommonAsmRoutines::GenQuantizedStores() { @@ -161,18 +160,20 @@ void CommonAsmRoutines::GenQuantizedStores() { MOVQ_xmm(M(&psTemp[0]), XMM0); MOV(64, R(RAX), M(&psTemp[0])); TEST(32, R(ECX), Imm32(0x0C000000)); - FixupBranch too_complex = J_CC(CC_NZ); + FixupBranch too_complex = J_CC(CC_NZ, true); BSWAP(64, RAX); MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); - FixupBranch skip_complex = J(); + FixupBranch skip_complex = J(true); SetJumpTarget(too_complex); - ABI_CallFunctionRR((void *)&WriteDual32, RAX, RCX, /* noProlog = */ true); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + ABI_CallFunctionR((void *)&WriteDual32, RCX); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); SetJumpTarget(skip_complex); RET(); #else MOVQ_xmm(M(&psTemp[0]), XMM0); TEST(32, R(ECX), Imm32(0x0C000000)); - FixupBranch argh = J_CC(CC_NZ); + FixupBranch argh = J_CC(CC_NZ, true); MOV(32, R(EAX), M(&psTemp)); BSWAP(32, EAX); AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); @@ -180,13 +181,11 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(32, R(EAX), M(((char*)&psTemp) + 4)); BSWAP(32, EAX); MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX)); - FixupBranch arg2 = J(); + FixupBranch arg2 = J(true); SetJumpTarget(argh); - MOV(32, R(EAX), M(((char*)&psTemp))); - ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, ECX, /* noProlog = */ true); - MOV(32, R(EAX), M(((char*)&psTemp)+4)); - ADD(32, R(ECX), Imm32(4)); - ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, ECX, /* noProlog = */ true); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + ABI_CallFunctionR((void *)&WriteDual32, ECX); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); SetJumpTarget(arg2); RET(); #endif @@ -205,8 +204,8 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + 
SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); const u8* storePairedS8 = AlignCode4(); @@ -224,8 +223,8 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); const u8* storePairedU16 = AlignCode4(); @@ -250,8 +249,8 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(16, R(AX), M((char*)psTemp + 4)); BSWAP(32, EAX); - SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); const u8* storePairedS16 = AlignCode4(); @@ -270,8 +269,8 @@ void CommonAsmRoutines::GenQuantizedStores() { MOVD_xmm(R(EAX), XMM0); BSWAP(32, EAX); ROL(32, R(EAX), Imm8(16)); - SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); pairedStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); @@ -294,7 +293,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { // Easy! 
const u8* storeSingleFloat = AlignCode4(); - SafeWriteFloatToReg(XMM0, ECX, 0, SAFE_WRITE_NO_FASTMEM); + SafeWriteFloatToReg(XMM0, ECX, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_FASTMEM); RET(); /* if (cpu_info.bSSSE3) { @@ -317,7 +316,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_255)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleS8 = AlignCode4(); @@ -327,7 +326,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m128)); MINSS(XMM0, M((void *)&m_127)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleU16 = AlignCode4(); // Used by MKWii @@ -338,7 +337,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_65535)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleS16 = AlignCode4(); @@ -348,7 +347,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m32768)); MINSS(XMM0, M((void *)&m_32767)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); singleStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 
7fbc565c23..69cfdc8bd3 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -311,14 +311,14 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write bool noProlog = flags & SAFE_WRITE_NO_PROLOG; bool swap = !(flags & SAFE_WRITE_NO_SWAP); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); switch (accessSize) { - case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, noProlog); break; - case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, noProlog); break; - case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, noProlog); break; + case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break; + case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break; + case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, noProlog); FixupBranch exit = J(); SetJumpTarget(fast); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); From a51eb5fd195a68df2df6fe2e19052c5c9be3b472 Mon Sep 17 00:00:00 2001 From: comex Date: Fri, 4 Oct 2013 20:33:39 -0400 Subject: [PATCH 6/6] Fix idle skipping. It incorrectly continued to test EAX after it was changed to load directly to the assigned register. Also switch from a flush to ABI_PushRegistersAndAdjustStack, to avoid needless flushing in the no-idle case. 
--- Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index d63aee55e7..37d274fcc8 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -122,15 +122,17 @@ void Jit64::lXXx(UGeckoInstruction inst) gpr.BindToRegister(d, false, true); SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, RegistersInUse(), signExtend); - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - // if it's still 0, we can wait until the next event - TEST(32, R(EAX), R(EAX)); + TEST(32, gpr.R(d), gpr.R(d)); FixupBranch noIdle = J_CC(CC_NZ); + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); + ABI_PopRegistersAndAdjustStack(registersInUse, false); + // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); WriteExceptionExit();