From 83c6df19656c56ed9ef8ffad09772f9859e5608c Mon Sep 17 00:00:00 2001 From: JosJuice Date: Mon, 16 Aug 2021 17:22:12 +0200 Subject: [PATCH 1/7] PowerPC: Set SRR1 correctly for program exceptions --- .../Core/PowerPC/Interpreter/ExceptionUtils.h | 11 ++++++++++- .../Core/PowerPC/Interpreter/Interpreter.cpp | 2 +- .../PowerPC/Interpreter/Interpreter_Branch.cpp | 2 +- .../PowerPC/Interpreter/Interpreter_Integer.cpp | 5 +++-- .../Interpreter/Interpreter_LoadStore.cpp | 8 ++++---- .../Interpreter/Interpreter_LoadStorePaired.cpp | 8 ++++---- .../Interpreter/Interpreter_SystemRegisters.cpp | 16 ++++++++-------- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 3 +++ .../JitArm64/JitArm64_SystemRegisters.cpp | 4 ++++ Source/Core/Core/PowerPC/PowerPC.cpp | 4 ++-- 10 files changed, 40 insertions(+), 23 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h index 9ad4e04cdc..123cffdcaa 100644 --- a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h @@ -7,6 +7,14 @@ #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/PowerPC.h" +enum class ProgramExceptionCause : u32 +{ + FloatingPoint = 1 << (31 - 11), + IllegalInstruction = 1 << (31 - 12), + PrivilegedInstruction = 1 << (31 - 13), + Trap = 1 << (31 - 14), +}; + inline void GenerateAlignmentException(u32 address) { PowerPC::ppcState.Exceptions |= EXCEPTION_ALIGNMENT; @@ -19,7 +27,8 @@ inline void GenerateDSIException(u32 address) PowerPC::ppcState.spr[SPR_DAR] = address; } -inline void GenerateProgramException() +inline void GenerateProgramException(ProgramExceptionCause cause) { PowerPC::ppcState.Exceptions |= EXCEPTION_PROGRAM; + PowerPC::ppcState.spr[SPR_SRR1] = static_cast(cause); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index 9d110ffddd..c8944b73cf 100644 --- 
a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -180,7 +180,7 @@ int Interpreter::SingleStepInner() { if (IsInvalidPairedSingleExecution(m_prev_inst)) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::IllegalInstruction); CheckExceptions(); } else if (MSR.FP) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp index 27877673fe..54fe1ae73e 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp @@ -101,7 +101,7 @@ void Interpreter::rfi(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index effce996f7..b10f55ab79 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -6,6 +6,7 @@ #include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/PowerPC.h" void Interpreter::Helper_UpdateCR0(u32 value) @@ -131,7 +132,7 @@ void Interpreter::twi(UGeckoInstruction inst) if ((a < b && (TO & 0x10) != 0) || (a > b && (TO & 0x08) != 0) || (a == b && (TO & 0x04) != 0) || (u32(a) < u32(b) && (TO & 0x02) != 0) || (u32(a) > u32(b) && (TO & 0x01) != 0)) { - PowerPC::ppcState.Exceptions |= EXCEPTION_PROGRAM; + GenerateProgramException(ProgramExceptionCause::Trap); PowerPC::CheckExceptions(); m_end_block = true; // Dunno about this } @@ -339,7 +340,7 @@ void Interpreter::tw(UGeckoInstruction inst) if ((a < b && (TO & 0x10) != 0) || (a > b && (TO & 0x08) != 0) || (a == b && (TO & 0x04) != 0) || 
((u32(a) < u32(b)) && (TO & 0x02) != 0) || ((u32(a) > u32(b)) && (TO & 0x01) != 0)) { - PowerPC::ppcState.Exceptions |= EXCEPTION_PROGRAM; + GenerateProgramException(ProgramExceptionCause::Trap); PowerPC::CheckExceptions(); m_end_block = true; // Dunno about this } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 74b9173a31..9989024bb1 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -450,7 +450,7 @@ void Interpreter::dcbi(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -514,7 +514,7 @@ void Interpreter::dcbz_l(UGeckoInstruction inst) { if (!HID2.LCE) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::IllegalInstruction); return; } @@ -1041,7 +1041,7 @@ void Interpreter::tlbie(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -1055,7 +1055,7 @@ void Interpreter::tlbsync(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); } // Ignored diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index 9384985a30..a09bbc7770 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -311,7 +311,7 @@ void Interpreter::psq_l(UGeckoInstruction inst) { if (HID2.LSQE == 0) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::IllegalInstruction); return; } @@ -323,7 +323,7 @@ void Interpreter::psq_lu(UGeckoInstruction inst) { if 
(HID2.LSQE == 0) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::IllegalInstruction); return; } @@ -342,7 +342,7 @@ void Interpreter::psq_st(UGeckoInstruction inst) { if (HID2.LSQE == 0) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::IllegalInstruction); return; } @@ -354,7 +354,7 @@ void Interpreter::psq_stu(UGeckoInstruction inst) { if (HID2.LSQE == 0) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::IllegalInstruction); return; } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 81fb4e5346..7917eba188 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -141,7 +141,7 @@ void Interpreter::mfmsr(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -152,7 +152,7 @@ void Interpreter::mfsr(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -163,7 +163,7 @@ void Interpreter::mfsrin(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -175,7 +175,7 @@ void Interpreter::mtmsr(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -190,7 +190,7 @@ void Interpreter::mtsr(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -203,7 +203,7 @@ void Interpreter::mtsrin(UGeckoInstruction inst) { if (MSR.PR) { - GenerateProgramException(); + 
GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -227,7 +227,7 @@ void Interpreter::mfspr(UGeckoInstruction inst) if (MSR.PR && index != SPR_XER && index != SPR_LR && index != SPR_CTR && index != SPR_TL && index != SPR_TU) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } @@ -270,7 +270,7 @@ void Interpreter::mtspr(UGeckoInstruction inst) // XER, LR, and CTR are the only ones available to be written to in user mode if (MSR.PR && index != SPR_XER && index != SPR_LR && index != SPR_CTR) { - GenerateProgramException(); + GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction); return; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index be61b048cf..7cc7723b76 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -11,7 +11,9 @@ #include "Common/CommonTypes.h" #include "Common/MathUtil.h" #include "Common/x64Emitter.h" + #include "Core/CoreTiming.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" @@ -2562,6 +2564,7 @@ void Jit64::twX(UGeckoInstruction inst) } LOCK(); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM)); + MOV(32, PPCSTATE_SRR1, Imm32(static_cast(ProgramExceptionCause::Trap))); gpr.Flush(); fpr.Flush(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 47283eb10d..d3de831872 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -7,6 +7,7 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include 
"Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/PowerPC.h" @@ -233,6 +234,9 @@ void JitArm64::twx(UGeckoInstruction inst) ORR(WA, WA, LogicalImm(EXCEPTION_PROGRAM, 32)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + MOVI2R(WA, static_cast(ProgramExceptionCause::Trap)); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR1)); + WriteExceptionExit(js.compilerPC, false, true); SwitchToNearCode(); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index f838911927..0e4b641200 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -483,8 +483,8 @@ void CheckExceptions() else if (exceptions & EXCEPTION_PROGRAM) { SRR0 = PC; - // say that it's a trap exception - SRR1 = (MSR.Hex & 0x87C0FFFF) | 0x20000; + // SRR1 was partially set by GenerateProgramException, so bitwise or is used here + SRR1 |= MSR.Hex & 0x87C0FFFF; MSR.LE = MSR.ILE; MSR.Hex &= ~0x04EF36; PC = NPC = 0x00000700; From 89a464dafa92fa669634480f4265100612a8d197 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 17 Aug 2021 17:39:47 +0200 Subject: [PATCH 2/7] Interpreter: Optimize FEX calculation The next commit will make the interpreter run this after every float instruction, so I think a little optimization here is justified. 
--- Source/Core/Core/PowerPC/Gekko.h | 6 ++++++ .../PowerPC/Interpreter/Interpreter_SystemRegisters.cpp | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index 9f89004552..f8dc7e87af 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -419,11 +419,17 @@ enum FPSCRExceptionFlag : u32 FPSCR_VXSQRT = 1U << (31 - 22), FPSCR_VXCVI = 1U << (31 - 23), FPSCR_VE = 1U << (31 - 24), + FPSCR_OE = 1U << (31 - 25), + FPSCR_UE = 1U << (31 - 26), + FPSCR_ZE = 1U << (31 - 27), + FPSCR_XE = 1U << (31 - 28), FPSCR_VX_ANY = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC | FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI, FPSCR_ANY_X = FPSCR_OX | FPSCR_UX | FPSCR_ZX | FPSCR_XX | FPSCR_VX_ANY, + + FPSCR_ANY_E = FPSCR_VE | FPSCR_OE | FPSCR_UE | FPSCR_ZE | FPSCR_XE, }; // Floating Point Status and Control Register diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 7917eba188..3f7b82717a 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -40,8 +40,7 @@ static void FPSCRUpdated(UReg_FPSCR fp) static void UpdateFPSCR(UReg_FPSCR* fpscr) { fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; - fpscr->FEX = (fpscr->VX & fpscr->VE) | (fpscr->OX & fpscr->OE) | (fpscr->UX & fpscr->UE) | - (fpscr->ZX & fpscr->ZE) | (fpscr->XX & fpscr->XE); + fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0; } void Interpreter::mtfsb0x(UGeckoInstruction inst) From c3bcc67653513b3dae7dec4df78699202363cb57 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 17 Aug 2021 19:57:06 +0200 Subject: [PATCH 3/7] PowerPC: Update FEX on FPSCR store instead of FPSCR load This is needed not only for the next commit, but also for correctly emulating float 
instructions that write to CR1. --- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 8 +- .../Interpreter_SystemRegisters.cpp | 30 +---- Source/Core/Core/PowerPC/Jit64/Jit.h | 3 +- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 126 ++++++++++++++---- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + .../JitArm64/JitArm64_SystemRegisters.cpp | 108 +++++++++++---- 6 files changed, 200 insertions(+), 76 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h index c3e1d40d4d..b8860eabf5 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -24,6 +24,12 @@ enum class FPCC FU = 1, // ? }; +inline void UpdateFPExceptionSummary(UReg_FPSCR* fpscr) +{ + fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; + fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0; +} + inline void SetFPException(UReg_FPSCR* fpscr, u32 mask) { if ((fpscr->Hex & mask) != mask) @@ -32,7 +38,7 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask) } fpscr->Hex |= mask; - fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; + UpdateFPExceptionSummary(fpscr); } inline float ForceSingle(const UReg_FPSCR& fpscr, double value) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 3f7b82717a..50d586efa5 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -25,22 +25,10 @@ mffsx: 80036650 (huh?) */ -static void FPSCRUpdated(UReg_FPSCR fp) +static void FPSCRUpdated(UReg_FPSCR* fpscr) { + UpdateFPExceptionSummary(fpscr); PowerPC::RoundingModeUpdated(); - - if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE) - { - // PanicAlert("FPSCR - exceptions enabled. Please report. 
VE=%i OE=%i UE=%i ZE=%i XE=%i", - // fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE); - // Pokemon Colosseum does this. Gah. - } -} - -static void UpdateFPSCR(UReg_FPSCR* fpscr) -{ - fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; - fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0; } void Interpreter::mtfsb0x(UGeckoInstruction inst) @@ -48,7 +36,7 @@ void Interpreter::mtfsb0x(UGeckoInstruction inst) u32 b = 0x80000000 >> inst.CRBD; FPSCR.Hex &= ~b; - FPSCRUpdated(FPSCR); + FPSCRUpdated(&FPSCR); if (inst.Rc) PowerPC::ppcState.UpdateCR1(); @@ -65,7 +53,7 @@ void Interpreter::mtfsb1x(UGeckoInstruction inst) else FPSCR |= b; - FPSCRUpdated(FPSCR); + FPSCRUpdated(&FPSCR); if (inst.Rc) PowerPC::ppcState.UpdateCR1(); @@ -80,7 +68,7 @@ void Interpreter::mtfsfix(UGeckoInstruction inst) FPSCR = (FPSCR.Hex & ~mask) | (imm >> (4 * field)); - FPSCRUpdated(FPSCR); + FPSCRUpdated(&FPSCR); if (inst.Rc) PowerPC::ppcState.UpdateCR1(); @@ -97,7 +85,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst) } FPSCR = (FPSCR.Hex & ~m) | (static_cast(rPS(inst.FB).PS0AsU64()) & m); - FPSCRUpdated(FPSCR); + FPSCRUpdated(&FPSCR); if (inst.Rc) PowerPC::ppcState.UpdateCR1(); @@ -563,22 +551,18 @@ void Interpreter::isync(UGeckoInstruction inst) void Interpreter::mcrfs(UGeckoInstruction inst) { - UpdateFPSCR(&FPSCR); const u32 shift = 4 * (7 - inst.CRFS); const u32 fpflags = (FPSCR.Hex >> shift) & 0xF; // If any exception bits were read, clear them FPSCR.Hex &= ~((0xF << shift) & (FPSCR_FX | FPSCR_ANY_X)); + FPSCRUpdated(&FPSCR); PowerPC::ppcState.cr.SetField(inst.CRFD, fpflags); } void Interpreter::mffsx(UGeckoInstruction inst) { - // load from FPSCR - // TODO(ector): grab all overflow flags etc and set them in FPSCR - - UpdateFPSCR(&FPSCR); rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex); if (inst.Rc) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 35f198dc6b..70c53bd784 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ 
b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -116,11 +116,12 @@ public: void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Gen::X64Reg reg); - // Generates a branch that will check if a given bit of a CR register part // is set or not. Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); + void UpdateFPExceptionSummary(Gen::X64Reg fpscr, Gen::X64Reg tmp1, Gen::X64Reg tmp2); + void SetFPRFIfNeeded(const Gen::OpArg& xmm, bool single); void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true, bool duplicate = false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 34fb820274..3117ef563f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -4,7 +4,9 @@ #include "Common/BitSet.h" #include "Common/CPUDetect.h" #include "Common/CommonTypes.h" +#include "Common/MathUtil.h" #include "Common/x64Emitter.h" + #include "Core/CoreTiming.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/Jit64/Jit.h" @@ -185,6 +187,33 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) return FixupBranch(); } +// Could be done with one temp register, but with two temp registers it's faster +void Jit64::UpdateFPExceptionSummary(X64Reg fpscr, X64Reg tmp1, X64Reg tmp2) +{ + // Kill dependency on tmp1 (not required for correctness, since SHL will shift out upper bytes) + XOR(32, R(tmp1), R(tmp1)); + + // fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0 + TEST(32, R(fpscr), Imm32(FPSCR_VX_ANY)); + SETcc(CC_NZ, R(tmp1)); + SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_VX))); + AND(32, R(fpscr), Imm32(~(FPSCR_VX | FPSCR_FEX))); + OR(32, R(fpscr), R(tmp1)); + + // fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0 + MOV(32, R(tmp1), R(fpscr)); + MOV(32, R(tmp2), R(fpscr)); + SHR(32, R(tmp1), 
Imm8(22)); + AND(32, R(tmp2), Imm32(FPSCR_ANY_E)); + TEST(32, R(tmp1), R(tmp2)); + // Unfortunately we eat a partial register stall below - we can't zero any of the registers before + // the TEST, and we can't use XOR right after the TEST since that would overwrite flags. However, + // there is no false dependency, since SETcc depends on TEST's flags and TEST depends on tmp1. + SETcc(CC_NZ, R(tmp1)); + SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_FEX))); + OR(32, R(fpscr), R(tmp1)); +} + static void DoICacheReset() { PowerPC::ppcState.iCache.Reset(); @@ -637,6 +666,19 @@ void Jit64::mcrfs(UGeckoInstruction inst) // Only clear exception bits (but not FEX/VX). mask &= FPSCR_FX | FPSCR_ANY_X; + RCX64Reg scratch_guard; + X64Reg scratch; + if (mask != 0) + { + scratch_guard = gpr.Scratch(); + RegCache::Realize(scratch_guard); + scratch = scratch_guard; + } + else + { + scratch = RSCRATCH; + } + if (cpu_info.bBMI1) { MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); @@ -652,14 +694,17 @@ void Jit64::mcrfs(UGeckoInstruction inst) SHR(32, R(RSCRATCH2), Imm8(shift)); AND(32, R(RSCRATCH2), Imm32(0xF)); } + + LEA(64, scratch, MConst(PowerPC::ConditionRegister::s_crTable)); + MOV(64, R(scratch), MComplex(scratch, RSCRATCH2, SCALE_8, 0)); + MOV(64, CROffset(inst.CRFD), R(scratch)); + if (mask != 0) { AND(32, R(RSCRATCH), Imm32(~mask)); + UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch); MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); } - LEA(64, RSCRATCH, MConst(PowerPC::ConditionRegister::s_crTable)); - MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0)); - MOV(64, CROffset(inst.CRFD), R(RSCRATCH)); } void Jit64::mffsx(UGeckoInstruction inst) @@ -670,18 +715,6 @@ void Jit64::mffsx(UGeckoInstruction inst) MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); - // FPSCR.FEX = 0 (and VX for below) - AND(32, R(RSCRATCH), Imm32(~0x60000000)); - - // FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0; - XOR(32, R(RSCRATCH2), R(RSCRATCH2)); - TEST(32, R(RSCRATCH), Imm32(FPSCR_VX_ANY)); - SETcc(CC_NZ, R(RSCRATCH2)); - 
SHL(32, R(RSCRATCH2), Imm8(31 - 2)); - OR(32, R(RSCRATCH), R(RSCRATCH2)); - - MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); - int d = inst.FD; RCX64Reg Rd = fpr.Bind(d, RCMode::Write); RegCache::Realize(Rd); @@ -710,17 +743,32 @@ void Jit64::mtfsb0x(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); - u32 mask = ~(0x80000000 >> inst.CRBD); - if (inst.CRBD < 29) + const u32 mask = 0x80000000 >> inst.CRBD; + const u32 inverted_mask = ~mask; + + if (mask == FPSCR_FEX || mask == FPSCR_VX) + return; + + if (inst.CRBD < 29 && (mask & (FPSCR_ANY_X | FPSCR_ANY_E)) == 0) { - AND(32, PPCSTATE(fpscr), Imm32(mask)); + AND(32, PPCSTATE(fpscr), Imm32(inverted_mask)); } else { MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); - AND(32, R(RSCRATCH), Imm32(mask)); + AND(32, R(RSCRATCH), Imm32(inverted_mask)); + + if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + { + RCX64Reg scratch = gpr.Scratch(); + RegCache::Realize(scratch); + + UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch); + } + MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); - UpdateMXCSR(); + if (inst.CRBD >= 29) + UpdateMXCSR(); } } @@ -730,9 +778,13 @@ void Jit64::mtfsb1x(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); - u32 mask = 0x80000000 >> inst.CRBD; + const u32 mask = 0x80000000 >> inst.CRBD; + + if (mask == FPSCR_FEX || mask == FPSCR_VX) + return; + MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); - if (mask & FPSCR_ANY_X) + if ((mask & FPSCR_ANY_X) != 0) { BTS(32, R(RSCRATCH), Imm32(31 - inst.CRBD)); FixupBranch dont_set_fx = J_CC(CC_C); @@ -743,6 +795,15 @@ void Jit64::mtfsb1x(UGeckoInstruction inst) { OR(32, R(RSCRATCH), Imm32(mask)); } + + if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + { + RCX64Reg scratch = gpr.Scratch(); + RegCache::Realize(scratch); + + UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch); + } + MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); if (inst.CRBD >= 29) UpdateMXCSR(); @@ -755,12 +816,22 @@ void Jit64::mtfsfix(UGeckoInstruction inst) 
FALLBACK_IF(inst.Rc); u8 imm = (inst.hex >> (31 - 19)) & 0xF; + u32 mask = 0xF0000000 >> (4 * inst.CRFD); u32 or_mask = imm << (28 - 4 * inst.CRFD); - u32 and_mask = ~(0xF0000000 >> (4 * inst.CRFD)); + u32 and_mask = ~mask; MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); AND(32, R(RSCRATCH), Imm32(and_mask)); OR(32, R(RSCRATCH), Imm32(or_mask)); + + if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + { + RCX64Reg scratch = gpr.Scratch(); + RegCache::Realize(scratch); + + UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch); + } + MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); // Field 7 contains NI and RN. @@ -798,6 +869,15 @@ void Jit64::mtfsfx(UGeckoInstruction inst) AND(32, R(RSCRATCH2), Imm32(~mask)); OR(32, R(RSCRATCH), R(RSCRATCH2)); } + + if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + { + RCX64Reg scratch = gpr.Scratch(); + RegCache::Realize(scratch); + + UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch); + } + MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); if (inst.FM & 1) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index b029f545cc..f19bd33d55 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -273,6 +273,7 @@ protected: Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); + void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr); void UpdateRoundingMode(); void ComputeRC0(Arm64Gen::ARM64Reg reg); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index d3de831872..568d3072f3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -4,6 +4,7 @@ #include "Common/Arm64Emitter.h" #include "Common/Assert.h" #include "Common/CommonTypes.h" +#include "Common/MathUtil.h" 
#include "Core/Core.h" #include "Core/CoreTiming.h" @@ -49,6 +50,25 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg) gpr.Unlock(WA); } +void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) +{ + ARM64Reg WA = gpr.GetReg(); + + // fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0 + MOVI2R(WA, FPSCR_VX_ANY); + TST(WA, fpscr); + CSET(WA, CCFlags::CC_NEQ); + BFI(fpscr, WA, IntLog2(FPSCR_VX), 1); + + // fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0 + AND(WA, fpscr, LogicalImm(FPSCR_ANY_E, 32)); + TST(WA, fpscr, ArithOption(fpscr, ShiftType::LSR, 22)); + CSET(WA, CCFlags::CC_NEQ); + BFI(fpscr, WA, IntLog2(FPSCR_FEX), 1); + + gpr.Unlock(WA); +} + void JitArm64::UpdateRoundingMode() { const BitSet32 gprs_to_save = gpr.GetCallerSavedUsed(); @@ -732,6 +752,8 @@ void JitArm64::mcrfs(UGeckoInstruction inst) { const u32 inverted_mask = ~mask; AND(WA, WA, LogicalImm(inverted_mask, 32)); + + UpdateFPExceptionSummary(WA); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); } @@ -753,24 +775,11 @@ void JitArm64::mffsx(UGeckoInstruction inst) LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); ARM64Reg VD = fpr.RW(inst.FD, RegType::LowerPair); - ARM64Reg WB = gpr.GetReg(); - // FPSCR.FEX = 0; - // FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0; - // (FEX is right next to VX, so we can set both using one BFI instruction) - MOVI2R(WB, FPSCR_VX_ANY); - TST(WA, WB); - CSET(WB, CCFlags::CC_NEQ); - BFI(WA, WB, 31 - 2, 2); - - STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - - // Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000; ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64)); m_float_emit.FMOV(EncodeRegToDouble(VD), XA); gpr.Unlock(WA); - gpr.Unlock(WB); } void JitArm64::mtfsb0x(UGeckoInstruction inst) @@ -779,12 +788,20 @@ void JitArm64::mtfsb0x(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); - u32 mask = ~(0x80000000 >> inst.CRBD); + const u32 mask = 0x80000000 >> inst.CRBD; + const u32 inverted_mask = ~mask; + 
+ if (mask == FPSCR_FEX || mask == FPSCR_VX) + return; ARM64Reg WA = gpr.GetReg(); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - AND(WA, WA, LogicalImm(mask, 32)); + + AND(WA, WA, LogicalImm(inverted_mask, 32)); + + if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); gpr.Unlock(WA); @@ -799,12 +816,16 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); - u32 mask = 0x80000000 >> inst.CRBD; + const u32 mask = 0x80000000 >> inst.CRBD; + + if (mask == FPSCR_FEX || mask == FPSCR_VX) + return; ARM64Reg WA = gpr.GetReg(); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - if (mask & FPSCR_ANY_X) + + if ((mask & FPSCR_ANY_X) != 0) { ARM64Reg WB = gpr.GetReg(); TST(WA, LogicalImm(mask, 32)); @@ -813,6 +834,9 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst) gpr.Unlock(WB); } ORR(WA, WA, LogicalImm(mask, 32)); + + if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); gpr.Unlock(WA); @@ -829,13 +853,15 @@ void JitArm64::mtfsfix(UGeckoInstruction inst) u8 imm = (inst.hex >> (31 - 19)) & 0xF; u8 shift = 28 - 4 * inst.CRFD; + u32 mask = 0xF << shift; ARM64Reg WA = gpr.GetReg(); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); if (imm == 0xF) { - ORR(WA, WA, LogicalImm(0xF << shift, 32)); + ORR(WA, WA, LogicalImm(mask, 32)); } else if (imm == 0x0) { @@ -849,7 +875,10 @@ void JitArm64::mtfsfix(UGeckoInstruction inst) gpr.Unlock(WB); } + if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + gpr.Unlock(WA); // Field 7 contains NI and RN. 
@@ -873,24 +902,47 @@ void JitArm64::mtfsfx(UGeckoInstruction inst) if (mask == 0xFFFFFFFF) { ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair); + ARM64Reg WA = gpr.GetReg(); - m_float_emit.STR(32, IndexType::Unsigned, VB, PPC_REG, PPCSTATE_OFF(fpscr)); + m_float_emit.FMOV(WA, EncodeRegToSingle(VB)); + + UpdateFPExceptionSummary(WA); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + + gpr.Unlock(WA); } else if (mask != 0) { ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair); - - ARM64Reg V0 = fpr.GetReg(); - ARM64Reg V1 = fpr.GetReg(); ARM64Reg WA = gpr.GetReg(); + ARM64Reg WB = gpr.GetReg(); - m_float_emit.LDR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr)); - MOVI2R(WA, mask); - m_float_emit.FMOV(EncodeRegToSingle(V1), WA); - m_float_emit.BIT(EncodeRegToDouble(V0), EncodeRegToDouble(VB), EncodeRegToDouble(V1)); - m_float_emit.STR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr)); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); + m_float_emit.FMOV(WB, EncodeRegToSingle(VB)); + + if (LogicalImm imm = LogicalImm(mask, 32)) + { + AND(WA, WA, LogicalImm(~mask, 32)); + AND(WB, WB, imm); + } + else + { + ARM64Reg WC = gpr.GetReg(); + + MOVI2R(WC, mask); + BIC(WA, WA, WC); + AND(WB, WB, WC); + + gpr.Unlock(WC); + } + ORR(WA, WA, WB); + + gpr.Unlock(WB); + + if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0) + UpdateFPExceptionSummary(WA); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); - fpr.Unlock(V0, V1); gpr.Unlock(WA); } From 7f7748e1818b23025fbb70cb69b2db5e515f5cd8 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 18 Aug 2021 12:20:25 +0200 Subject: [PATCH 4/7] Interpreter: Raise program exception on floating point exceptions --- .../Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h | 9 +++++++++ .../PowerPC/Interpreter/Interpreter_SystemRegisters.cpp | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h 
b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h index b8860eabf5..f1f8cddcd7 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -11,6 +11,7 @@ #include "Common/CommonTypes.h" #include "Common/FloatUtils.h" #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/ExceptionUtils.h" #include "Core/PowerPC/PowerPC.h" constexpr double PPC_NAN = std::numeric_limits::quiet_NaN(); @@ -24,10 +25,18 @@ enum class FPCC FU = 1, // ? }; +inline void CheckFPExceptions(UReg_FPSCR fpscr) +{ + if (fpscr.FEX && (MSR.FE0 || MSR.FE1)) + GenerateProgramException(ProgramExceptionCause::FloatingPoint); +} + inline void UpdateFPExceptionSummary(UReg_FPSCR* fpscr) { fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0; + + CheckFPExceptions(*fpscr); } inline void SetFPException(UReg_FPSCR* fpscr, u32 mask) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 50d586efa5..890fea99e6 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -167,6 +167,10 @@ void Interpreter::mtmsr(UGeckoInstruction inst) } MSR.Hex = rGPR[inst.RS]; + + // FE0/FE1 may have been set + CheckFPExceptions(FPSCR); + PowerPC::CheckExceptions(); m_end_block = true; } From 9f525d69c8631a36092a11016a2eef7446158c37 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 18 Aug 2021 12:39:02 +0200 Subject: [PATCH 5/7] Jit: Raise program exception on floating point exceptions This is done entirely through interpreter fallbacks. It would probably be possible to implement this using host exception handlers instead, but I think it would be a lot of complexity for a rarely used feature, so let's not do it for now. 
For performance reasons, there are two settings for this feature: One setting which enables just what True Crime: New York City needs and one setting which enables it all. The latter makes almost all float instructions fall back to the interpreter. --- Source/Core/Core/BootManager.cpp | 11 +++ Source/Core/Core/Config/MainSettings.cpp | 3 + Source/Core/Core/Config/MainSettings.h | 2 + .../ConfigLoaders/NetPlayConfigLoader.cpp | 2 + Source/Core/Core/ConfigManager.cpp | 6 ++ Source/Core/Core/ConfigManager.h | 2 + Source/Core/Core/NetPlayClient.cpp | 2 + Source/Core/Core/NetPlayProto.h | 2 + Source/Core/Core/NetPlayServer.cpp | 4 + .../CachedInterpreter/CachedInterpreter.cpp | 27 ++++-- .../Interpreter/Interpreter_Tables.cpp | 94 +++++++++---------- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 22 ++++- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 7 ++ Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 5 + .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 4 + Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 45 +++++---- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + .../JitArm64/JitArm64_FloatingPoint.cpp | 6 ++ .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 6 ++ .../JitArm64/JitArm64_SystemRegisters.cpp | 4 + .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 14 ++- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 6 +- Source/Core/Core/PowerPC/PPCAnalyst.cpp | 25 +++-- Source/Core/Core/PowerPC/PPCTables.h | 4 +- 24 files changed, 216 insertions(+), 88 deletions(-) diff --git a/Source/Core/Core/BootManager.cpp b/Source/Core/Core/BootManager.cpp index 585e5d62e9..cac7f1574d 100644 --- a/Source/Core/Core/BootManager.cpp +++ b/Source/Core/Core/BootManager.cpp @@ -75,6 +75,8 @@ private: bool bCPUThread; bool bJITFollowBranch; bool bSyncGPUOnSkipIdleHack; + bool bFloatExceptions; + bool bDivideByZeroExceptions; bool bFPRF; bool bAccurateNaNs; bool bMMU; @@ -109,6 +111,8 @@ void ConfigCache::SaveConfig(const SConfig& config) bCPUThread = config.bCPUThread; bJITFollowBranch =
config.bJITFollowBranch; bSyncGPUOnSkipIdleHack = config.bSyncGPUOnSkipIdleHack; + bFloatExceptions = config.bFloatExceptions; + bDivideByZeroExceptions = config.bDivideByZeroExceptions; bFPRF = config.bFPRF; bAccurateNaNs = config.bAccurateNaNs; bDisableICache = config.bDisableICache; @@ -154,6 +158,8 @@ void ConfigCache::RestoreConfig(SConfig* config) config->bCPUThread = bCPUThread; config->bJITFollowBranch = bJITFollowBranch; config->bSyncGPUOnSkipIdleHack = bSyncGPUOnSkipIdleHack; + config->bFloatExceptions = bFloatExceptions; + config->bDivideByZeroExceptions = bDivideByZeroExceptions; config->bFPRF = bFPRF; config->bAccurateNaNs = bAccurateNaNs; config->bDisableICache = bDisableICache; @@ -256,6 +262,9 @@ bool BootCore(std::unique_ptr boot, const WindowSystemInfo& wsi) core_section->Get("JITFollowBranch", &StartUp.bJITFollowBranch, StartUp.bJITFollowBranch); core_section->Get("SyncOnSkipIdle", &StartUp.bSyncGPUOnSkipIdleHack, StartUp.bSyncGPUOnSkipIdleHack); + core_section->Get("FloatExceptions", &StartUp.bFloatExceptions, StartUp.bFloatExceptions); + core_section->Get("DivByZeroExceptions", &StartUp.bDivideByZeroExceptions, + StartUp.bDivideByZeroExceptions); core_section->Get("FPRF", &StartUp.bFPRF, StartUp.bFPRF); core_section->Get("AccurateNaNs", &StartUp.bAccurateNaNs, StartUp.bAccurateNaNs); core_section->Get("DisableICache", &StartUp.bDisableICache, StartUp.bDisableICache); @@ -370,6 +379,8 @@ bool BootCore(std::unique_ptr boot, const WindowSystemInfo& wsi) StartUp.bAccurateNaNs = netplay_settings.m_AccurateNaNs; StartUp.bDisableICache = netplay_settings.m_DisableICache; StartUp.bSyncGPUOnSkipIdleHack = netplay_settings.m_SyncOnSkipIdle; + StartUp.bFloatExceptions = netplay_settings.m_FloatExceptions; + StartUp.bDivideByZeroExceptions = netplay_settings.m_DivideByZeroExceptions; StartUp.bSyncGPU = netplay_settings.m_SyncGPU; StartUp.iSyncGpuMaxDistance = netplay_settings.m_SyncGpuMaxDistance; StartUp.iSyncGpuMinDistance = 
netplay_settings.m_SyncGpuMinDistance; diff --git a/Source/Core/Core/Config/MainSettings.cpp b/Source/Core/Core/Config/MainSettings.cpp index 550e1bdd89..ca48d4c9f8 100644 --- a/Source/Core/Core/Config/MainSettings.cpp +++ b/Source/Core/Core/Config/MainSettings.cpp @@ -86,6 +86,9 @@ const Info MAIN_SYNC_GPU_MIN_DISTANCE{{System::Main, "Core", "SyncGpuMinDis const Info MAIN_SYNC_GPU_OVERCLOCK{{System::Main, "Core", "SyncGpuOverclock"}, 1.0f}; const Info MAIN_FAST_DISC_SPEED{{System::Main, "Core", "FastDiscSpeed"}, false}; const Info MAIN_LOW_DCBZ_HACK{{System::Main, "Core", "LowDCBZHack"}, false}; +const Info MAIN_FLOAT_EXCEPTIONS{{System::Main, "Core", "FloatExceptions"}, false}; +const Info MAIN_DIVIDE_BY_ZERO_EXCEPTIONS{{System::Main, "Core", "DivByZeroExceptions"}, + false}; const Info MAIN_FPRF{{System::Main, "Core", "FPRF"}, false}; const Info MAIN_ACCURATE_NANS{{System::Main, "Core", "AccurateNaNs"}, false}; const Info MAIN_DISABLE_ICACHE{{System::Main, "Core", "DisableICache"}, false}; diff --git a/Source/Core/Core/Config/MainSettings.h b/Source/Core/Core/Config/MainSettings.h index 413f2c85b9..14867978ac 100644 --- a/Source/Core/Core/Config/MainSettings.h +++ b/Source/Core/Core/Config/MainSettings.h @@ -68,6 +68,8 @@ extern const Info MAIN_SYNC_GPU_MIN_DISTANCE; extern const Info MAIN_SYNC_GPU_OVERCLOCK; extern const Info MAIN_FAST_DISC_SPEED; extern const Info MAIN_LOW_DCBZ_HACK; +extern const Info MAIN_FLOAT_EXCEPTIONS; +extern const Info MAIN_DIVIDE_BY_ZERO_EXCEPTIONS; extern const Info MAIN_FPRF; extern const Info MAIN_ACCURATE_NANS; extern const Info MAIN_DISABLE_ICACHE; diff --git a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp index 983bf27040..c2eb753e28 100644 --- a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp @@ -69,6 +69,8 @@ public: layer->Set(Config::GFX_SAFE_TEXTURE_CACHE_COLOR_SAMPLES, 
m_settings.m_SafeTextureCacheColorSamples); layer->Set(Config::GFX_PERF_QUERIES_ENABLE, m_settings.m_PerfQueriesEnable); + layer->Set(Config::MAIN_FLOAT_EXCEPTIONS, m_settings.m_FloatExceptions); + layer->Set(Config::MAIN_DIVIDE_BY_ZERO_EXCEPTIONS, m_settings.m_DivideByZeroExceptions); layer->Set(Config::MAIN_FPRF, m_settings.m_FPRF); layer->Set(Config::MAIN_ACCURATE_NANS, m_settings.m_AccurateNaNs); layer->Set(Config::MAIN_DISABLE_ICACHE, m_settings.m_DisableICache); diff --git a/Source/Core/Core/ConfigManager.cpp b/Source/Core/Core/ConfigManager.cpp index 56985b1c3d..5b6bd5713e 100644 --- a/Source/Core/Core/ConfigManager.cpp +++ b/Source/Core/Core/ConfigManager.cpp @@ -213,6 +213,8 @@ void SConfig::SaveCoreSettings(IniFile& ini) core->Set("SyncGpuMaxDistance", iSyncGpuMaxDistance); core->Set("SyncGpuMinDistance", iSyncGpuMinDistance); core->Set("SyncGpuOverclock", fSyncGpuOverclock); + core->Set("FloatExceptions", bFloatExceptions); + core->Set("DivByZeroExceptions", bDivideByZeroExceptions); core->Set("FPRF", bFPRF); core->Set("AccurateNaNs", bAccurateNaNs); core->Set("SelectedLanguage", SelectedLanguage); @@ -509,6 +511,8 @@ void SConfig::LoadCoreSettings(IniFile& ini) core->Get("SyncGpuOverclock", &fSyncGpuOverclock, 1.0f); core->Get("FastDiscSpeed", &bFastDiscSpeed, false); core->Get("LowDCBZHack", &bLowDCBZHack, false); + core->Get("FloatExceptions", &bFloatExceptions, false); + core->Get("DivByZeroExceptions", &bDivideByZeroExceptions, false); core->Get("FPRF", &bFPRF, false); core->Get("AccurateNaNs", &bAccurateNaNs, false); core->Get("DisableICache", &bDisableICache, false); @@ -747,6 +751,8 @@ void SConfig::LoadDefaults() bRunCompareServer = false; bDSPHLE = true; bFastmem = true; + bFloatExceptions = false; + bDivideByZeroExceptions = false; bFPRF = false; bAccurateNaNs = false; bDisableICache = false; diff --git a/Source/Core/Core/ConfigManager.h b/Source/Core/Core/ConfigManager.h index 50d8ebd3fe..4ed0924b4f 100644 --- 
a/Source/Core/Core/ConfigManager.h +++ b/Source/Core/Core/ConfigManager.h @@ -108,6 +108,8 @@ struct SConfig bool bJITRegisterCacheOff = false; bool bFastmem; + bool bFloatExceptions = false; + bool bDivideByZeroExceptions = false; bool bFPRF = false; bool bAccurateNaNs = false; bool bDisableICache = false; diff --git a/Source/Core/Core/NetPlayClient.cpp b/Source/Core/Core/NetPlayClient.cpp index 8f730038ef..e4cba66429 100644 --- a/Source/Core/Core/NetPlayClient.cpp +++ b/Source/Core/Core/NetPlayClient.cpp @@ -831,6 +831,8 @@ void NetPlayClient::OnStartGame(sf::Packet& packet) packet >> m_net_settings.m_EFBEmulateFormatChanges; packet >> m_net_settings.m_SafeTextureCacheColorSamples; packet >> m_net_settings.m_PerfQueriesEnable; + packet >> m_net_settings.m_FloatExceptions; + packet >> m_net_settings.m_DivideByZeroExceptions; packet >> m_net_settings.m_FPRF; packet >> m_net_settings.m_AccurateNaNs; packet >> m_net_settings.m_DisableICache; diff --git a/Source/Core/Core/NetPlayProto.h b/Source/Core/Core/NetPlayProto.h index 4d3cf436fb..537b820701 100644 --- a/Source/Core/Core/NetPlayProto.h +++ b/Source/Core/Core/NetPlayProto.h @@ -58,6 +58,8 @@ struct NetSettings bool m_EFBEmulateFormatChanges; int m_SafeTextureCacheColorSamples; bool m_PerfQueriesEnable; + bool m_FloatExceptions; + bool m_DivideByZeroExceptions; bool m_FPRF; bool m_AccurateNaNs; bool m_DisableICache; diff --git a/Source/Core/Core/NetPlayServer.cpp b/Source/Core/Core/NetPlayServer.cpp index 806f4967ac..bc96d0f952 100644 --- a/Source/Core/Core/NetPlayServer.cpp +++ b/Source/Core/Core/NetPlayServer.cpp @@ -1329,6 +1329,8 @@ bool NetPlayServer::SetupNetSettings() settings.m_SafeTextureCacheColorSamples = Config::Get(Config::GFX_SAFE_TEXTURE_CACHE_COLOR_SAMPLES); settings.m_PerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE); + settings.m_FloatExceptions = Config::Get(Config::MAIN_FLOAT_EXCEPTIONS); + settings.m_DivideByZeroExceptions = Config::Get(Config::MAIN_DIVIDE_BY_ZERO_EXCEPTIONS); 
settings.m_FPRF = Config::Get(Config::MAIN_FPRF); settings.m_AccurateNaNs = Config::Get(Config::MAIN_ACCURATE_NANS); settings.m_DisableICache = Config::Get(Config::MAIN_DISABLE_ICACHE); @@ -1505,6 +1507,8 @@ bool NetPlayServer::StartGame() spac << m_settings.m_EFBEmulateFormatChanges; spac << m_settings.m_SafeTextureCacheColorSamples; spac << m_settings.m_PerfQueriesEnable; + spac << m_settings.m_FloatExceptions; + spac << m_settings.m_DivideByZeroExceptions; spac << m_settings.m_FPRF; spac << m_settings.m_AccurateNaNs; spac << m_settings.m_DisableICache; diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index 834af98a5e..e75994fc96 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -58,7 +58,7 @@ void CachedInterpreter::Init() jo.enableBlocklink = false; m_block_cache.Init(); - UpdateMemoryOptions(); + UpdateMemoryAndExceptionOptions(); code_block.m_stats = &js.st; code_block.m_gpa = &js.gpa; @@ -180,6 +180,17 @@ static bool CheckDSI(u32 data) return false; } +static bool CheckProgramException(u32 data) +{ + if (PowerPC::ppcState.Exceptions & EXCEPTION_PROGRAM) + { + PowerPC::CheckExceptions(); + PowerPC::ppcState.downcount -= data; + return true; + } + return false; +} + static bool CheckBreakpoint(u32 data) { PowerPC::CheckBreakPoints(); @@ -267,26 +278,26 @@ void CachedInterpreter::Jit(u32 address) const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound; const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0; const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) && jo.memcheck; + const bool check_program_exception = !endblock && ShouldHandleFPExceptionForInstruction(&op); const bool idle_loop = op.branchIsIdleLoop; - if (breakpoint) - { + if (breakpoint || check_fpu || endblock || memcheck || check_program_exception) 
m_code.emplace_back(WritePC, op.address); + + if (breakpoint) m_code.emplace_back(CheckBreakpoint, js.downcountAmount); - } if (check_fpu) { - m_code.emplace_back(WritePC, op.address); m_code.emplace_back(CheckFPU, js.downcountAmount); js.firstFPInstructionFound = true; } - if (endblock || memcheck) - m_code.emplace_back(WritePC, op.address); m_code.emplace_back(PPCTables::GetInterpreterOp(op.inst), op.inst); if (memcheck) m_code.emplace_back(CheckDSI, js.downcountAmount); + if (check_program_exception) + m_code.emplace_back(CheckProgramException, js.downcountAmount); if (idle_loop) m_code.emplace_back(CheckIdle, js.blockStart); if (endblock) @@ -316,5 +327,5 @@ void CachedInterpreter::ClearCache() { m_code.clear(); m_block_cache.Clear(); - UpdateMemoryOptions(); + UpdateMemoryAndExceptionOptions(); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index 45c1f8b9b4..f7eefbd585 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -94,14 +94,14 @@ static std::array primarytable = static std::array table4 = {{ //SUBOP10 - {0, Interpreter::ps_cmpu0, {"ps_cmpu0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {32, Interpreter::ps_cmpo0, {"ps_cmpo0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {0, Interpreter::ps_cmpu0, {"ps_cmpu0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {32, Interpreter::ps_cmpo0, {"ps_cmpo0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {40, Interpreter::ps_neg, {"ps_neg", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | 
FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {136, Interpreter::ps_nabs, {"ps_nabs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {264, Interpreter::ps_abs, {"ps_abs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {64, Interpreter::ps_cmpu1, {"ps_cmpu1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {64, Interpreter::ps_cmpu1, {"ps_cmpu1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {72, Interpreter::ps_mr, {"ps_mr", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {96, Interpreter::ps_cmpo1, {"ps_cmpo1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {96, Interpreter::ps_cmpo1, {"ps_cmpo1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {528, Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {560, Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {592, Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, @@ -112,23 +112,23 @@ static std::array table4 = static std::array table4_2 = {{ - {10, Interpreter::ps_sum0, {"ps_sum0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | 
FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {11, Interpreter::ps_sum1, {"ps_sum1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {12, Interpreter::ps_muls0, {"ps_muls0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {13, Interpreter::ps_muls1, {"ps_muls1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {14, Interpreter::ps_madds0, {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {15, Interpreter::ps_madds1, {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {18, Interpreter::ps_div, {"ps_div", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 17, 0, 0, 0}}, - {20, Interpreter::ps_sub, {"ps_sub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {21, Interpreter::ps_add, {"ps_add", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {10, Interpreter::ps_sum0, {"ps_sum0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {11, Interpreter::ps_sum1, {"ps_sum1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {12, Interpreter::ps_muls0, {"ps_muls0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {13, Interpreter::ps_muls1, {"ps_muls1", 
OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {14, Interpreter::ps_madds0, {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {15, Interpreter::ps_madds1, {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {18, Interpreter::ps_div, {"ps_div", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 17, 0, 0, 0}}, + {20, Interpreter::ps_sub, {"ps_sub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {21, Interpreter::ps_add, {"ps_add", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {23, Interpreter::ps_sel, {"ps_sel", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {24, Interpreter::ps_res, {"ps_res", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {25, Interpreter::ps_mul, {"ps_mul", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {26, Interpreter::ps_rsqrte, {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}}, - {28, Interpreter::ps_msub, {"ps_msub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {29, Interpreter::ps_madd, {"ps_madd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | 
FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {30, Interpreter::ps_nmsub, {"ps_nmsub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {31, Interpreter::ps_nmadd, {"ps_nmadd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {24, Interpreter::ps_res, {"ps_res", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 1, 0, 0, 0}}, + {25, Interpreter::ps_mul, {"ps_mul", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {26, Interpreter::ps_rsqrte, {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 2, 0, 0, 0}}, + {28, Interpreter::ps_msub, {"ps_msub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {29, Interpreter::ps_madd, {"ps_madd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {30, Interpreter::ps_nmsub, {"ps_nmsub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {31, Interpreter::ps_nmadd, {"ps_nmadd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, }}; @@ -280,7 +280,7 @@ static std::array table31 = {19, Interpreter::mfcr, {"mfcr", OpType::System, FL_OUT_D, 1, 0, 0, 0}}, {83, Interpreter::mfmsr, {"mfmsr", OpType::System, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {144, Interpreter::mtcrf, {"mtcrf", 
OpType::System, FL_IN_S | FL_SET_CRn, 1, 0, 0, 0}}, - {146, Interpreter::mtmsr, {"mtmsr", OpType::System, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {146, Interpreter::mtmsr, {"mtmsr", OpType::System, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {210, Interpreter::mtsr, {"mtsr", OpType::System, FL_IN_S | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {242, Interpreter::mtsrin, {"mtsrin", OpType::System, FL_IN_SB | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {339, Interpreter::mfspr, {"mfspr", OpType::SPR, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, @@ -304,15 +304,15 @@ static std::array table31 = static std::array table59 = {{ - {18, Interpreter::fdivsx, {"fdivsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO - {20, Interpreter::fsubsx, {"fsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {21, Interpreter::faddsx, {"faddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {24, Interpreter::fresx, {"fresx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {25, Interpreter::fmulsx, {"fmulsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {28, Interpreter::fmsubsx, {"fmsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {29, Interpreter::fmaddsx, {"fmaddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {30, Interpreter::fnmsubsx, {"fnmsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {31, Interpreter::fnmaddsx, {"fnmaddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + 
{18, Interpreter::fdivsx, {"fdivsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 17, 0, 0, 0}}, // TODO + {20, Interpreter::fsubsx, {"fsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {21, Interpreter::faddsx, {"faddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {24, Interpreter::fresx, {"fresx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 1, 0, 0, 0}}, + {25, Interpreter::fmulsx, {"fmulsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {28, Interpreter::fmsubsx, {"fmsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {29, Interpreter::fmaddsx, {"fmaddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {30, Interpreter::fnmsubsx, {"fnmsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {31, Interpreter::fnmaddsx, {"fnmaddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, }}; static std::array table63 = @@ -323,36 +323,36 @@ static std::array table63 = // we don't actually need to calculate or store them here. So FL_READ_FPRF and FL_SET_FPRF is not // an ideal representation of fcmp's effect on FPRF flags and might result in // slightly sub-optimal code. 
- {32, Interpreter::fcmpo, {"fcmpo", OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}}, - {0, Interpreter::fcmpu, {"fcmpu", OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}}, + {32, Interpreter::fcmpo, {"fcmpo", OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {0, Interpreter::fcmpu, {"fcmpu", OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, - {14, Interpreter::fctiwx, {"fctiwx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {15, Interpreter::fctiwzx, {"fctiwzx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {14, Interpreter::fctiwx, {"fctiwx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {15, Interpreter::fctiwzx, {"fctiwzx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {72, Interpreter::fmrx, {"fmrx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, {136, Interpreter::fnabsx, {"fnabsx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_IN_FLOAT_B_BITEXACT | FL_USE_FPU, 1, 0, 0, 0}}, {40, Interpreter::fnegx, {"fnegx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_IN_FLOAT_B_BITEXACT | FL_USE_FPU, 1, 0, 0, 0}}, - {12, Interpreter::frspx, {"frspx", OpType::DoubleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {12, Interpreter::frspx, {"frspx", OpType::DoubleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {64, Interpreter::mcrfs, {"mcrfs", OpType::SystemFP, 
FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}}, {583, Interpreter::mffsx, {"mffsx", OpType::SystemFP, FL_RC_BIT_F | FL_INOUT_FLOAT_D | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}}, {70, Interpreter::mtfsb0x, {"mtfsb0x", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}}, - {38, Interpreter::mtfsb1x, {"mtfsb1x", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}}, - {134, Interpreter::mtfsfix, {"mtfsfix", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}}, - {711, Interpreter::mtfsfx, {"mtfsfx", OpType::SystemFP, FL_RC_BIT_F | FL_IN_FLOAT_B | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}}, + {38, Interpreter::mtfsb1x, {"mtfsb1x", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 3, 0, 0, 0}}, + {134, Interpreter::mtfsfix, {"mtfsfix", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 3, 0, 0, 0}}, + {711, Interpreter::mtfsfx, {"mtfsfx", OpType::SystemFP, FL_RC_BIT_F | FL_IN_FLOAT_B | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 3, 0, 0, 0}}, }}; static std::array table63_2 = {{ - {18, Interpreter::fdivx, {"fdivx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}}, - {20, Interpreter::fsubx, {"fsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {21, Interpreter::faddx, {"faddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {18, Interpreter::fdivx, {"fdivx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 31, 0, 0, 0}}, + {20, Interpreter::fsubx, {"fsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {21, 
Interpreter::faddx, {"faddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, {23, Interpreter::fselx, {"fselx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {25, Interpreter::fmulx, {"fmulx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {26, Interpreter::frsqrtex, {"frsqrtex", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {28, Interpreter::fmsubx, {"fmsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {29, Interpreter::fmaddx, {"fmaddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {30, Interpreter::fnmsubx, {"fnmsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {31, Interpreter::fnmaddx, {"fnmaddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {25, Interpreter::fmulx, {"fmulx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {26, Interpreter::frsqrtex, {"frsqrtex", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 1, 0, 0, 0}}, + {28, Interpreter::fmsubx, {"fmsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {29, Interpreter::fmaddx, {"fmaddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {30, Interpreter::fnmsubx, {"fnmsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | 
FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, + {31, Interpreter::fnmaddx, {"fnmaddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}}, }}; // clang-format on diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 0a4bfeed4d..dba9520290 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -335,7 +335,7 @@ void Jit64::Init() jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena(); jo.optimizeGatherPipe = true; jo.accurateSinglePrecision = true; - UpdateMemoryOptions(); + UpdateMemoryAndExceptionOptions(); js.fastmemLoadStore = nullptr; js.compilerPC = 0; @@ -389,7 +389,7 @@ void Jit64::ClearCache() m_const_pool.Clear(); ClearCodeSpace(); Clear(); - UpdateMemoryOptions(); + UpdateMemoryAndExceptionOptions(); ResetFreeMemoryRanges(); } @@ -453,6 +453,24 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) SetJumpTarget(c); } } + else if (ShouldHandleFPExceptionForInstruction(js.op)) + { + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM)); + FixupBranch exception = J_CC(CC_NZ, true); + + SwitchToFarCode(); + SetJumpTarget(exception); + + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + + gpr.Flush(); + fpr.Flush(); + + MOV(32, PPCSTATE(pc), Imm32(js.op->address)); + WriteExceptionExit(); + SwitchToNearCode(); + } } void Jit64::HLEFunction(u32 hook_index) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index ace5e193d1..9d2aad1b27 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -208,6 +208,7 @@ void Jit64::fp_arith(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || 
(jo.div_by_zero_exceptions && inst.SUBOP5 == 18)); int a = inst.FA; int b = inst.FB; @@ -292,6 +293,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); // We would like to emulate FMA instructions accurately without rounding error if possible, but // unfortunately emulating FMA in software is just too slow on CPUs that are too old to have FMA @@ -733,6 +735,7 @@ void Jit64::fcmpX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(jo.fp_exceptions); FloatCompare(inst); } @@ -742,6 +745,7 @@ void Jit64::fctiwx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); int d = inst.RD; int b = inst.RB; @@ -784,6 +788,7 @@ void Jit64::frspx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); int b = inst.FB; int d = inst.FD; bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom; @@ -800,6 +805,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); int b = inst.FB; int d = inst.FD; @@ -818,6 +824,7 @@ void Jit64::fresx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); int b = inst.FB; int d = inst.FD; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index 0b28fed61c..023d0d834a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -33,6 +33,7 @@ void Jit64::ps_sum(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); int d = inst.FD; int a = inst.FA; @@ 
-84,6 +85,7 @@ void Jit64::ps_muls(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); int d = inst.FD; int a = inst.FA; @@ -152,6 +154,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); int b = inst.FB; int d = inst.FD; @@ -176,6 +179,7 @@ void Jit64::ps_res(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); int b = inst.FB; int d = inst.FD; @@ -199,6 +203,7 @@ void Jit64::ps_cmpXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(jo.fp_exceptions); FloatCompare(inst, !!(inst.SUBOP10 & 64)); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 3117ef563f..2806879bcb 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -424,6 +424,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); + FALLBACK_IF(jo.fp_exceptions); { RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); @@ -777,6 +778,7 @@ void Jit64::mtfsb1x(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 mask = 0x80000000 >> inst.CRBD; @@ -814,6 +816,7 @@ void Jit64::mtfsfix(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); u8 imm = (inst.hex >> (31 - 19)) & 0xF; u32 mask = 0xF0000000 >> (4 * inst.CRFD); @@ -844,6 +847,7 @@ void Jit64::mtfsfx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); u32 mask = 
0; for (int i = 0; i < 8; i++) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 3191be64eb..a68fc078ee 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -50,7 +50,7 @@ void JitArm64::Init() jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena(); jo.enableBlocklink = true; jo.optimizeGatherPipe = true; - UpdateMemoryOptions(); + UpdateMemoryAndExceptionOptions(); gpr.Init(this); fpr.Init(this); blocks.Init(); @@ -129,7 +129,7 @@ void JitArm64::ClearCache() const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes; ClearCodeSpace(); farcode.ClearCodeSpace(); - UpdateMemoryOptions(); + UpdateMemoryAndExceptionOptions(); GenerateAsm(); } @@ -193,25 +193,14 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Unlock(WA); } } + else if (ShouldHandleFPExceptionForInstruction(js.op)) + { + WriteConditionalExceptionExit(EXCEPTION_PROGRAM); + } if (jo.memcheck && (js.op->opinfo->flags & FL_LOADSTORE)) { - ARM64Reg WA = gpr.GetReg(); - LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI)); - - FixupBranch handleException = B(); - SwitchToFarCode(); - SetJumpTarget(handleException); - - gpr.Flush(FlushMode::MaintainState, WA); - fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); - - WriteExceptionExit(js.compilerPC, false, true); - - SwitchToNearCode(); - SetJumpTarget(noException); - gpr.Unlock(WA); + WriteConditionalExceptionExit(EXCEPTION_DSI); } } @@ -495,6 +484,26 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always B(dispatcher); } +void JitArm64::WriteConditionalExceptionExit(int exception) +{ + ARM64Reg WA = gpr.GetReg(); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + FixupBranch noException = TBZ(WA, IntLog2(exception)); + + FixupBranch handleException = B(); + 
SwitchToFarCode(); + SetJumpTarget(handleException); + + gpr.Flush(FlushMode::MaintainState, WA); + fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG); + + WriteExceptionExit(js.compilerPC, false, true); + + SwitchToNearCode(); + SetJumpTarget(noException); + gpr.Unlock(WA); +} + bool JitArm64::HandleFunctionHooking(u32 address) { return HLE::ReplaceFunctionIfPossible(address, [&](u32 hook_index, HLE::HookType type) { diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index f19bd33d55..bc831360c1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -268,6 +268,7 @@ protected: bool always_exception = false); void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false, bool always_exception = false); + void WriteConditionalExceptionExit(int exception); void FakeLKExit(u32 exit_address_after_return); void WriteBLRExit(Arm64Gen::ARM64Reg dest); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index e8b15ce16a..c4d63802a9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -67,6 +67,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || (jo.div_by_zero_exceptions && inst.SUBOP5 == 18)); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 op5 = inst.SUBOP5; @@ -339,6 +340,7 @@ void JitArm64::frspx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 b = inst.FB; const u32 d = inst.FD; @@ -500,6 +502,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); + FALLBACK_IF(jo.fp_exceptions); FloatCompare(inst); } @@ -509,6 +512,7 @@ void 
JitArm64::fctiwzx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 b = inst.FB; const u32 d = inst.FD; @@ -551,6 +555,7 @@ void JitArm64::fresx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); const u32 b = inst.FB; const u32 d = inst.FD; @@ -579,6 +584,7 @@ void JitArm64::frsqrtex(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); const u32 b = inst.FB; const u32 d = inst.FD; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 1e8edb5036..5690bbc481 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -75,6 +75,7 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 a = inst.FA; const u32 c = inst.FC; @@ -125,6 +126,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 a = inst.FA; const u32 b = inst.FB; @@ -316,6 +318,7 @@ void JitArm64::ps_sumX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 a = inst.FA; const u32 b = inst.FB; @@ -362,6 +365,7 @@ void JitArm64::ps_res(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); const u32 b = inst.FB; const u32 d = inst.FD; @@ -394,6 +398,7 @@ void JitArm64::ps_rsqrte(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); + 
FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions); const u32 b = inst.FB; const u32 d = inst.FD; @@ -425,6 +430,7 @@ void JitArm64::ps_cmpXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); + FALLBACK_IF(jo.fp_exceptions); const bool upper = inst.SUBOP10 & 64; FloatCompare(inst, upper); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 568d3072f3..63aa016322 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -86,6 +86,7 @@ void JitArm64::mtmsr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); + FALLBACK_IF(jo.fp_exceptions); gpr.BindToRegister(inst.RS, true); STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr)); @@ -815,6 +816,7 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); const u32 mask = 0x80000000 >> inst.CRBD; @@ -850,6 +852,7 @@ void JitArm64::mtfsfix(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); u8 imm = (inst.hex >> (31 - 19)) & 0xF; u8 shift = 28 - 4 * inst.CRFD; @@ -891,6 +894,7 @@ void JitArm64::mtfsfx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); FALLBACK_IF(inst.Rc); + FALLBACK_IF(jo.fp_exceptions); u32 mask = 0; for (int i = 0; i < 8; i++) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index c42fce554e..1d67d61046 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -41,9 +41,21 @@ bool JitBase::CanMergeNextInstructions(int count) const return true; } -void JitBase::UpdateMemoryOptions() +void JitBase::UpdateMemoryAndExceptionOptions() 
{ bool any_watchpoints = PowerPC::memchecks.HasAny(); jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints); jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; + jo.fp_exceptions = SConfig::GetInstance().bFloatExceptions; + jo.div_by_zero_exceptions = SConfig::GetInstance().bDivideByZeroExceptions; +} + +bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) +{ + if (jo.fp_exceptions) + return (op->opinfo->flags & FL_FLOAT_EXCEPTION) != 0; + else if (jo.div_by_zero_exceptions) + return (op->opinfo->flags & FL_FLOAT_DIV) != 0; + else + return false; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 18784ff095..b34d16365c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -63,6 +63,8 @@ protected: bool fastmem; bool fastmem_arena; bool memcheck; + bool fp_exceptions; + bool div_by_zero_exceptions; bool profile_blocks; }; struct JitState @@ -113,7 +115,9 @@ protected: bool CanMergeNextInstructions(int count) const; - void UpdateMemoryOptions(); + void UpdateMemoryAndExceptionOptions(); + + bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op); public: JitBase(); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index bd1f0d0324..d02cc10e0c 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -524,8 +524,12 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, const Gekk code->wantsCR0 = false; code->wantsCR1 = false; + bool first_fpu_instruction = false; if (opinfo->flags & FL_USE_FPU) + { + first_fpu_instruction = !block->m_fpa->any; block->m_fpa->any = true; + } if (opinfo->flags & FL_TIMER) block->m_gpa->anyTimer = true; @@ -550,9 +554,10 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, const Gekk 
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0; code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0; - // TODO: Is it possible to determine that some FPU instructions never cause exceptions? code->canCauseException = - (opinfo->flags & (FL_LOADSTORE | FL_USE_FPU | FL_PROGRAMEXCEPTION)) != 0; + first_fpu_instruction || (opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 || + (SConfig::GetInstance().bFloatExceptions && (opinfo->flags & FL_FLOAT_EXCEPTION)) || + (SConfig::GetInstance().bDivideByZeroExceptions && (opinfo->flags & FL_FLOAT_DIV)); code->wantsCA = (opinfo->flags & FL_READ_CA) != 0; code->outputCA = (opinfo->flags & FL_SET_CA) != 0; @@ -928,14 +933,14 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: const bool opWantsCR1 = op.wantsCR1; const bool opWantsFPRF = op.wantsFPRF; const bool opWantsCA = op.wantsCA; - op.wantsCR0 = wantsCR0 || op.canEndBlock; - op.wantsCR1 = wantsCR1 || op.canEndBlock; - op.wantsFPRF = wantsFPRF || op.canEndBlock; - op.wantsCA = wantsCA || op.canEndBlock; - wantsCR0 |= opWantsCR0 || op.canEndBlock; - wantsCR1 |= opWantsCR1 || op.canEndBlock; - wantsFPRF |= opWantsFPRF || op.canEndBlock; - wantsCA |= opWantsCA || op.canEndBlock; + op.wantsCR0 = wantsCR0 || op.canEndBlock || op.canCauseException; + op.wantsCR1 = wantsCR1 || op.canEndBlock || op.canCauseException; + op.wantsFPRF = wantsFPRF || op.canEndBlock || op.canCauseException; + op.wantsCA = wantsCA || op.canEndBlock || op.canCauseException; + wantsCR0 |= opWantsCR0 || op.canEndBlock || op.canCauseException; + wantsCR1 |= opWantsCR1 || op.canEndBlock || op.canCauseException; + wantsFPRF |= opWantsFPRF || op.canEndBlock || op.canCauseException; + wantsCA |= opWantsCA || op.canEndBlock || op.canCauseException; wantsCR0 &= !op.outputCR0 || opWantsCR0; wantsCR1 &= !op.outputCR1 || opWantsCR1; wantsFPRF &= !op.outputFPRF || opWantsFPRF; diff --git a/Source/Core/Core/PowerPC/PPCTables.h b/Source/Core/Core/PowerPC/PPCTables.h index 
c53510f221..f88d5d8ef3 100644 --- a/Source/Core/Core/PowerPC/PPCTables.h +++ b/Source/Core/Core/PowerPC/PPCTables.h @@ -64,7 +64,9 @@ enum InstructionFlags : u64 FL_IN_FLOAT_C_BITEXACT = (1ull << 31), // The output is based on the exact bits in frC. FL_IN_FLOAT_AB_BITEXACT = FL_IN_FLOAT_A_BITEXACT | FL_IN_FLOAT_B_BITEXACT, FL_IN_FLOAT_BC_BITEXACT = FL_IN_FLOAT_B_BITEXACT | FL_IN_FLOAT_C_BITEXACT, - FL_PROGRAMEXCEPTION = (1ull << 32), // May generate a system exception. + FL_PROGRAMEXCEPTION = (1ull << 32), // May generate a program exception (not floating point). + FL_FLOAT_EXCEPTION = (1ull << 33), // May generate a program exception (floating point). + FL_FLOAT_DIV = (1ull << 34), // May generate a program exception (FP) due to division by 0. }; enum class OpType From 25bff910540104c06212b5b6439a38b277549ab5 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 29 Aug 2021 11:57:15 +0200 Subject: [PATCH 6/7] Interpreter: Fix NI_div ZX check --- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h index f1f8cddcd7..4e0601b897 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -138,7 +138,15 @@ inline FPResult NI_div(UReg_FPSCR* fpscr, double a, double b) { FPResult result{a / b}; - if (std::isnan(result.value)) + if (std::isinf(result.value)) + { + if (b == 0.0) + { + result.SetException(fpscr, FPSCR_ZX); + return result; + } + } + else if (std::isnan(result.value)) { if (Common::IsSNAN(a) || Common::IsSNAN(b)) result.SetException(fpscr, FPSCR_VXSNAN); @@ -157,20 +165,9 @@ inline FPResult NI_div(UReg_FPSCR* fpscr, double a, double b) } if (b == 0.0) - { - if (a == 0.0) - { - result.SetException(fpscr, FPSCR_VXZDZ); - } - else - { - result.SetException(fpscr, FPSCR_ZX); - } - } + 
result.SetException(fpscr, FPSCR_VXZDZ); else if (std::isinf(a) && std::isinf(b)) - { result.SetException(fpscr, FPSCR_VXIDI); - } result.value = PPC_NAN; return result; From c250ed03cf65c4398c4f844524b75d5600cfb535 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 29 Aug 2021 12:21:33 +0200 Subject: [PATCH 7/7] GameINI: Enable DivByZeroExceptions where needed Combined with the previous commits in this pull request, this fixes https://bugs.dolphin-emu.org/issues/7230 (True Crime: New York City) and https://bugs.dolphin-emu.org/issues/9650 (Call of Duty: Finest Hour). --- Data/Sys/GameSettings/G2C.ini | 4 ++++ Data/Sys/GameSettings/GCO.ini | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 Data/Sys/GameSettings/G2C.ini create mode 100644 Data/Sys/GameSettings/GCO.ini diff --git a/Data/Sys/GameSettings/G2C.ini b/Data/Sys/GameSettings/G2C.ini new file mode 100644 index 0000000000..a12a6a1190 --- /dev/null +++ b/Data/Sys/GameSettings/G2C.ini @@ -0,0 +1,4 @@ +# G2CD52, G2CE52, G2CP52, G2CX52 - True Crime: New York City + +[Core] +DivByZeroExceptions = True diff --git a/Data/Sys/GameSettings/GCO.ini b/Data/Sys/GameSettings/GCO.ini new file mode 100644 index 0000000000..ec36443599 --- /dev/null +++ b/Data/Sys/GameSettings/GCO.ini @@ -0,0 +1,4 @@ +# GCOD52, GCOE52, GCOF52, GCOP52 - Call of Duty: Finest Hour + +[Core] +DivByZeroExceptions = True