From 83c6df19656c56ed9ef8ffad09772f9859e5608c Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Mon, 16 Aug 2021 17:22:12 +0200
Subject: [PATCH 1/7] PowerPC: Set SRR1 correctly for program exceptions

---
 .../Core/PowerPC/Interpreter/ExceptionUtils.h    | 11 ++++++++++-
 .../Core/PowerPC/Interpreter/Interpreter.cpp     |  2 +-
 .../PowerPC/Interpreter/Interpreter_Branch.cpp   |  2 +-
 .../PowerPC/Interpreter/Interpreter_Integer.cpp  |  5 +++--
 .../Interpreter/Interpreter_LoadStore.cpp        |  8 ++++----
 .../Interpreter/Interpreter_LoadStorePaired.cpp  |  8 ++++----
 .../Interpreter/Interpreter_SystemRegisters.cpp  | 16 ++++++++--------
 Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp   |  3 +++
 .../JitArm64/JitArm64_SystemRegisters.cpp        |  4 ++++
 Source/Core/Core/PowerPC/PowerPC.cpp             |  4 ++--
 10 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h
index 9ad4e04cdc..123cffdcaa 100644
--- a/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/ExceptionUtils.h
@@ -7,6 +7,14 @@
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/PowerPC.h"
 
+enum class ProgramExceptionCause : u32
+{
+  FloatingPoint = 1 << (31 - 11),          // SRR1 bit 11 (bit 0 is the MSB of the 32-bit word)
+  IllegalInstruction = 1 << (31 - 12),     // SRR1 bit 12
+  PrivilegedInstruction = 1 << (31 - 13),  // SRR1 bit 13
+  Trap = 1 << (31 - 14),                   // SRR1 bit 14 (0x20000)
+};
+
 inline void GenerateAlignmentException(u32 address)
 {
   PowerPC::ppcState.Exceptions |= EXCEPTION_ALIGNMENT;
@@ -19,7 +27,8 @@ inline void GenerateDSIException(u32 address)
   PowerPC::ppcState.spr[SPR_DAR] = address;
 }
 
-inline void GenerateProgramException()
+inline void GenerateProgramException(ProgramExceptionCause cause)
 {
   PowerPC::ppcState.Exceptions |= EXCEPTION_PROGRAM;
+  PowerPC::ppcState.spr[SPR_SRR1] = static_cast<u32>(cause);  // CheckExceptions ORs in MSR bits
 }
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp
index 9d110ffddd..c8944b73cf 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp
@@ -180,7 +180,7 @@ int Interpreter::SingleStepInner()
   {
     if (IsInvalidPairedSingleExecution(m_prev_inst))
     {
-      GenerateProgramException();
+      GenerateProgramException(ProgramExceptionCause::IllegalInstruction);
       CheckExceptions();
     }
     else if (MSR.FP)
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp
index 27877673fe..54fe1ae73e 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp
@@ -101,7 +101,7 @@ void Interpreter::rfi(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp
index effce996f7..b10f55ab79 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp
@@ -6,6 +6,7 @@
 #include "Common/BitUtils.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
+#include "Core/PowerPC/Interpreter/ExceptionUtils.h"
 #include "Core/PowerPC/PowerPC.h"
 
 void Interpreter::Helper_UpdateCR0(u32 value)
@@ -131,7 +132,7 @@ void Interpreter::twi(UGeckoInstruction inst)
   if ((a < b && (TO & 0x10) != 0) || (a > b && (TO & 0x08) != 0) || (a == b && (TO & 0x04) != 0) ||
       (u32(a) < u32(b) && (TO & 0x02) != 0) || (u32(a) > u32(b) && (TO & 0x01) != 0))
   {
-    PowerPC::ppcState.Exceptions |= EXCEPTION_PROGRAM;
+    GenerateProgramException(ProgramExceptionCause::Trap);
     PowerPC::CheckExceptions();
     m_end_block = true;  // Dunno about this
   }
@@ -339,7 +340,7 @@ void Interpreter::tw(UGeckoInstruction inst)
   if ((a < b && (TO & 0x10) != 0) || (a > b && (TO & 0x08) != 0) || (a == b && (TO & 0x04) != 0) ||
       ((u32(a) < u32(b)) && (TO & 0x02) != 0) || ((u32(a) > u32(b)) && (TO & 0x01) != 0))
   {
-    PowerPC::ppcState.Exceptions |= EXCEPTION_PROGRAM;
+    GenerateProgramException(ProgramExceptionCause::Trap);
     PowerPC::CheckExceptions();
     m_end_block = true;  // Dunno about this
   }
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
index 74b9173a31..9989024bb1 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@@ -450,7 +450,7 @@ void Interpreter::dcbi(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -514,7 +514,7 @@ void Interpreter::dcbz_l(UGeckoInstruction inst)
 {
   if (!HID2.LCE)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::IllegalInstruction);
     return;
   }
 
@@ -1041,7 +1041,7 @@ void Interpreter::tlbie(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -1055,7 +1055,7 @@ void Interpreter::tlbsync(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
   }
 
   // Ignored
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
index 9384985a30..a09bbc7770 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
@@ -311,7 +311,7 @@ void Interpreter::psq_l(UGeckoInstruction inst)
 {
   if (HID2.LSQE == 0)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::IllegalInstruction);
     return;
   }
 
@@ -323,7 +323,7 @@ void Interpreter::psq_lu(UGeckoInstruction inst)
 {
   if (HID2.LSQE == 0)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::IllegalInstruction);
     return;
   }
 
@@ -342,7 +342,7 @@ void Interpreter::psq_st(UGeckoInstruction inst)
 {
   if (HID2.LSQE == 0)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::IllegalInstruction);
     return;
   }
 
@@ -354,7 +354,7 @@ void Interpreter::psq_stu(UGeckoInstruction inst)
 {
   if (HID2.LSQE == 0)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::IllegalInstruction);
     return;
   }
 
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index 81fb4e5346..7917eba188 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -141,7 +141,7 @@ void Interpreter::mfmsr(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -152,7 +152,7 @@ void Interpreter::mfsr(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -163,7 +163,7 @@ void Interpreter::mfsrin(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -175,7 +175,7 @@ void Interpreter::mtmsr(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -190,7 +190,7 @@ void Interpreter::mtsr(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -203,7 +203,7 @@ void Interpreter::mtsrin(UGeckoInstruction inst)
 {
   if (MSR.PR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -227,7 +227,7 @@ void Interpreter::mfspr(UGeckoInstruction inst)
   if (MSR.PR && index != SPR_XER && index != SPR_LR && index != SPR_CTR && index != SPR_TL &&
       index != SPR_TU)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
@@ -270,7 +270,7 @@ void Interpreter::mtspr(UGeckoInstruction inst)
   // XER, LR, and CTR are the only ones available to be written to in user mode
   if (MSR.PR && index != SPR_XER && index != SPR_LR && index != SPR_CTR)
   {
-    GenerateProgramException();
+    GenerateProgramException(ProgramExceptionCause::PrivilegedInstruction);
     return;
   }
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index be61b048cf..7cc7723b76 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -11,7 +11,9 @@
 #include "Common/CommonTypes.h"
 #include "Common/MathUtil.h"
 #include "Common/x64Emitter.h"
+
 #include "Core/CoreTiming.h"
+#include "Core/PowerPC/Interpreter/ExceptionUtils.h"
 #include "Core/PowerPC/Jit64/Jit.h"
 #include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
 #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
@@ -2562,6 +2564,7 @@ void Jit64::twX(UGeckoInstruction inst)
     }
     LOCK();
     OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM));
+    MOV(32, PPCSTATE_SRR1, Imm32(static_cast<u32>(ProgramExceptionCause::Trap)));
 
     gpr.Flush();
     fpr.Flush();
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index 47283eb10d..d3de831872 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
@@ -7,6 +7,7 @@
 
 #include "Core/Core.h"
 #include "Core/CoreTiming.h"
+#include "Core/PowerPC/Interpreter/ExceptionUtils.h"
 #include "Core/PowerPC/JitArm64/Jit.h"
 #include "Core/PowerPC/PPCTables.h"
 #include "Core/PowerPC/PowerPC.h"
@@ -233,6 +234,9 @@ void JitArm64::twx(UGeckoInstruction inst)
   ORR(WA, WA, LogicalImm(EXCEPTION_PROGRAM, 32));
   STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
 
+  MOVI2R(WA, static_cast<u32>(ProgramExceptionCause::Trap));
+  STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR1));
+
   WriteExceptionExit(js.compilerPC, false, true);
 
   SwitchToNearCode();
diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index f838911927..0e4b641200 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -483,8 +483,8 @@ void CheckExceptions()
   else if (exceptions & EXCEPTION_PROGRAM)
   {
     SRR0 = PC;
-    // say that it's a trap exception
-    SRR1 = (MSR.Hex & 0x87C0FFFF) | 0x20000;
+    // SRR1 was partially set by GenerateProgramException, so bitwise or is used here
+    SRR1 |= MSR.Hex & 0x87C0FFFF;
     MSR.LE = MSR.ILE;
     MSR.Hex &= ~0x04EF36;
     PC = NPC = 0x00000700;

From 89a464dafa92fa669634480f4265100612a8d197 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Tue, 17 Aug 2021 17:39:47 +0200
Subject: [PATCH 2/7] Interpreter: Optimize FEX calculation

The next commit will make the interpreter run this after every
float instruction, so I think a little optimization here is justified.
---
 Source/Core/Core/PowerPC/Gekko.h                            | 6 ++++++
 .../PowerPC/Interpreter/Interpreter_SystemRegisters.cpp     | 3 +--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h
index 9f89004552..f8dc7e87af 100644
--- a/Source/Core/Core/PowerPC/Gekko.h
+++ b/Source/Core/Core/PowerPC/Gekko.h
@@ -419,11 +419,17 @@ enum FPSCRExceptionFlag : u32
   FPSCR_VXSQRT = 1U << (31 - 22),
   FPSCR_VXCVI = 1U << (31 - 23),
   FPSCR_VE = 1U << (31 - 24),
+  FPSCR_OE = 1U << (31 - 25),
+  FPSCR_UE = 1U << (31 - 26),
+  FPSCR_ZE = 1U << (31 - 27),
+  FPSCR_XE = 1U << (31 - 28),
 
   FPSCR_VX_ANY = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC |
                  FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI,
 
   FPSCR_ANY_X = FPSCR_OX | FPSCR_UX | FPSCR_ZX | FPSCR_XX | FPSCR_VX_ANY,
+
+  FPSCR_ANY_E = FPSCR_VE | FPSCR_OE | FPSCR_UE | FPSCR_ZE | FPSCR_XE,
 };
 
 // Floating Point Status and Control Register
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index 7917eba188..3f7b82717a 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -40,8 +40,7 @@ static void FPSCRUpdated(UReg_FPSCR fp)
 static void UpdateFPSCR(UReg_FPSCR* fpscr)
 {
   fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
-  fpscr->FEX = (fpscr->VX & fpscr->VE) | (fpscr->OX & fpscr->OE) | (fpscr->UX & fpscr->UE) |
-               (fpscr->ZX & fpscr->ZE) | (fpscr->XX & fpscr->XE);
+  fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0;
 }
 
 void Interpreter::mtfsb0x(UGeckoInstruction inst)

From c3bcc67653513b3dae7dec4df78699202363cb57 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Tue, 17 Aug 2021 19:57:06 +0200
Subject: [PATCH 3/7] PowerPC: Update FEX on FPSCR store instead of FPSCR load

This is needed not only for the next commit, but also for
correctly emulating float instructions that write to CR1.
---
 .../PowerPC/Interpreter/Interpreter_FPUtils.h |   8 +-
 .../Interpreter_SystemRegisters.cpp           |  30 +----
 Source/Core/Core/PowerPC/Jit64/Jit.h          |   3 +-
 .../PowerPC/Jit64/Jit_SystemRegisters.cpp     | 126 ++++++++++++++----
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |   1 +
 .../JitArm64/JitArm64_SystemRegisters.cpp     | 108 +++++++++++----
 6 files changed, 200 insertions(+), 76 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
index c3e1d40d4d..b8860eabf5 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -24,6 +24,12 @@ enum class FPCC
   FU = 1,  // ?
 };
 
+inline void UpdateFPExceptionSummary(UReg_FPSCR* fpscr)
+{
+  fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
+  fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0;
+}
+
 inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
 {
   if ((fpscr->Hex & mask) != mask)
@@ -32,7 +38,7 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
   }
 
   fpscr->Hex |= mask;
-  fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
+  UpdateFPExceptionSummary(fpscr);
 }
 
 inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index 3f7b82717a..50d586efa5 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -25,22 +25,10 @@ mffsx: 80036650 (huh?)
 
 */
 
-static void FPSCRUpdated(UReg_FPSCR fp)
+static void FPSCRUpdated(UReg_FPSCR* fpscr)
 {
+  UpdateFPExceptionSummary(fpscr);
   PowerPC::RoundingModeUpdated();
-
-  if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
-  {
-    // PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
-    // fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
-    // Pokemon Colosseum does this. Gah.
-  }
-}
-
-static void UpdateFPSCR(UReg_FPSCR* fpscr)
-{
-  fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
-  fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0;
 }
 
 void Interpreter::mtfsb0x(UGeckoInstruction inst)
@@ -48,7 +36,7 @@ void Interpreter::mtfsb0x(UGeckoInstruction inst)
   u32 b = 0x80000000 >> inst.CRBD;
 
   FPSCR.Hex &= ~b;
-  FPSCRUpdated(FPSCR);
+  FPSCRUpdated(&FPSCR);
 
   if (inst.Rc)
     PowerPC::ppcState.UpdateCR1();
@@ -65,7 +53,7 @@ void Interpreter::mtfsb1x(UGeckoInstruction inst)
   else
     FPSCR |= b;
 
-  FPSCRUpdated(FPSCR);
+  FPSCRUpdated(&FPSCR);
 
   if (inst.Rc)
     PowerPC::ppcState.UpdateCR1();
@@ -80,7 +68,7 @@ void Interpreter::mtfsfix(UGeckoInstruction inst)
 
   FPSCR = (FPSCR.Hex & ~mask) | (imm >> (4 * field));
 
-  FPSCRUpdated(FPSCR);
+  FPSCRUpdated(&FPSCR);
 
   if (inst.Rc)
     PowerPC::ppcState.UpdateCR1();
@@ -97,7 +85,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst)
   }
 
   FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m);
-  FPSCRUpdated(FPSCR);
+  FPSCRUpdated(&FPSCR);
 
   if (inst.Rc)
     PowerPC::ppcState.UpdateCR1();
@@ -563,22 +551,18 @@ void Interpreter::isync(UGeckoInstruction inst)
 
 void Interpreter::mcrfs(UGeckoInstruction inst)
 {
-  UpdateFPSCR(&FPSCR);
   const u32 shift = 4 * (7 - inst.CRFS);
   const u32 fpflags = (FPSCR.Hex >> shift) & 0xF;
 
   // If any exception bits were read, clear them
   FPSCR.Hex &= ~((0xF << shift) & (FPSCR_FX | FPSCR_ANY_X));
+  FPSCRUpdated(&FPSCR);
 
   PowerPC::ppcState.cr.SetField(inst.CRFD, fpflags);
 }
 
 void Interpreter::mffsx(UGeckoInstruction inst)
 {
-  // load from FPSCR
-  // TODO(ector): grab all overflow flags etc and set them in FPSCR
-
-  UpdateFPSCR(&FPSCR);
   rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex);
 
   if (inst.Rc)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 35f198dc6b..70c53bd784 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -116,11 +116,12 @@ public:
   void ClearCRFieldBit(int field, int bit);
   void SetCRFieldBit(int field, int bit);
   void FixGTBeforeSettingCRFieldBit(Gen::X64Reg reg);
-
   // Generates a branch that will check if a given bit of a CR register part
   // is set or not.
   Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
 
+  void UpdateFPExceptionSummary(Gen::X64Reg fpscr, Gen::X64Reg tmp1, Gen::X64Reg tmp2);
+
   void SetFPRFIfNeeded(const Gen::OpArg& xmm, bool single);
   void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
                             bool duplicate = false);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
index 34fb820274..3117ef563f 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -4,7 +4,9 @@
 #include "Common/BitSet.h"
 #include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
+#include "Common/MathUtil.h"
 #include "Common/x64Emitter.h"
+
 #include "Core/CoreTiming.h"
 #include "Core/HW/ProcessorInterface.h"
 #include "Core/PowerPC/Jit64/Jit.h"
@@ -185,6 +187,33 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
   return FixupBranch();
 }
 
+// Recomputes VX/FEX in fpscr; clobbers tmp1/tmp2 (one temp would work, but two is faster)
+void Jit64::UpdateFPExceptionSummary(X64Reg fpscr, X64Reg tmp1, X64Reg tmp2)
+{
+  // Kill dependency on tmp1 (not required for correctness, since SHL will shift out upper bytes)
+  XOR(32, R(tmp1), R(tmp1));
+
+  // fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0
+  TEST(32, R(fpscr), Imm32(FPSCR_VX_ANY));
+  SETcc(CC_NZ, R(tmp1));
+  SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_VX)));          // move the 0/1 result to the VX bit
+  AND(32, R(fpscr), Imm32(~(FPSCR_VX | FPSCR_FEX)));  // drop the stale VX and FEX values
+  OR(32, R(fpscr), R(tmp1));
+
+  // fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0
+  MOV(32, R(tmp1), R(fpscr));
+  MOV(32, R(tmp2), R(fpscr));
+  SHR(32, R(tmp1), Imm8(22));  // shifts the exception flags down onto their enable bits
+  AND(32, R(tmp2), Imm32(FPSCR_ANY_E));
+  TEST(32, R(tmp1), R(tmp2));
+  // Unfortunately we eat a partial register stall below - we can't zero any of the registers before
+  // the TEST, and we can't use XOR right after the TEST since that would overwrite flags. However,
+  // there is no false dependency, since SETcc depends on TEST's flags and TEST depends on tmp1.
+  SETcc(CC_NZ, R(tmp1));
+  SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_FEX)));  // move the 0/1 result to the FEX bit
+  OR(32, R(fpscr), R(tmp1));
+}
+
 static void DoICacheReset()
 {
   PowerPC::ppcState.iCache.Reset();
@@ -637,6 +666,19 @@ void Jit64::mcrfs(UGeckoInstruction inst)
   // Only clear exception bits (but not FEX/VX).
   mask &= FPSCR_FX | FPSCR_ANY_X;
 
+  RCX64Reg scratch_guard;
+  X64Reg scratch;
+  if (mask != 0)
+  {
+    scratch_guard = gpr.Scratch();
+    RegCache::Realize(scratch_guard);
+    scratch = scratch_guard;
+  }
+  else
+  {
+    scratch = RSCRATCH;
+  }
+
   if (cpu_info.bBMI1)
   {
     MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
@@ -652,14 +694,17 @@ void Jit64::mcrfs(UGeckoInstruction inst)
     SHR(32, R(RSCRATCH2), Imm8(shift));
     AND(32, R(RSCRATCH2), Imm32(0xF));
   }
+
+  LEA(64, scratch, MConst(PowerPC::ConditionRegister::s_crTable));
+  MOV(64, R(scratch), MComplex(scratch, RSCRATCH2, SCALE_8, 0));
+  MOV(64, CROffset(inst.CRFD), R(scratch));
+
   if (mask != 0)
   {
     AND(32, R(RSCRATCH), Imm32(~mask));
+    UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
     MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
   }
-  LEA(64, RSCRATCH, MConst(PowerPC::ConditionRegister::s_crTable));
-  MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0));
-  MOV(64, CROffset(inst.CRFD), R(RSCRATCH));
 }
 
 void Jit64::mffsx(UGeckoInstruction inst)
@@ -670,18 +715,6 @@ void Jit64::mffsx(UGeckoInstruction inst)
 
   MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
 
-  // FPSCR.FEX = 0 (and VX for below)
-  AND(32, R(RSCRATCH), Imm32(~0x60000000));
-
-  // FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
-  XOR(32, R(RSCRATCH2), R(RSCRATCH2));
-  TEST(32, R(RSCRATCH), Imm32(FPSCR_VX_ANY));
-  SETcc(CC_NZ, R(RSCRATCH2));
-  SHL(32, R(RSCRATCH2), Imm8(31 - 2));
-  OR(32, R(RSCRATCH), R(RSCRATCH2));
-
-  MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
-
   int d = inst.FD;
   RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
   RegCache::Realize(Rd);
@@ -710,17 +743,32 @@ void Jit64::mtfsb0x(UGeckoInstruction inst)
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
 
-  u32 mask = ~(0x80000000 >> inst.CRBD);
-  if (inst.CRBD < 29)
+  const u32 mask = 0x80000000 >> inst.CRBD;
+  const u32 inverted_mask = ~mask;
+
+  if (mask == FPSCR_FEX || mask == FPSCR_VX)
+    return;
+
+  if (inst.CRBD < 29 && (mask & (FPSCR_ANY_X | FPSCR_ANY_E)) == 0)
   {
-    AND(32, PPCSTATE(fpscr), Imm32(mask));
+    AND(32, PPCSTATE(fpscr), Imm32(inverted_mask));
   }
   else
   {
     MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
-    AND(32, R(RSCRATCH), Imm32(mask));
+    AND(32, R(RSCRATCH), Imm32(inverted_mask));
+
+    if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+    {
+      RCX64Reg scratch = gpr.Scratch();
+      RegCache::Realize(scratch);
+
+      UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
+    }
+
     MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
-    UpdateMXCSR();
+    if (inst.CRBD >= 29)
+      UpdateMXCSR();
   }
 }
 
@@ -730,9 +778,13 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
 
-  u32 mask = 0x80000000 >> inst.CRBD;
+  const u32 mask = 0x80000000 >> inst.CRBD;
+
+  if (mask == FPSCR_FEX || mask == FPSCR_VX)
+    return;
+
   MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
-  if (mask & FPSCR_ANY_X)
+  if ((mask & FPSCR_ANY_X) != 0)
   {
     BTS(32, R(RSCRATCH), Imm32(31 - inst.CRBD));
     FixupBranch dont_set_fx = J_CC(CC_C);
@@ -743,6 +795,15 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
   {
     OR(32, R(RSCRATCH), Imm32(mask));
   }
+
+  if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+  {
+    RCX64Reg scratch = gpr.Scratch();
+    RegCache::Realize(scratch);
+
+    UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
+  }
+
   MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
   if (inst.CRBD >= 29)
     UpdateMXCSR();
@@ -755,12 +816,22 @@ void Jit64::mtfsfix(UGeckoInstruction inst)
   FALLBACK_IF(inst.Rc);
 
   u8 imm = (inst.hex >> (31 - 19)) & 0xF;
+  u32 mask = 0xF0000000 >> (4 * inst.CRFD);
   u32 or_mask = imm << (28 - 4 * inst.CRFD);
-  u32 and_mask = ~(0xF0000000 >> (4 * inst.CRFD));
+  u32 and_mask = ~mask;
 
   MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
   AND(32, R(RSCRATCH), Imm32(and_mask));
   OR(32, R(RSCRATCH), Imm32(or_mask));
+
+  if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+  {
+    RCX64Reg scratch = gpr.Scratch();
+    RegCache::Realize(scratch);
+
+    UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
+  }
+
   MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
 
   // Field 7 contains NI and RN.
@@ -798,6 +869,15 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
     AND(32, R(RSCRATCH2), Imm32(~mask));
     OR(32, R(RSCRATCH), R(RSCRATCH2));
   }
+
+  if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+  {
+    RCX64Reg scratch = gpr.Scratch();
+    RegCache::Realize(scratch);
+
+    UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
+  }
+
   MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
 
   if (inst.FM & 1)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index b029f545cc..f19bd33d55 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -273,6 +273,7 @@ protected:
 
   Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
   void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg);
+  void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr);
   void UpdateRoundingMode();
 
   void ComputeRC0(Arm64Gen::ARM64Reg reg);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index d3de831872..568d3072f3 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
@@ -4,6 +4,7 @@
 #include "Common/Arm64Emitter.h"
 #include "Common/Assert.h"
 #include "Common/CommonTypes.h"
+#include "Common/MathUtil.h"
 
 #include "Core/Core.h"
 #include "Core/CoreTiming.h"
@@ -49,6 +50,25 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
   gpr.Unlock(WA);
 }
 
+void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr)
+{
+  ARM64Reg WA = gpr.GetReg();  // temporary, unlocked again before returning
+
+  // fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0
+  MOVI2R(WA, FPSCR_VX_ANY);
+  TST(WA, fpscr);
+  CSET(WA, CCFlags::CC_NEQ);
+  BFI(fpscr, WA, IntLog2(FPSCR_VX), 1);  // write the 0/1 result into the VX bit
+
+  // fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0
+  AND(WA, fpscr, LogicalImm(FPSCR_ANY_E, 32));
+  TST(WA, fpscr, ArithOption(fpscr, ShiftType::LSR, 22));  // tests WA & (fpscr >> 22)
+  CSET(WA, CCFlags::CC_NEQ);
+  BFI(fpscr, WA, IntLog2(FPSCR_FEX), 1);  // write the 0/1 result into the FEX bit
+
+  gpr.Unlock(WA);
+}
+
 void JitArm64::UpdateRoundingMode()
 {
   const BitSet32 gprs_to_save = gpr.GetCallerSavedUsed();
@@ -732,6 +752,8 @@ void JitArm64::mcrfs(UGeckoInstruction inst)
   {
     const u32 inverted_mask = ~mask;
     AND(WA, WA, LogicalImm(inverted_mask, 32));
+
+    UpdateFPExceptionSummary(WA);
     STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
   }
 
@@ -753,24 +775,11 @@ void JitArm64::mffsx(UGeckoInstruction inst)
   LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
 
   ARM64Reg VD = fpr.RW(inst.FD, RegType::LowerPair);
-  ARM64Reg WB = gpr.GetReg();
 
-  // FPSCR.FEX = 0;
-  // FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
-  // (FEX is right next to VX, so we can set both using one BFI instruction)
-  MOVI2R(WB, FPSCR_VX_ANY);
-  TST(WA, WB);
-  CSET(WB, CCFlags::CC_NEQ);
-  BFI(WA, WB, 31 - 2, 2);
-
-  STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
-
-  // Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000;
   ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64));
   m_float_emit.FMOV(EncodeRegToDouble(VD), XA);
 
   gpr.Unlock(WA);
-  gpr.Unlock(WB);
 }
 
 void JitArm64::mtfsb0x(UGeckoInstruction inst)
@@ -779,12 +788,20 @@ void JitArm64::mtfsb0x(UGeckoInstruction inst)
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
 
-  u32 mask = ~(0x80000000 >> inst.CRBD);
+  const u32 mask = 0x80000000 >> inst.CRBD;
+  const u32 inverted_mask = ~mask;
+
+  if (mask == FPSCR_FEX || mask == FPSCR_VX)
+    return;
 
   ARM64Reg WA = gpr.GetReg();
 
   LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
-  AND(WA, WA, LogicalImm(mask, 32));
+
+  AND(WA, WA, LogicalImm(inverted_mask, 32));
+
+  if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+    UpdateFPExceptionSummary(WA);
   STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
 
   gpr.Unlock(WA);
@@ -799,12 +816,16 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
 
-  u32 mask = 0x80000000 >> inst.CRBD;
+  const u32 mask = 0x80000000 >> inst.CRBD;
+
+  if (mask == FPSCR_FEX || mask == FPSCR_VX)
+    return;
 
   ARM64Reg WA = gpr.GetReg();
 
   LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
-  if (mask & FPSCR_ANY_X)
+
+  if ((mask & FPSCR_ANY_X) != 0)
   {
     ARM64Reg WB = gpr.GetReg();
     TST(WA, LogicalImm(mask, 32));
@@ -813,6 +834,9 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
     gpr.Unlock(WB);
   }
   ORR(WA, WA, LogicalImm(mask, 32));
+
+  if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+    UpdateFPExceptionSummary(WA);
   STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
 
   gpr.Unlock(WA);
@@ -829,13 +853,15 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
 
   u8 imm = (inst.hex >> (31 - 19)) & 0xF;
   u8 shift = 28 - 4 * inst.CRFD;
+  u32 mask = 0xF << shift;
 
   ARM64Reg WA = gpr.GetReg();
+
   LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
 
   if (imm == 0xF)
   {
-    ORR(WA, WA, LogicalImm(0xF << shift, 32));
+    ORR(WA, WA, LogicalImm(mask, 32));
   }
   else if (imm == 0x0)
   {
@@ -849,7 +875,10 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
     gpr.Unlock(WB);
   }
 
+  if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+    UpdateFPExceptionSummary(WA);
   STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
+
   gpr.Unlock(WA);
 
   // Field 7 contains NI and RN.
@@ -873,24 +902,47 @@ void JitArm64::mtfsfx(UGeckoInstruction inst)
   if (mask == 0xFFFFFFFF)
   {
     ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair);
+    ARM64Reg WA = gpr.GetReg();
 
-    m_float_emit.STR(32, IndexType::Unsigned, VB, PPC_REG, PPCSTATE_OFF(fpscr));
+    m_float_emit.FMOV(WA, EncodeRegToSingle(VB));
+
+    UpdateFPExceptionSummary(WA);
+    STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
+
+    gpr.Unlock(WA);
   }
   else if (mask != 0)
   {
     ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair);
-
-    ARM64Reg V0 = fpr.GetReg();
-    ARM64Reg V1 = fpr.GetReg();
     ARM64Reg WA = gpr.GetReg();
+    ARM64Reg WB = gpr.GetReg();
 
-    m_float_emit.LDR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr));
-    MOVI2R(WA, mask);
-    m_float_emit.FMOV(EncodeRegToSingle(V1), WA);
-    m_float_emit.BIT(EncodeRegToDouble(V0), EncodeRegToDouble(VB), EncodeRegToDouble(V1));
-    m_float_emit.STR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr));
+    LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
+    m_float_emit.FMOV(WB, EncodeRegToSingle(VB));
+
+    if (LogicalImm imm = LogicalImm(mask, 32))
+    {
+      AND(WA, WA, LogicalImm(~mask, 32));
+      AND(WB, WB, imm);
+    }
+    else
+    {
+      ARM64Reg WC = gpr.GetReg();
+
+      MOVI2R(WC, mask);
+      BIC(WA, WA, WC);
+      AND(WB, WB, WC);
+
+      gpr.Unlock(WC);
+    }
+    ORR(WA, WA, WB);
+
+    gpr.Unlock(WB);
+
+    if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
+      UpdateFPExceptionSummary(WA);
+    STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
 
-    fpr.Unlock(V0, V1);
     gpr.Unlock(WA);
   }
 

From 7f7748e1818b23025fbb70cb69b2db5e515f5cd8 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Wed, 18 Aug 2021 12:20:25 +0200
Subject: [PATCH 4/7] Interpreter: Raise program exception on floating point
 exceptions

---
 .../Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h  | 9 +++++++++
 .../PowerPC/Interpreter/Interpreter_SystemRegisters.cpp  | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
index b8860eabf5..f1f8cddcd7 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -11,6 +11,7 @@
 #include "Common/CommonTypes.h"
 #include "Common/FloatUtils.h"
 #include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/Interpreter/ExceptionUtils.h"
 #include "Core/PowerPC/PowerPC.h"
 
 constexpr double PPC_NAN = std::numeric_limits<double>::quiet_NaN();
@@ -24,10 +25,18 @@ enum class FPCC
   FU = 1,  // ?
 };
 
+inline void CheckFPExceptions(UReg_FPSCR fpscr)
+{
+  if (fpscr.FEX && (MSR.FE0 || MSR.FE1))
+    GenerateProgramException(ProgramExceptionCause::FloatingPoint);
+}
+
 inline void UpdateFPExceptionSummary(UReg_FPSCR* fpscr)
 {
   fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
   fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0;
+
+  CheckFPExceptions(*fpscr);
 }
 
 inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index 50d586efa5..890fea99e6 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -167,6 +167,10 @@ void Interpreter::mtmsr(UGeckoInstruction inst)
   }
 
   MSR.Hex = rGPR[inst.RS];
+
+  // FE0/FE1 may have been set
+  CheckFPExceptions(FPSCR);
+
   PowerPC::CheckExceptions();
   m_end_block = true;
 }

From 9f525d69c8631a36092a11016a2eef7446158c37 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Wed, 18 Aug 2021 12:39:02 +0200
Subject: [PATCH 5/7] Jit: Raise program exception on floating point exceptions

This is done entirely through interpreter fallbacks. It would
probably be possible to implement this using host exception
handlers instead, but I think it would be a lot of complexity
for a rarely used feature, so let's not do it for now.

For performance reasons, there are two settings for this feature:
one setting which enables just what True Crime: New York City
needs, and one setting which enables it all. The latter makes
almost all float instructions fall back to the interpreter.
---
 Source/Core/Core/BootManager.cpp              | 11 +++
 Source/Core/Core/Config/MainSettings.cpp      |  3 +
 Source/Core/Core/Config/MainSettings.h        |  2 +
 .../ConfigLoaders/NetPlayConfigLoader.cpp     |  2 +
 Source/Core/Core/ConfigManager.cpp            |  6 ++
 Source/Core/Core/ConfigManager.h              |  2 +
 Source/Core/Core/NetPlayClient.cpp            |  2 +
 Source/Core/Core/NetPlayProto.h               |  2 +
 Source/Core/Core/NetPlayServer.cpp            |  4 +
 .../CachedInterpreter/CachedInterpreter.cpp   | 27 ++++--
 .../Interpreter/Interpreter_Tables.cpp        | 94 +++++++++----------
 Source/Core/Core/PowerPC/Jit64/Jit.cpp        | 22 ++++-
 .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp  |  7 ++
 Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp |  5 +
 .../PowerPC/Jit64/Jit_SystemRegisters.cpp     |  4 +
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp     | 45 +++++----
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  1 +
 .../JitArm64/JitArm64_FloatingPoint.cpp       |  6 ++
 .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp |  6 ++
 .../JitArm64/JitArm64_SystemRegisters.cpp     |  4 +
 .../Core/Core/PowerPC/JitCommon/JitBase.cpp   | 14 ++-
 Source/Core/Core/PowerPC/JitCommon/JitBase.h  |  6 +-
 Source/Core/Core/PowerPC/PPCAnalyst.cpp       | 25 +++--
 Source/Core/Core/PowerPC/PPCTables.h          |  4 +-
 24 files changed, 216 insertions(+), 88 deletions(-)

diff --git a/Source/Core/Core/BootManager.cpp b/Source/Core/Core/BootManager.cpp
index 585e5d62e9..cac7f1574d 100644
--- a/Source/Core/Core/BootManager.cpp
+++ b/Source/Core/Core/BootManager.cpp
@@ -75,6 +75,8 @@ private:
   bool bCPUThread;
   bool bJITFollowBranch;
   bool bSyncGPUOnSkipIdleHack;
+  bool bFloatExceptions;
+  bool bDivideByZeroExceptions;
   bool bFPRF;
   bool bAccurateNaNs;
   bool bMMU;
@@ -109,6 +111,8 @@ void ConfigCache::SaveConfig(const SConfig& config)
   bCPUThread = config.bCPUThread;
   bJITFollowBranch = config.bJITFollowBranch;
   bSyncGPUOnSkipIdleHack = config.bSyncGPUOnSkipIdleHack;
+  bFloatExceptions = config.bFloatExceptions;
+  bDivideByZeroExceptions = config.bDivideByZeroExceptions;
   bFPRF = config.bFPRF;
   bAccurateNaNs = config.bAccurateNaNs;
   bDisableICache = config.bDisableICache;
@@ -154,6 +158,8 @@ void ConfigCache::RestoreConfig(SConfig* config)
   config->bCPUThread = bCPUThread;
   config->bJITFollowBranch = bJITFollowBranch;
   config->bSyncGPUOnSkipIdleHack = bSyncGPUOnSkipIdleHack;
+  config->bFloatExceptions = bFloatExceptions;
+  config->bDivideByZeroExceptions = bDivideByZeroExceptions;
   config->bFPRF = bFPRF;
   config->bAccurateNaNs = bAccurateNaNs;
   config->bDisableICache = bDisableICache;
@@ -256,6 +262,9 @@ bool BootCore(std::unique_ptr<BootParameters> boot, const WindowSystemInfo& wsi)
     core_section->Get("JITFollowBranch", &StartUp.bJITFollowBranch, StartUp.bJITFollowBranch);
     core_section->Get("SyncOnSkipIdle", &StartUp.bSyncGPUOnSkipIdleHack,
                       StartUp.bSyncGPUOnSkipIdleHack);
+    core_section->Get("FloatExceptions", &StartUp.bFloatExceptions, StartUp.bFloatExceptions);
+    core_section->Get("DivByZeroExceptions", &StartUp.bDivideByZeroExceptions,
+                      StartUp.bDivideByZeroExceptions);
     core_section->Get("FPRF", &StartUp.bFPRF, StartUp.bFPRF);
     core_section->Get("AccurateNaNs", &StartUp.bAccurateNaNs, StartUp.bAccurateNaNs);
     core_section->Get("DisableICache", &StartUp.bDisableICache, StartUp.bDisableICache);
@@ -370,6 +379,8 @@ bool BootCore(std::unique_ptr<BootParameters> boot, const WindowSystemInfo& wsi)
     StartUp.bAccurateNaNs = netplay_settings.m_AccurateNaNs;
     StartUp.bDisableICache = netplay_settings.m_DisableICache;
     StartUp.bSyncGPUOnSkipIdleHack = netplay_settings.m_SyncOnSkipIdle;
+    StartUp.bFloatExceptions = netplay_settings.m_FloatExceptions;
+    StartUp.bDivideByZeroExceptions = netplay_settings.m_DivideByZeroExceptions;
     StartUp.bSyncGPU = netplay_settings.m_SyncGPU;
     StartUp.iSyncGpuMaxDistance = netplay_settings.m_SyncGpuMaxDistance;
     StartUp.iSyncGpuMinDistance = netplay_settings.m_SyncGpuMinDistance;
diff --git a/Source/Core/Core/Config/MainSettings.cpp b/Source/Core/Core/Config/MainSettings.cpp
index 550e1bdd89..ca48d4c9f8 100644
--- a/Source/Core/Core/Config/MainSettings.cpp
+++ b/Source/Core/Core/Config/MainSettings.cpp
@@ -86,6 +86,9 @@ const Info<int> MAIN_SYNC_GPU_MIN_DISTANCE{{System::Main, "Core", "SyncGpuMinDis
 const Info<float> MAIN_SYNC_GPU_OVERCLOCK{{System::Main, "Core", "SyncGpuOverclock"}, 1.0f};
 const Info<bool> MAIN_FAST_DISC_SPEED{{System::Main, "Core", "FastDiscSpeed"}, false};
 const Info<bool> MAIN_LOW_DCBZ_HACK{{System::Main, "Core", "LowDCBZHack"}, false};
+const Info<bool> MAIN_FLOAT_EXCEPTIONS{{System::Main, "Core", "FloatExceptions"}, false};
+const Info<bool> MAIN_DIVIDE_BY_ZERO_EXCEPTIONS{{System::Main, "Core", "DivByZeroExceptions"},
+                                                false};
 const Info<bool> MAIN_FPRF{{System::Main, "Core", "FPRF"}, false};
 const Info<bool> MAIN_ACCURATE_NANS{{System::Main, "Core", "AccurateNaNs"}, false};
 const Info<bool> MAIN_DISABLE_ICACHE{{System::Main, "Core", "DisableICache"}, false};
diff --git a/Source/Core/Core/Config/MainSettings.h b/Source/Core/Core/Config/MainSettings.h
index 413f2c85b9..14867978ac 100644
--- a/Source/Core/Core/Config/MainSettings.h
+++ b/Source/Core/Core/Config/MainSettings.h
@@ -68,6 +68,8 @@ extern const Info<int> MAIN_SYNC_GPU_MIN_DISTANCE;
 extern const Info<float> MAIN_SYNC_GPU_OVERCLOCK;
 extern const Info<bool> MAIN_FAST_DISC_SPEED;
 extern const Info<bool> MAIN_LOW_DCBZ_HACK;
+extern const Info<bool> MAIN_FLOAT_EXCEPTIONS;
+extern const Info<bool> MAIN_DIVIDE_BY_ZERO_EXCEPTIONS;
 extern const Info<bool> MAIN_FPRF;
 extern const Info<bool> MAIN_ACCURATE_NANS;
 extern const Info<bool> MAIN_DISABLE_ICACHE;
diff --git a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp
index 983bf27040..c2eb753e28 100644
--- a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp
+++ b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp
@@ -69,6 +69,8 @@ public:
     layer->Set(Config::GFX_SAFE_TEXTURE_CACHE_COLOR_SAMPLES,
                m_settings.m_SafeTextureCacheColorSamples);
     layer->Set(Config::GFX_PERF_QUERIES_ENABLE, m_settings.m_PerfQueriesEnable);
+    layer->Set(Config::MAIN_FLOAT_EXCEPTIONS, m_settings.m_FloatExceptions);
+    layer->Set(Config::MAIN_DIVIDE_BY_ZERO_EXCEPTIONS, m_settings.m_DivideByZeroExceptions);
     layer->Set(Config::MAIN_FPRF, m_settings.m_FPRF);
     layer->Set(Config::MAIN_ACCURATE_NANS, m_settings.m_AccurateNaNs);
     layer->Set(Config::MAIN_DISABLE_ICACHE, m_settings.m_DisableICache);
diff --git a/Source/Core/Core/ConfigManager.cpp b/Source/Core/Core/ConfigManager.cpp
index 56985b1c3d..5b6bd5713e 100644
--- a/Source/Core/Core/ConfigManager.cpp
+++ b/Source/Core/Core/ConfigManager.cpp
@@ -213,6 +213,8 @@ void SConfig::SaveCoreSettings(IniFile& ini)
   core->Set("SyncGpuMaxDistance", iSyncGpuMaxDistance);
   core->Set("SyncGpuMinDistance", iSyncGpuMinDistance);
   core->Set("SyncGpuOverclock", fSyncGpuOverclock);
+  core->Set("FloatExceptions", bFloatExceptions);
+  core->Set("DivByZeroExceptions", bDivideByZeroExceptions);
   core->Set("FPRF", bFPRF);
   core->Set("AccurateNaNs", bAccurateNaNs);
   core->Set("SelectedLanguage", SelectedLanguage);
@@ -509,6 +511,8 @@ void SConfig::LoadCoreSettings(IniFile& ini)
   core->Get("SyncGpuOverclock", &fSyncGpuOverclock, 1.0f);
   core->Get("FastDiscSpeed", &bFastDiscSpeed, false);
   core->Get("LowDCBZHack", &bLowDCBZHack, false);
+  core->Get("FloatExceptions", &bFloatExceptions, false);
+  core->Get("DivByZeroExceptions", &bDivideByZeroExceptions, false);
   core->Get("FPRF", &bFPRF, false);
   core->Get("AccurateNaNs", &bAccurateNaNs, false);
   core->Get("DisableICache", &bDisableICache, false);
@@ -747,6 +751,8 @@ void SConfig::LoadDefaults()
   bRunCompareServer = false;
   bDSPHLE = true;
   bFastmem = true;
+  bFloatExceptions = false;
+  bDivideByZeroExceptions = false;
   bFPRF = false;
   bAccurateNaNs = false;
   bDisableICache = false;
diff --git a/Source/Core/Core/ConfigManager.h b/Source/Core/Core/ConfigManager.h
index 50d8ebd3fe..4ed0924b4f 100644
--- a/Source/Core/Core/ConfigManager.h
+++ b/Source/Core/Core/ConfigManager.h
@@ -108,6 +108,8 @@ struct SConfig
   bool bJITRegisterCacheOff = false;
 
   bool bFastmem;
+  bool bFloatExceptions = false;
+  bool bDivideByZeroExceptions = false;
   bool bFPRF = false;
   bool bAccurateNaNs = false;
   bool bDisableICache = false;
diff --git a/Source/Core/Core/NetPlayClient.cpp b/Source/Core/Core/NetPlayClient.cpp
index 8f730038ef..e4cba66429 100644
--- a/Source/Core/Core/NetPlayClient.cpp
+++ b/Source/Core/Core/NetPlayClient.cpp
@@ -831,6 +831,8 @@ void NetPlayClient::OnStartGame(sf::Packet& packet)
     packet >> m_net_settings.m_EFBEmulateFormatChanges;
     packet >> m_net_settings.m_SafeTextureCacheColorSamples;
     packet >> m_net_settings.m_PerfQueriesEnable;
+    packet >> m_net_settings.m_FloatExceptions;
+    packet >> m_net_settings.m_DivideByZeroExceptions;
     packet >> m_net_settings.m_FPRF;
     packet >> m_net_settings.m_AccurateNaNs;
     packet >> m_net_settings.m_DisableICache;
diff --git a/Source/Core/Core/NetPlayProto.h b/Source/Core/Core/NetPlayProto.h
index 4d3cf436fb..537b820701 100644
--- a/Source/Core/Core/NetPlayProto.h
+++ b/Source/Core/Core/NetPlayProto.h
@@ -58,6 +58,8 @@ struct NetSettings
   bool m_EFBEmulateFormatChanges;
   int m_SafeTextureCacheColorSamples;
   bool m_PerfQueriesEnable;
+  bool m_FloatExceptions;
+  bool m_DivideByZeroExceptions;
   bool m_FPRF;
   bool m_AccurateNaNs;
   bool m_DisableICache;
diff --git a/Source/Core/Core/NetPlayServer.cpp b/Source/Core/Core/NetPlayServer.cpp
index 806f4967ac..bc96d0f952 100644
--- a/Source/Core/Core/NetPlayServer.cpp
+++ b/Source/Core/Core/NetPlayServer.cpp
@@ -1329,6 +1329,8 @@ bool NetPlayServer::SetupNetSettings()
   settings.m_SafeTextureCacheColorSamples =
       Config::Get(Config::GFX_SAFE_TEXTURE_CACHE_COLOR_SAMPLES);
   settings.m_PerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE);
+  settings.m_FloatExceptions = Config::Get(Config::MAIN_FLOAT_EXCEPTIONS);
+  settings.m_DivideByZeroExceptions = Config::Get(Config::MAIN_DIVIDE_BY_ZERO_EXCEPTIONS);
   settings.m_FPRF = Config::Get(Config::MAIN_FPRF);
   settings.m_AccurateNaNs = Config::Get(Config::MAIN_ACCURATE_NANS);
   settings.m_DisableICache = Config::Get(Config::MAIN_DISABLE_ICACHE);
@@ -1505,6 +1507,8 @@ bool NetPlayServer::StartGame()
   spac << m_settings.m_EFBEmulateFormatChanges;
   spac << m_settings.m_SafeTextureCacheColorSamples;
   spac << m_settings.m_PerfQueriesEnable;
+  spac << m_settings.m_FloatExceptions;
+  spac << m_settings.m_DivideByZeroExceptions;
   spac << m_settings.m_FPRF;
   spac << m_settings.m_AccurateNaNs;
   spac << m_settings.m_DisableICache;
diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp
index 834af98a5e..e75994fc96 100644
--- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp
+++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp
@@ -58,7 +58,7 @@ void CachedInterpreter::Init()
   jo.enableBlocklink = false;
 
   m_block_cache.Init();
-  UpdateMemoryOptions();
+  UpdateMemoryAndExceptionOptions();
 
   code_block.m_stats = &js.st;
   code_block.m_gpa = &js.gpa;
@@ -180,6 +180,17 @@ static bool CheckDSI(u32 data)
   return false;
 }
 
+static bool CheckProgramException(u32 data)
+{
+  if (PowerPC::ppcState.Exceptions & EXCEPTION_PROGRAM)
+  {
+    PowerPC::CheckExceptions();
+    PowerPC::ppcState.downcount -= data;
+    return true;
+  }
+  return false;
+}
+
 static bool CheckBreakpoint(u32 data)
 {
   PowerPC::CheckBreakPoints();
@@ -267,26 +278,26 @@ void CachedInterpreter::Jit(u32 address)
       const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound;
       const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0;
       const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) && jo.memcheck;
+      const bool check_program_exception = !endblock && ShouldHandleFPExceptionForInstruction(&op);
       const bool idle_loop = op.branchIsIdleLoop;
 
-      if (breakpoint)
-      {
+      if (breakpoint || check_fpu || endblock || memcheck || check_program_exception)
         m_code.emplace_back(WritePC, op.address);
+
+      if (breakpoint)
         m_code.emplace_back(CheckBreakpoint, js.downcountAmount);
-      }
 
       if (check_fpu)
       {
-        m_code.emplace_back(WritePC, op.address);
         m_code.emplace_back(CheckFPU, js.downcountAmount);
         js.firstFPInstructionFound = true;
       }
 
-      if (endblock || memcheck)
-        m_code.emplace_back(WritePC, op.address);
       m_code.emplace_back(PPCTables::GetInterpreterOp(op.inst), op.inst);
       if (memcheck)
         m_code.emplace_back(CheckDSI, js.downcountAmount);
+      if (check_program_exception)
+        m_code.emplace_back(CheckProgramException, js.downcountAmount);
       if (idle_loop)
         m_code.emplace_back(CheckIdle, js.blockStart);
       if (endblock)
@@ -316,5 +327,5 @@ void CachedInterpreter::ClearCache()
 {
   m_code.clear();
   m_block_cache.Clear();
-  UpdateMemoryOptions();
+  UpdateMemoryAndExceptionOptions();
 }
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp
index 45c1f8b9b4..f7eefbd585 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp
@@ -94,14 +94,14 @@ static std::array<GekkoOPTemplate, 54> primarytable =
 
 static std::array<GekkoOPTemplate, 13> table4 =
 {{    //SUBOP10
-	{0,    Interpreter::ps_cmpu0,   {"ps_cmpu0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{32,   Interpreter::ps_cmpo0,   {"ps_cmpo0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{0,    Interpreter::ps_cmpu0,   {"ps_cmpu0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{32,   Interpreter::ps_cmpo0,   {"ps_cmpo0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{40,   Interpreter::ps_neg,     {"ps_neg",     OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{136,  Interpreter::ps_nabs,    {"ps_nabs",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{264,  Interpreter::ps_abs,     {"ps_abs",     OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{64,   Interpreter::ps_cmpu1,   {"ps_cmpu1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{64,   Interpreter::ps_cmpu1,   {"ps_cmpu1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{72,   Interpreter::ps_mr,      {"ps_mr",      OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{96,   Interpreter::ps_cmpo1,   {"ps_cmpo1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{96,   Interpreter::ps_cmpo1,   {"ps_cmpo1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{528,  Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{560,  Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{592,  Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
@@ -112,23 +112,23 @@ static std::array<GekkoOPTemplate, 13> table4 =
 
 static std::array<GekkoOPTemplate, 17> table4_2 =
 {{
-	{10, Interpreter::ps_sum0,      {"ps_sum0",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{11, Interpreter::ps_sum1,      {"ps_sum1",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{12, Interpreter::ps_muls0,     {"ps_muls0",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{13, Interpreter::ps_muls1,     {"ps_muls1",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{14, Interpreter::ps_madds0,    {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{15, Interpreter::ps_madds1,    {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{18, Interpreter::ps_div,       {"ps_div",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 17, 0, 0, 0}},
-	{20, Interpreter::ps_sub,       {"ps_sub",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{21, Interpreter::ps_add,       {"ps_add",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{10, Interpreter::ps_sum0,      {"ps_sum0",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{11, Interpreter::ps_sum1,      {"ps_sum1",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{12, Interpreter::ps_muls0,     {"ps_muls0",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{13, Interpreter::ps_muls1,     {"ps_muls1",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{14, Interpreter::ps_madds0,    {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{15, Interpreter::ps_madds1,    {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{18, Interpreter::ps_div,       {"ps_div",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 17, 0, 0, 0}},
+	{20, Interpreter::ps_sub,       {"ps_sub",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{21, Interpreter::ps_add,       {"ps_add",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{23, Interpreter::ps_sel,       {"ps_sel",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{24, Interpreter::ps_res,       {"ps_res",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{25, Interpreter::ps_mul,       {"ps_mul",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{26, Interpreter::ps_rsqrte,    {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}},
-	{28, Interpreter::ps_msub,      {"ps_msub",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{29, Interpreter::ps_madd,      {"ps_madd",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{30, Interpreter::ps_nmsub,     {"ps_nmsub",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
-	{31, Interpreter::ps_nmadd,     {"ps_nmadd",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{24, Interpreter::ps_res,       {"ps_res",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 1, 0, 0, 0}},
+	{25, Interpreter::ps_mul,       {"ps_mul",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{26, Interpreter::ps_rsqrte,    {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 2, 0, 0, 0}},
+	{28, Interpreter::ps_msub,      {"ps_msub",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{29, Interpreter::ps_madd,      {"ps_madd",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{30, Interpreter::ps_nmsub,     {"ps_nmsub",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{31, Interpreter::ps_nmadd,     {"ps_nmadd",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 }};
 
 
@@ -280,7 +280,7 @@ static std::array<GekkoOPTemplate, 107> table31 =
 	{19,  Interpreter::mfcr,        {"mfcr",   OpType::System, FL_OUT_D, 1, 0, 0, 0}},
 	{83,  Interpreter::mfmsr,       {"mfmsr",  OpType::System, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{144, Interpreter::mtcrf,       {"mtcrf",  OpType::System, FL_IN_S | FL_SET_CRn, 1, 0, 0, 0}},
-	{146, Interpreter::mtmsr,       {"mtmsr",  OpType::System, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{146, Interpreter::mtmsr,       {"mtmsr",  OpType::System, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{210, Interpreter::mtsr,        {"mtsr",   OpType::System, FL_IN_S | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{242, Interpreter::mtsrin,      {"mtsrin", OpType::System, FL_IN_SB | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{339, Interpreter::mfspr,       {"mfspr",  OpType::SPR, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
@@ -304,15 +304,15 @@ static std::array<GekkoOPTemplate, 107> table31 =
 
 static std::array<GekkoOPTemplate, 9> table59 =
 {{
-	{18, Interpreter::fdivsx,       {"fdivsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO
-	{20, Interpreter::fsubsx,       {"fsubsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{21, Interpreter::faddsx,       {"faddsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{24, Interpreter::fresx,        {"fresx",    OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{25, Interpreter::fmulsx,       {"fmulsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{28, Interpreter::fmsubsx,      {"fmsubsx",  OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{29, Interpreter::fmaddsx,      {"fmaddsx",  OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{30, Interpreter::fnmsubsx,     {"fnmsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{31, Interpreter::fnmaddsx,     {"fnmaddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
+	{18, Interpreter::fdivsx,       {"fdivsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 17, 0, 0, 0}}, // TODO
+	{20, Interpreter::fsubsx,       {"fsubsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{21, Interpreter::faddsx,       {"faddsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{24, Interpreter::fresx,        {"fresx",    OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 1, 0, 0, 0}},
+	{25, Interpreter::fmulsx,       {"fmulsx",   OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{28, Interpreter::fmsubsx,      {"fmsubsx",  OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{29, Interpreter::fmaddsx,      {"fmaddsx",  OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{30, Interpreter::fnmsubsx,     {"fnmsubsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{31, Interpreter::fnmaddsx,     {"fnmaddsx", OpType::SingleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 }};
 
 static std::array<GekkoOPTemplate, 15> table63 =
@@ -323,36 +323,36 @@ static std::array<GekkoOPTemplate, 15> table63 =
 	// we don't actually need to calculate or store them here. So FL_READ_FPRF and FL_SET_FPRF is not
 	// an ideal representation of fcmp's effect on FPRF flags and might result in
 	// slightly sub-optimal code.
-	{32,  Interpreter::fcmpo,       {"fcmpo",   OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
-	{0,   Interpreter::fcmpu,       {"fcmpu",   OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
+	{32,  Interpreter::fcmpo,       {"fcmpo",   OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{0,   Interpreter::fcmpu,       {"fcmpu",   OpType::DoubleFP, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 
-	{14,  Interpreter::fctiwx,      {"fctiwx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{15,  Interpreter::fctiwzx,     {"fctiwzx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
+	{14,  Interpreter::fctiwx,      {"fctiwx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{15,  Interpreter::fctiwzx,     {"fctiwzx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{72,  Interpreter::fmrx,        {"fmrx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
 	{136, Interpreter::fnabsx,      {"fnabsx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_IN_FLOAT_B_BITEXACT | FL_USE_FPU, 1, 0, 0, 0}},
 	{40,  Interpreter::fnegx,       {"fnegx",   OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_IN_FLOAT_B_BITEXACT | FL_USE_FPU, 1, 0, 0, 0}},
-	{12,  Interpreter::frspx,       {"frspx",   OpType::DoubleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
+	{12,  Interpreter::frspx,       {"frspx",   OpType::DoubleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 
 	{64,  Interpreter::mcrfs,       {"mcrfs",   OpType::SystemFP, FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
 	{583, Interpreter::mffsx,       {"mffsx",   OpType::SystemFP, FL_RC_BIT_F | FL_INOUT_FLOAT_D | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
 	{70,  Interpreter::mtfsb0x,     {"mtfsb0x", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}},
-	{38,  Interpreter::mtfsb1x,     {"mtfsb1x", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}},
-	{134, Interpreter::mtfsfix,     {"mtfsfix", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}},
-	{711, Interpreter::mtfsfx,      {"mtfsfx",  OpType::SystemFP, FL_RC_BIT_F | FL_IN_FLOAT_B | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 3, 0, 0, 0}},
+	{38,  Interpreter::mtfsb1x,     {"mtfsb1x", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 3, 0, 0, 0}},
+	{134, Interpreter::mtfsfix,     {"mtfsfix", OpType::SystemFP, FL_RC_BIT_F | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 3, 0, 0, 0}},
+	{711, Interpreter::mtfsfx,      {"mtfsfx",  OpType::SystemFP, FL_RC_BIT_F | FL_IN_FLOAT_B | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 3, 0, 0, 0}},
 }};
 
 static std::array<GekkoOPTemplate, 10> table63_2 =
 {{
-	{18, Interpreter::fdivx,        {"fdivx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}},
-	{20, Interpreter::fsubx,        {"fsubx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{21, Interpreter::faddx,        {"faddx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
+	{18, Interpreter::fdivx,        {"fdivx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 31, 0, 0, 0}},
+	{20, Interpreter::fsubx,        {"fsubx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{21, Interpreter::faddx,        {"faddx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 	{23, Interpreter::fselx,        {"fselx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{25, Interpreter::fmulx,        {"fmulx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{26, Interpreter::frsqrtex,     {"frsqrtex", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{28, Interpreter::fmsubx,       {"fmsubx",   OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{29, Interpreter::fmaddx,       {"fmaddx",   OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{30, Interpreter::fnmsubx,      {"fnmsubx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{31, Interpreter::fnmaddx,      {"fnmaddx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
+	{25, Interpreter::fmulx,        {"fmulx",    OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{26, Interpreter::frsqrtex,     {"frsqrtex", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION | FL_FLOAT_DIV, 1, 0, 0, 0}},
+	{28, Interpreter::fmsubx,       {"fmsubx",   OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{29, Interpreter::fmaddx,       {"fmaddx",   OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{30, Interpreter::fnmsubx,      {"fnmsubx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
+	{31, Interpreter::fnmaddx,      {"fnmaddx",  OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_FLOAT_EXCEPTION, 1, 0, 0, 0}},
 }};
 // clang-format on
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 0a4bfeed4d..dba9520290 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -335,7 +335,7 @@ void Jit64::Init()
   jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
   jo.optimizeGatherPipe = true;
   jo.accurateSinglePrecision = true;
-  UpdateMemoryOptions();
+  UpdateMemoryAndExceptionOptions();
   js.fastmemLoadStore = nullptr;
   js.compilerPC = 0;
 
@@ -389,7 +389,7 @@ void Jit64::ClearCache()
   m_const_pool.Clear();
   ClearCodeSpace();
   Clear();
-  UpdateMemoryOptions();
+  UpdateMemoryAndExceptionOptions();
   ResetFreeMemoryRanges();
 }
 
@@ -453,6 +453,24 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
       SetJumpTarget(c);
     }
   }
+  else if (ShouldHandleFPExceptionForInstruction(js.op))
+  {
+    TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM));
+    FixupBranch exception = J_CC(CC_NZ, true);
+
+    SwitchToFarCode();
+    SetJumpTarget(exception);
+
+    RCForkGuard gpr_guard = gpr.Fork();
+    RCForkGuard fpr_guard = fpr.Fork();
+
+    gpr.Flush();
+    fpr.Flush();
+
+    MOV(32, PPCSTATE(pc), Imm32(js.op->address));
+    WriteExceptionExit();
+    SwitchToNearCode();
+  }
 }
 
 void Jit64::HLEFunction(u32 hook_index)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index ace5e193d1..9d2aad1b27 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -208,6 +208,7 @@ void Jit64::fp_arith(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || (jo.div_by_zero_exceptions && inst.SUBOP5 == 18));
 
   int a = inst.FA;
   int b = inst.FB;
@@ -292,6 +293,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   // We would like to emulate FMA instructions accurately without rounding error if possible, but
   // unfortunately emulating FMA in software is just too slow on CPUs that are too old to have FMA
@@ -733,6 +735,7 @@ void Jit64::fcmpX(UGeckoInstruction inst)
 {
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
+  FALLBACK_IF(jo.fp_exceptions);
 
   FloatCompare(inst);
 }
@@ -742,6 +745,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   int d = inst.RD;
   int b = inst.RB;
@@ -784,6 +788,7 @@ void Jit64::frspx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
   int b = inst.FB;
   int d = inst.FD;
   bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
@@ -800,6 +805,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
   int b = inst.FB;
   int d = inst.FD;
 
@@ -818,6 +824,7 @@ void Jit64::fresx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
   int b = inst.FB;
   int d = inst.FD;
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index 0b28fed61c..023d0d834a 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -33,6 +33,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   int d = inst.FD;
   int a = inst.FA;
@@ -84,6 +85,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   int d = inst.FD;
   int a = inst.FA;
@@ -152,6 +154,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
   int b = inst.FB;
   int d = inst.FD;
 
@@ -176,6 +179,7 @@ void Jit64::ps_res(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
   int b = inst.FB;
   int d = inst.FD;
 
@@ -199,6 +203,7 @@ void Jit64::ps_cmpXX(UGeckoInstruction inst)
 {
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
+  FALLBACK_IF(jo.fp_exceptions);
 
   FloatCompare(inst, !!(inst.SUBOP10 & 64));
 }
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
index 3117ef563f..2806879bcb 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -424,6 +424,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
 {
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
+  FALLBACK_IF(jo.fp_exceptions);
 
   {
     RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
@@ -777,6 +778,7 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 mask = 0x80000000 >> inst.CRBD;
 
@@ -814,6 +816,7 @@ void Jit64::mtfsfix(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   u8 imm = (inst.hex >> (31 - 19)) & 0xF;
   u32 mask = 0xF0000000 >> (4 * inst.CRFD);
@@ -844,6 +847,7 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   u32 mask = 0;
   for (int i = 0; i < 8; i++)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 3191be64eb..a68fc078ee 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -50,7 +50,7 @@ void JitArm64::Init()
   jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
   jo.enableBlocklink = true;
   jo.optimizeGatherPipe = true;
-  UpdateMemoryOptions();
+  UpdateMemoryAndExceptionOptions();
   gpr.Init(this);
   fpr.Init(this);
   blocks.Init();
@@ -129,7 +129,7 @@ void JitArm64::ClearCache()
   const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
   ClearCodeSpace();
   farcode.ClearCodeSpace();
-  UpdateMemoryOptions();
+  UpdateMemoryAndExceptionOptions();
 
   GenerateAsm();
 }
@@ -193,25 +193,14 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
       gpr.Unlock(WA);
     }
   }
+  else if (ShouldHandleFPExceptionForInstruction(js.op))
+  {
+    WriteConditionalExceptionExit(EXCEPTION_PROGRAM);
+  }
 
   if (jo.memcheck && (js.op->opinfo->flags & FL_LOADSTORE))
   {
-    ARM64Reg WA = gpr.GetReg();
-    LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
-    FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
-
-    FixupBranch handleException = B();
-    SwitchToFarCode();
-    SetJumpTarget(handleException);
-
-    gpr.Flush(FlushMode::MaintainState, WA);
-    fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
-
-    WriteExceptionExit(js.compilerPC, false, true);
-
-    SwitchToNearCode();
-    SetJumpTarget(noException);
-    gpr.Unlock(WA);
+    WriteConditionalExceptionExit(EXCEPTION_DSI);
   }
 }
 
@@ -495,6 +484,26 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always
   B(dispatcher);
 }
 
+void JitArm64::WriteConditionalExceptionExit(int exception)
+{  // Emits: if the `exception` bit of PPCSTATE Exceptions is set, exit via WriteExceptionExit.
+  ARM64Reg WA = gpr.GetReg();  // scratch; released by gpr.Unlock(WA) below
+  LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
+  FixupBranch noException = TBZ(WA, IntLog2(exception));  // bit clear: skip the exit path
+  // Bit set: jump to far code, where the exception exit is emitted out of line.
+  FixupBranch handleException = B();
+  SwitchToFarCode();
+  SetJumpTarget(handleException);
+
+  gpr.Flush(FlushMode::MaintainState, WA);
+  fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
+
+  WriteExceptionExit(js.compilerPC, false, true);  // always_exception = true
+
+  SwitchToNearCode();
+  SetJumpTarget(noException);  // near-code path resumes here when the bit is clear
+  gpr.Unlock(WA);
+}
+
 bool JitArm64::HandleFunctionHooking(u32 address)
 {
   return HLE::ReplaceFunctionIfPossible(address, [&](u32 hook_index, HLE::HookType type) {
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index f19bd33d55..bc831360c1 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -268,6 +268,7 @@ protected:
                           bool always_exception = false);
   void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false,
                           bool always_exception = false);
+  void WriteConditionalExceptionExit(int exception);
   void FakeLKExit(u32 exit_address_after_return);
   void WriteBLRExit(Arm64Gen::ARM64Reg dest);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
index e8b15ce16a..c4d63802a9 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -67,6 +67,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || (jo.div_by_zero_exceptions && inst.SUBOP5 == 18));
 
   u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
   u32 op5 = inst.SUBOP5;
@@ -339,6 +340,7 @@ void JitArm64::frspx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 b = inst.FB;
   const u32 d = inst.FD;
@@ -500,6 +502,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
 {
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
+  FALLBACK_IF(jo.fp_exceptions);
 
   FloatCompare(inst);
 }
@@ -509,6 +512,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 b = inst.FB;
   const u32 d = inst.FD;
@@ -551,6 +555,7 @@ void JitArm64::fresx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
 
   const u32 b = inst.FB;
   const u32 d = inst.FD;
@@ -579,6 +584,7 @@ void JitArm64::frsqrtex(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITFloatingPointOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
 
   const u32 b = inst.FB;
   const u32 d = inst.FD;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
index 1e8edb5036..5690bbc481 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -75,6 +75,7 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 a = inst.FA;
   const u32 c = inst.FC;
@@ -125,6 +126,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 a = inst.FA;
   const u32 b = inst.FB;
@@ -316,6 +318,7 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 a = inst.FA;
   const u32 b = inst.FB;
@@ -362,6 +365,7 @@ void JitArm64::ps_res(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
 
   const u32 b = inst.FB;
   const u32 d = inst.FD;
@@ -394,6 +398,7 @@ void JitArm64::ps_rsqrte(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions || jo.div_by_zero_exceptions);
 
   const u32 b = inst.FB;
   const u32 d = inst.FD;
@@ -425,6 +430,7 @@ void JitArm64::ps_cmpXX(UGeckoInstruction inst)
 {
   INSTRUCTION_START
   JITDISABLE(bJITPairedOff);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const bool upper = inst.SUBOP10 & 64;
   FloatCompare(inst, upper);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index 568d3072f3..63aa016322 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
@@ -86,6 +86,7 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
 {
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
+  FALLBACK_IF(jo.fp_exceptions);
 
   gpr.BindToRegister(inst.RS, true);
   STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr));
@@ -815,6 +816,7 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   const u32 mask = 0x80000000 >> inst.CRBD;
 
@@ -850,6 +852,7 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   u8 imm = (inst.hex >> (31 - 19)) & 0xF;
   u8 shift = 28 - 4 * inst.CRFD;
@@ -891,6 +894,7 @@ void JitArm64::mtfsfx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
   FALLBACK_IF(inst.Rc);
+  FALLBACK_IF(jo.fp_exceptions);
 
   u32 mask = 0;
   for (int i = 0; i < 8; i++)
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
index c42fce554e..1d67d61046 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
@@ -41,9 +41,21 @@ bool JitBase::CanMergeNextInstructions(int count) const
   return true;
 }
 
-void JitBase::UpdateMemoryOptions()
+void JitBase::UpdateMemoryAndExceptionOptions()  // refreshes jo.* from config and watchpoints
 {
   bool any_watchpoints = PowerPC::memchecks.HasAny();
   jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints);
   jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints;
+  jo.fp_exceptions = SConfig::GetInstance().bFloatExceptions;  // JIT FP ops fall back when set
+  jo.div_by_zero_exceptions = SConfig::GetInstance().bDivideByZeroExceptions;
+}
+
+bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op)
+{  // True if the JIT must check for an FP program exception after `op` (per current options).
+  if (jo.fp_exceptions)  // FP exceptions enabled: any FL_FLOAT_EXCEPTION op qualifies
+    return (op->opinfo->flags & FL_FLOAT_EXCEPTION) != 0;
+  else if (jo.div_by_zero_exceptions)  // only div-by-zero enabled: FL_FLOAT_DIV ops qualify
+    return (op->opinfo->flags & FL_FLOAT_DIV) != 0;
+  else
+    return false;
 }
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index 18784ff095..b34d16365c 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -63,6 +63,8 @@ protected:
     bool fastmem;
     bool fastmem_arena;
     bool memcheck;
+    bool fp_exceptions;
+    bool div_by_zero_exceptions;
     bool profile_blocks;
   };
   struct JitState
@@ -113,7 +115,9 @@ protected:
 
   bool CanMergeNextInstructions(int count) const;
 
-  void UpdateMemoryOptions();
+  void UpdateMemoryAndExceptionOptions();
+
+  bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op);
 
 public:
   JitBase();
diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp
index bd1f0d0324..d02cc10e0c 100644
--- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp
@@ -524,8 +524,12 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, const Gekk
   code->wantsCR0 = false;
   code->wantsCR1 = false;
 
+  bool first_fpu_instruction = false;
   if (opinfo->flags & FL_USE_FPU)
+  {
+    first_fpu_instruction = !block->m_fpa->any;
     block->m_fpa->any = true;
+  }
 
   if (opinfo->flags & FL_TIMER)
     block->m_gpa->anyTimer = true;
@@ -550,9 +554,10 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, const Gekk
   code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
   code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0;
 
-  // TODO: Is it possible to determine that some FPU instructions never cause exceptions?
   code->canCauseException =
-      (opinfo->flags & (FL_LOADSTORE | FL_USE_FPU | FL_PROGRAMEXCEPTION)) != 0;
+      first_fpu_instruction || (opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 ||
+      (SConfig::GetInstance().bFloatExceptions && (opinfo->flags & FL_FLOAT_EXCEPTION)) ||
+      (SConfig::GetInstance().bDivideByZeroExceptions && (opinfo->flags & FL_FLOAT_DIV));
 
   code->wantsCA = (opinfo->flags & FL_READ_CA) != 0;
   code->outputCA = (opinfo->flags & FL_SET_CA) != 0;
@@ -928,14 +933,14 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
     const bool opWantsCR1 = op.wantsCR1;
     const bool opWantsFPRF = op.wantsFPRF;
     const bool opWantsCA = op.wantsCA;
-    op.wantsCR0 = wantsCR0 || op.canEndBlock;
-    op.wantsCR1 = wantsCR1 || op.canEndBlock;
-    op.wantsFPRF = wantsFPRF || op.canEndBlock;
-    op.wantsCA = wantsCA || op.canEndBlock;
-    wantsCR0 |= opWantsCR0 || op.canEndBlock;
-    wantsCR1 |= opWantsCR1 || op.canEndBlock;
-    wantsFPRF |= opWantsFPRF || op.canEndBlock;
-    wantsCA |= opWantsCA || op.canEndBlock;
+    op.wantsCR0 = wantsCR0 || op.canEndBlock || op.canCauseException;
+    op.wantsCR1 = wantsCR1 || op.canEndBlock || op.canCauseException;
+    op.wantsFPRF = wantsFPRF || op.canEndBlock || op.canCauseException;
+    op.wantsCA = wantsCA || op.canEndBlock || op.canCauseException;
+    wantsCR0 |= opWantsCR0 || op.canEndBlock || op.canCauseException;
+    wantsCR1 |= opWantsCR1 || op.canEndBlock || op.canCauseException;
+    wantsFPRF |= opWantsFPRF || op.canEndBlock || op.canCauseException;
+    wantsCA |= opWantsCA || op.canEndBlock || op.canCauseException;
     wantsCR0 &= !op.outputCR0 || opWantsCR0;
     wantsCR1 &= !op.outputCR1 || opWantsCR1;
     wantsFPRF &= !op.outputFPRF || opWantsFPRF;
diff --git a/Source/Core/Core/PowerPC/PPCTables.h b/Source/Core/Core/PowerPC/PPCTables.h
index c53510f221..f88d5d8ef3 100644
--- a/Source/Core/Core/PowerPC/PPCTables.h
+++ b/Source/Core/Core/PowerPC/PPCTables.h
@@ -64,7 +64,9 @@ enum InstructionFlags : u64
   FL_IN_FLOAT_C_BITEXACT = (1ull << 31),  // The output is based on the exact bits in frC.
   FL_IN_FLOAT_AB_BITEXACT = FL_IN_FLOAT_A_BITEXACT | FL_IN_FLOAT_B_BITEXACT,
   FL_IN_FLOAT_BC_BITEXACT = FL_IN_FLOAT_B_BITEXACT | FL_IN_FLOAT_C_BITEXACT,
-  FL_PROGRAMEXCEPTION = (1ull << 32),  // May generate a system exception.
+  FL_PROGRAMEXCEPTION = (1ull << 32),  // May generate a program exception (not floating point).
+  FL_FLOAT_EXCEPTION = (1ull << 33),   // May generate a program exception (floating point).
+  FL_FLOAT_DIV = (1ull << 34),  // May generate a program exception (FP) due to division by 0.
 };
 
 enum class OpType

From 25bff910540104c06212b5b6439a38b277549ab5 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Sun, 29 Aug 2021 11:57:15 +0200
Subject: [PATCH 6/7] Interpreter: Fix NI_div ZX check

---
 .../PowerPC/Interpreter/Interpreter_FPUtils.h | 23 ++++++++-----------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
index f1f8cddcd7..4e0601b897 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -138,7 +138,15 @@ inline FPResult NI_div(UReg_FPSCR* fpscr, double a, double b)
 {
   FPResult result{a / b};
 
-  if (std::isnan(result.value))
+  if (std::isinf(result.value))
+  {
+    if (b == 0.0)
+    {
+      result.SetException(fpscr, FPSCR_ZX);
+      return result;
+    }
+  }
+  else if (std::isnan(result.value))
   {
     if (Common::IsSNAN(a) || Common::IsSNAN(b))
       result.SetException(fpscr, FPSCR_VXSNAN);
@@ -157,20 +165,9 @@ inline FPResult NI_div(UReg_FPSCR* fpscr, double a, double b)
     }
 
     if (b == 0.0)
-    {
-      if (a == 0.0)
-      {
-        result.SetException(fpscr, FPSCR_VXZDZ);
-      }
-      else
-      {
-        result.SetException(fpscr, FPSCR_ZX);
-      }
-    }
+      result.SetException(fpscr, FPSCR_VXZDZ);
     else if (std::isinf(a) && std::isinf(b))
-    {
       result.SetException(fpscr, FPSCR_VXIDI);
-    }
 
     result.value = PPC_NAN;
     return result;

From c250ed03cf65c4398c4f844524b75d5600cfb535 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Sun, 29 Aug 2021 12:21:33 +0200
Subject: [PATCH 7/7] GameINI: Enable DivByZeroExceptions where needed

Combined with the previous commits in this pull request,
this fixes https://bugs.dolphin-emu.org/issues/7230 (True Crime:
New York City) and https://bugs.dolphin-emu.org/issues/9650
(Call of Duty: Finest Hour).
---
 Data/Sys/GameSettings/G2C.ini | 4 ++++
 Data/Sys/GameSettings/GCO.ini | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 Data/Sys/GameSettings/G2C.ini
 create mode 100644 Data/Sys/GameSettings/GCO.ini

diff --git a/Data/Sys/GameSettings/G2C.ini b/Data/Sys/GameSettings/G2C.ini
new file mode 100644
index 0000000000..a12a6a1190
--- /dev/null
+++ b/Data/Sys/GameSettings/G2C.ini
@@ -0,0 +1,4 @@
+# G2CD52, G2CE52, G2CP52, G2CX52 - True Crime: New York City
+
+[Core]
+DivByZeroExceptions = True
diff --git a/Data/Sys/GameSettings/GCO.ini b/Data/Sys/GameSettings/GCO.ini
new file mode 100644
index 0000000000..ec36443599
--- /dev/null
+++ b/Data/Sys/GameSettings/GCO.ini
@@ -0,0 +1,4 @@
+# GCOD52, GCOE52, GCOF52, GCOP52 - Call of Duty: Finest Hour
+
+[Core]
+DivByZeroExceptions = True