spu/interp: optimize floating point exception checking

Read the MXCSR register once and use its value to check for all exceptions.
2025-08-27 20:58:33 +00:00 · 2018-02-28 18:28:38 +02:00 · 2018-02-28 18:28:38 +02:00 · 6e8ccbcf17
commit 6e8ccbcf17
parent 72e54e8b60
1 changed files with 21 additions and 18 deletions
--- a/rpcs3/Emu/Cell/SPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp
@ -1614,7 +1614,7 @@ inline bool isdenormal(double x)

 void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	for (int i = 0; i < 4; i++)
 	{
 		const float a = spu.gpr[op.ra]._f[i];
@ -1634,7 +1634,7 @@ void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op)

 void spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	for (int i = 0; i < 4; i++)
 	{
 		const float a = spu.gpr[op.ra]._f[i];
@ -1677,7 +1677,7 @@ void spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op)

 static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	for (int w = 0; w < 4; w++)
 	{
 		const float a = spu.gpr[op.ra]._f[w];
@ -1766,7 +1766,7 @@ void spu_interpreter_precise::FS(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, o

 void spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	for (int w = 0; w < 4; w++)
 	{
 		const float a = spu.gpr[op.ra]._f[w];
@ -1899,18 +1899,19 @@ static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation)
 			case DFASM_S: result = a - b; break;
 			case DFASM_M: result = a * b; break;
 			}
-			if (fetestexcept(FE_INVALID))
+			const u32 e = _mm_getcsr();
+			if (e & _MM_MASK_INVALID)
 			{
 				spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
 				result = DOUBLE_NAN;
 			}
 			else
 			{
-				if (fetestexcept(FE_OVERFLOW))
+				if (e & _MM_MASK_OVERFLOW)
 					spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
-				if (fetestexcept(FE_UNDERFLOW))
+				if (e & _MM_MASK_UNDERFLOW)
 					spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
-				if (fetestexcept(FE_INEXACT))
+				if (e & _MM_MASK_INEXACT)
 					spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
 			}
 		}
@ -1959,18 +1960,19 @@ static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
 			SetHostRoundingMode(spu.fpscr.checkSliceRounding(i));
 			feclearexcept(FE_ALL_EXCEPT);
 			result = fma(a, b, sub ? -c : c);
-			if (fetestexcept(FE_INVALID))
+			const u32 e = _mm_getcsr();
+			if (e & _MM_MASK_INVALID)
 			{
 				spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
 				result = DOUBLE_NAN;
 			}
 			else
 			{
-				if (fetestexcept(FE_OVERFLOW))
+				if (e & _MM_MASK_OVERFLOW)
 					spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
-				if (fetestexcept(FE_UNDERFLOW))
+				if (e & _MM_MASK_UNDERFLOW)
 					spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
-				if (fetestexcept(FE_INEXACT))
+				if (e & _MM_MASK_INEXACT)
 					spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
 				if (neg) result = -result;
 			}
@ -2033,11 +2035,12 @@ void spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op)
 		{
 			feclearexcept(FE_ALL_EXCEPT);
 			spu.gpr[op.rt]._f[i * 2 + 1] = (float)a;
-			if (fetestexcept(FE_OVERFLOW))
+			const u32 e = _mm_getcsr();
+			if (e & _MM_MASK_OVERFLOW)
 				spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
-			if (fetestexcept(FE_UNDERFLOW))
+			if (e & _MM_MASK_UNDERFLOW)
 				spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
-			if (fetestexcept(FE_INEXACT))
+			if (e & _MM_MASK_INEXACT)
 				spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
 		}
 		spu.gpr[op.rt]._u32[i * 2] = 0;
@ -2131,7 +2134,7 @@ void spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op)

 void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	const int scale = 155 - (op.i8 & 0xff); //unsigned immediate
 	for (int i = 0; i < 4; i++)
 	{
@ -2154,7 +2157,7 @@ void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op)

 void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	const int scale = 155 - (op.i8 & 0xff); //unsigned immediate
 	for (int i = 0; i < 4; i++)
 	{
@ -2177,7 +2180,7 @@ void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op)

 static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
 {
-	SetHostRoundingMode(FPSCR_RN_ZERO);
+	fesetround(FE_TOWARDZERO);
 	for (int w = 0; w < 4; w++)
 	{
 		float a = spu.gpr[op.ra]._f[w];