diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp
index 232fe7bb0c..e0c1ba6399 100644
--- a/rpcs3/Emu/Cell/PPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp
@@ -1426,7 +1426,7 @@ bool ppu_interpreter_precise::VPKSHSS(ppu_thread& ppu, ppu_opcode_t op)
 {
 	const auto& a = ppu.vr[op.va];
 	const auto& b = ppu.vr[op.vb];
-	auto& d = ppu.vr[op.vd];
+	v128 d;
 
 	for (u8 i = 0; i < 8; i++)
 	{
@@ -1465,6 +1465,7 @@ bool ppu_interpreter_precise::VPKSHSS(ppu_thread& ppu, ppu_opcode_t op)
 		}
 	}
 
+	ppu.vr[op.vd] = d;
 	return true;
 }
 
@@ -1476,47 +1477,20 @@ bool ppu_interpreter_fast::VPKSHUS(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter_precise::VPKSHUS(ppu_thread& ppu, ppu_opcode_t op)
 {
-	const auto& a = ppu.vr[op.va];
-	const auto& b = ppu.vr[op.vb];
-	auto& d = ppu.vr[op.vd];
+	const auto a = ppu.vr[op.va];
+	const auto b = ppu.vr[op.vb];
 
-	for (u8 i = 0; i < 8; i++)
+	// Detect saturation
 	{
-		s16 result = a._s16[i];
-
-		if (result < 0)
+		const u64 mask = 0xFF00FF00FF00FF00ULL;
+		const auto all_bits = v128::fromV(_mm_or_si128(a.vi, b.vi));
+		if ((all_bits._u64[0] | all_bits._u64[1]) & mask)
 		{
-			d._u8[i + 8] = 0;
 			ppu.sat = true;
 		}
-		else if (result > UINT8_MAX)
-		{
-			d._u8[i + 8] = UINT8_MAX;
-			ppu.sat = true;
-		}
-		else
-		{
-			d._u8[i + 8] = (u8)result;
-		}
-
-		result = b._s16[i];
-
-		if (result < 0)
-		{
-			d._u8[i] = 0;
-			ppu.sat = true;
-		}
-		else if (result > UINT8_MAX)
-		{
-			d._u8[i] = UINT8_MAX;
-			ppu.sat = true;
-		}
-		else
-		{
-			d._u8[i] = (u8)result;
-		}
 	}
 
+	ppu.vr[op.vd].vi = _mm_packus_epi16(b.vi, a.vi);
 	return true;
 }
 
@@ -1530,7 +1504,7 @@ bool ppu_interpreter_precise::VPKSWSS(ppu_thread& ppu, ppu_opcode_t op)
 {
 	const auto& a = ppu.vr[op.va];
 	const auto& b = ppu.vr[op.vb];
-	auto& d = ppu.vr[op.vd];
+	v128 d;
 
 	for (u8 i = 0; i < 4; i++)
 	{
@@ -1569,6 +1543,7 @@ bool ppu_interpreter_precise::VPKSWSS(ppu_thread& ppu, ppu_opcode_t op)
 		}
 	}
 
+	ppu.vr[op.vd] = d;
 	return true;
 }
 
@@ -2495,7 +2470,7 @@ bool ppu_interpreter_precise::VSUMSWS(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter_fast::VSUM2SWS(ppu_thread& ppu, ppu_opcode_t op)
 {
-	auto& d = ppu.vr[op.vd];
+	v128 d;
 	const auto& a = ppu.vr[op.va];
 	const auto& b = ppu.vr[op.vb];
 
@@ -2516,12 +2491,13 @@ bool ppu_interpreter_fast::VSUM2SWS(ppu_thread& ppu, ppu_opcode_t op)
 	}
 	d._s32[1] = 0;
 	d._s32[3] = 0;
+	ppu.vr[op.vd] = d;
 	return true;
 }
 
 bool ppu_interpreter_precise::VSUM2SWS(ppu_thread& ppu, ppu_opcode_t op)
 {
-	auto& d = ppu.vr[op.vd];
+	v128 d;
 	const auto& a = ppu.vr[op.va];
 	const auto& b = ppu.vr[op.vb];
 
@@ -2542,8 +2518,10 @@ bool ppu_interpreter_precise::VSUM2SWS(ppu_thread& ppu, ppu_opcode_t op)
 		else
 			d._s32[n * 2] = (s32)sum;
 	}
+
 	d._s32[1] = 0;
 	d._s32[3] = 0;
+	ppu.vr[op.vd] = d;
 	return true;
 }