diff --git a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
index 8bc069ecaf..9c153b2056 100644
--- a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
+++ b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
@@ -27,23 +27,24 @@ namespace rsx
 					return;
 				}
 
-				if constexpr (FlushDMA)
-				{
-					// If the backend handled the request, this call will basically be a NOP
-					g_fxo->get<rsx::dma_manager>().sync();
-				}
-
-				if constexpr (FlushPipe)
-				{
-					// Manually flush the pipeline.
-					// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
-					RSX(ctx)->sync();
-				}
-
 				if constexpr (FlushDMA || FlushPipe)
 				{
-					// Kick MM flush without waiting. Technically we should do this before the sync in case of MTRSX, but doing it later improves CPU performance.
-					rsx::mm_flush_lazy();
+					// A release op must be accompanied by an MM flush.
+					// FlushPipe implicitly does an MM flush but FlushDMA does not, so trigger the flush here.
+					rsx::mm_flush();
+
+					if constexpr (FlushDMA)
+					{
+						// If the backend handled the request, this call will basically be a NOP
+						g_fxo->get<rsx::dma_manager>().sync();
+					}
+
+					if constexpr (FlushPipe)
+					{
+						// Manually flush the pipeline.
+						// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
+						RSX(ctx)->sync();
+					}
 				}
 
 				if (handled)
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 324360035b..4abc484b5e 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -3182,6 +3182,8 @@ namespace rsx
 	{
 		m_eng_interrupt_mask.clear(rsx::pipe_flush_interrupt);
 
+		mm_flush();
+
 		if (zcull_ctrl->has_pending())
 		{
 			zcull_ctrl->sync(this);
@@ -3718,6 +3720,9 @@ namespace rsx
 	{
 		bool pause_emulator = false;
 
+		// MM sync. This is a pre-emptive operation, so we can use a deferred request.
+		rsx::mm_flush_lazy();
+
 		// Marks the end of a frame scope GPU-side
 		if (g_user_asked_for_frame_capture.exchange(false) && !capture_current_frame)
 		{