Merge branch 'master' into SPU2

2025-04-20 19:45:20 +00:00 · 2024-12-19 17:08:37 +02:00 · 2024-12-19 17:08:37 +02:00 · 21bc3e8200
commit 21bc3e8200
parent b9b8ed3ae6 c56147e04b
42 changed files with 1672 additions and 150 deletions
--- a/3rdparty/7zip/7zip
+++ b/3rdparty/7zip/7zip
@ -1 +1 @@
-Subproject commit e008ce3976c087bfd21344af8f00a23cf69d4174
+Subproject commit e5431fa6f5505e385c6f9367260717e9c47dc2ee
--- a/3rdparty/FAudio
+++ b/3rdparty/FAudio
@ -1 +1 @@
-Subproject commit 74d45e615c2e7510c7e0f2ccb91dc6d7ccae4bec
+Subproject commit b7c2e109ea86b82109244c9c4569ce9ad0c884df
--- a/3rdparty/OpenAL/openal-soft
+++ b/3rdparty/OpenAL/openal-soft
@ -1 +1 @@
-Subproject commit d3875f333fb6abe2f39d82caca329414871ae53b
+Subproject commit 90191edd20bb877c5cbddfdac7ec0fe49ad93727
--- a/3rdparty/curl/curl
+++ b/3rdparty/curl/curl
@ -1 +1 @@
-Subproject commit b1ef0e1a01c0bb6ee5367bd9c186a603bde3615a
+Subproject commit 75a2079d5c28debb2eaa848ca9430f1fe0d7844c
--- a/3rdparty/libsdl-org/SDL
+++ b/3rdparty/libsdl-org/SDL
@ -1 +1 @@
-Subproject commit c98c4fbff6d8f3016a3ce6685bf8f43433c3efcc
+Subproject commit 9c821dc21ccbd69b2bda421fdb35cb4ae2da8f5e
--- a/rpcs3/Emu/CMakeLists.txt
+++ b/rpcs3/Emu/CMakeLists.txt
@ -506,6 +506,7 @@ target_sources(rpcs3_emu PRIVATE
    RSX/GL/OpenGL.cpp
    RSX/GL/upscalers/fsr1/fsr_pass.cpp
    RSX/GSRender.cpp
+    RSX/Host/MM.cpp
    RSX/Host/RSXDMAWriter.cpp
    RSX/Null/NullGSRender.cpp
    RSX/NV47/FW/draw_call.cpp
--- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAdec.cpp
--- a/rpcs3/Emu/Cell/Modules/cellAdec.h
+++ b/rpcs3/Emu/Cell/Modules/cellAdec.h
@ -253,7 +253,7 @@ enum CellAdecSampleRate : s32
 	CELL_ADEC_FS_8kHz,
 };

-enum CellAdecBitLength : s32
+enum CellAdecBitLength : u32
 {
 	CELL_ADEC_BIT_LENGTH_RESERVED1,
 	CELL_ADEC_BIT_LENGTH_16,
@ -352,8 +352,8 @@ enum AdecCorrectPtsValueType : s8
 	ADEC_CORRECT_PTS_VALUE_TYPE_UNSPECIFIED = -1,

 	// Adds a fixed amount
-	ADEC_CORRECT_PTS_VALUE_TYPE_LPCM = 0,
-	// 1
+	ADEC_CORRECT_PTS_VALUE_TYPE_LPCM_HDMV = 0,
+	ADEC_CORRECT_PTS_VALUE_TYPE_LPCM_DVD = 1, // Unused for some reason, the DVD player probably takes care of timestamps itself
 	ADEC_CORRECT_PTS_VALUE_TYPE_ATRACX_48000Hz = 2,
 	ADEC_CORRECT_PTS_VALUE_TYPE_ATRACX_44100Hz = 3,
 	ADEC_CORRECT_PTS_VALUE_TYPE_ATRACX_32000Hz = 4,
@ -562,6 +562,11 @@ public:
 	{
 		ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE

+		if (ppu.state & cpu_flag::again) // Savestate was created while waiting on the mutex
+		{
+			return {};
+		}
+
 		if (entries[front].state == 0xff)
 		{
 			ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE
@ -648,6 +653,20 @@ static_assert(std::is_standard_layout_v<AdecContext> && std::is_trivial_v<AdecCo
 CHECK_SIZE_ALIGN(AdecContext, 0x530, 8);


+enum : u32
+{
+	CELL_ADEC_LPCM_DVD_CH_RESERVED1,
+	CELL_ADEC_LPCM_DVD_CH_MONO,
+	CELL_ADEC_LPCM_DVD_CH_RESERVED2,
+	CELL_ADEC_LPCM_DVD_CH_STEREO,
+	CELL_ADEC_LPCM_DVD_CH_UNK1, // Either 3 front or 2 front + 1 surround
+	CELL_ADEC_LPCM_DVD_CH_UNK2, // Either 3 front + 1 surround or 2 front + 2 surround
+	CELL_ADEC_LPCM_DVD_CH_3_2,
+	CELL_ADEC_LPCM_DVD_CH_3_2_LFE,
+	CELL_ADEC_LPCM_DVD_CH_3_4,
+	CELL_ADEC_LPCM_DVD_CH_3_4_LFE,
+};
+
 struct CellAdecParamLpcm
 {
 	be_t<u32> channelNumber;
@ -664,6 +683,216 @@ struct CellAdecLpcmInfo
 	be_t<u32> outputDataSize;
 };

+// HLE exclusive, for savestates
+enum class lpcm_dec_state : u8
+{
+	waiting_for_cmd_mutex_lock,
+	waiting_for_cmd_cond_wait,
+	waiting_for_output_mutex_lock,
+	waiting_for_output_cond_wait,
+	queue_mutex_lock,
+	executing_cmd
+};
+
+class LpcmDecSemaphore
+{
+	be_t<u32> value;
+	be_t<u32> mutex; // sys_mutex_t
+	be_t<u32> cond;  // sys_cond_t
+
+public:
+	error_code init(ppu_thread& ppu, vm::ptr<LpcmDecSemaphore> _this, u32 initial_value)
+	{
+		value = initial_value;
+
+		const vm::var<sys_mutex_attribute_t> mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem01"_u64 } }};
+		const vm::var<sys_cond_attribute_t> cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec01"_u64 } }};
+
+		if (error_code ret = sys_mutex_create(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK)
+		{
+			return ret;
+		}
+
+		return sys_cond_create(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr);
+	}
+
+	error_code finalize(ppu_thread& ppu) const
+	{
+		if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK)
+		{
+			return ret;
+		}
+
+		return sys_mutex_destroy(ppu, mutex);
+	}
+
+	error_code release(ppu_thread& ppu)
+	{
+		if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK)
+		{
+			return ret;
+		}
+
+		value++;
+
+		if (error_code ret = sys_cond_signal(ppu, cond); ret != CELL_OK)
+		{
+			return ret; // LLE doesn't unlock the mutex
+		}
+
+		return sys_mutex_unlock(ppu, mutex);
+	}
+
+	error_code acquire(ppu_thread& ppu, lpcm_dec_state& savestate)
+	{
+		if (savestate == lpcm_dec_state::waiting_for_cmd_cond_wait)
+		{
+			goto cond_wait;
+		}
+
+		savestate = lpcm_dec_state::waiting_for_cmd_mutex_lock;
+
+		if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK)
+		{
+			return ret;
+		}
+
+		if (ppu.state & cpu_flag::again)
+		{
+			return {};
+		}
+
+		if (value == 0u)
+		{
+			savestate = lpcm_dec_state::waiting_for_cmd_cond_wait;
+			cond_wait:
+
+			if (error_code ret = sys_cond_wait(ppu, cond, 0); ret != CELL_OK)
+			{
+				return ret; // LLE doesn't unlock the mutex
+			}
+
+			if (ppu.state & cpu_flag::again)
+			{
+				return {};
+			}
+		}
+
+		value--;
+
+		return sys_mutex_unlock(ppu, mutex);
+	}
+};
+
+CHECK_SIZE(LpcmDecSemaphore, 0xc);
+
+enum class LpcmDecCmdType : u32
+{
+	start_seq,
+	end_seq,
+	decode_au,
+	close
+};
+
+struct LpcmDecCmd
+{
+	be_t<s32> pcm_handle;
+	vm::bcptr<void> au_start_addr;
+	be_t<u32> au_size;
+	u32 reserved1[2];
+	CellAdecParamLpcm lpcm_param;
+	be_t<LpcmDecCmdType> type;
+	u32 reserved2;
+
+	LpcmDecCmd() = default; // cellAdecOpen()
+
+	LpcmDecCmd(LpcmDecCmdType&& type) // End sequence
+		: type(type)
+	{
+	}
+
+	LpcmDecCmd(LpcmDecCmdType&& type, const CellAdecParamLpcm& lpcm_param) // Start sequence
+		: lpcm_param(lpcm_param), type(type)
+	{
+	}
+
+	LpcmDecCmd(LpcmDecCmdType&& type, const s32& pcm_handle, const CellAdecAuInfo& au_info) // Decode au
+		: pcm_handle(pcm_handle), au_start_addr(au_info.startAddr), au_size(au_info.size), type(type)
+	{
+	}
+};
+
+CHECK_SIZE(LpcmDecCmd, 0x2c);
+
+struct LpcmDecContext
+{
+	AdecCmdQueue<LpcmDecCmd> cmd_queue;
+
+	be_t<u64> thread_id; // sys_ppu_thread_t
+
+	be_t<u32> queue_size_mutex; // sys_mutex_t
+	be_t<u32> queue_size_cond;  // sys_cond_t, unused
+	be_t<u32> unk_mutex;        // sys_mutex_t, unused
+	be_t<u32> unk_cond;         // sys_cond_t, unused
+
+	be_t<u32> run_thread;
+
+	AdecCb<AdecNotifyAuDone> notify_au_done;
+	AdecCb<AdecNotifyPcmOut> notify_pcm_out;
+	AdecCb<AdecNotifyError> notify_error;
+	AdecCb<AdecNotifySeqDone> notify_seq_done;
+
+	be_t<u32> output_locked;
+	vm::bptr<f32> output;
+
+	vm::bptr<CellAdecParamLpcm> lpcm_param;
+
+	vm::bcptr<void> spurs_cmd_data;
+
+	// HLE exclusive
+	lpcm_dec_state savestate;
+	u64 cmd_counter; // For debugging
+
+	u8 reserved1[24]; // 36 bytes on LLE
+
+	be_t<u32> output_mutex;    // sys_mutex_t
+	be_t<u32> output_consumed; // sys_cond_t
+
+	LpcmDecSemaphore cmd_available;
+	LpcmDecSemaphore reserved2; // Unused
+
+	be_t<u32> queue_mutex; // sys_mutex_t
+
+	be_t<u32> error_occurred;
+
+	u8 spurs_stuff[32];
+
+	be_t<u32> spurs_queue_pop_mutex;
+	be_t<u32> spurs_queue_push_mutex;
+
+	be_t<u32> using_existing_spurs_instance;
+
+	be_t<u32> dvd_packing;
+
+	be_t<u32> output_size;
+
+	LpcmDecCmd cmd; // HLE exclusive, name of Spurs taskset (32 bytes) + CellSpursTaskLsPattern on LLE
+
+	u8 more_spurs_stuff[10]; // 52 bytes on LLE
+
+	void exec(ppu_thread& ppu);
+
+	template <LpcmDecCmdType type>
+	error_code send_command(ppu_thread& ppu, auto&&... args);
+
+	inline error_code release_output(ppu_thread& ppu);
+};
+
+static_assert(std::is_standard_layout_v<LpcmDecContext>);
+CHECK_SIZE_ALIGN(LpcmDecContext, 0x1c8, 8);
+
+constexpr s32 LPCM_DEC_OUTPUT_BUFFER_SIZE = 0x40000;
+
 // CELP Excitation Mode
 enum CELP_ExcitationMode : s32
 {
--- a/rpcs3/Emu/Cell/Modules/cellCamera.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellCamera.cpp
@ -784,26 +784,26 @@ s32 cellCameraIsAttached(s32 dev_num)

 	if (g_cfg.io.camera == camera_handler::null)
 	{
-		return false;
+		return 0;
 	}

 	auto& g_camera = g_fxo->get<camera_thread>();

 	if (!g_camera.init)
 	{
-		return false;
+		return 0;
 	}

 	if (!check_dev_num(dev_num))
 	{
-		return false;
+		return 0;
 	}

 	vm::var<s32> type;

 	if (cellCameraGetType(dev_num, type) != CELL_OK)
 	{
-		return false;
+		return 0;
 	}

 	std::lock_guard lock(g_camera.mutex);
@ -821,12 +821,12 @@ s32 cellCameraIsAttached(s32 dev_num)
 		}
 	}

-	return is_attached;
+	return is_attached ? 1 : 0;
 }

 s32 cellCameraIsOpen(s32 dev_num)
 {
-	cellCamera.notice("cellCameraIsOpen(dev_num=%d)", dev_num);
+	cellCamera.trace("cellCameraIsOpen(dev_num=%d)", dev_num);

 	if (g_cfg.io.camera == camera_handler::null)
 	{
@ -852,7 +852,7 @@ s32 cellCameraIsOpen(s32 dev_num)

 s32 cellCameraIsStarted(s32 dev_num)
 {
-	cellCamera.notice("cellCameraIsStarted(dev_num=%d)", dev_num);
+	cellCamera.trace("cellCameraIsStarted(dev_num=%d)", dev_num);

 	if (g_cfg.io.camera == camera_handler::null)
 	{
--- a/rpcs3/Emu/Cell/Modules/cellGem.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellGem.cpp
@ -956,9 +956,9 @@ static inline void pos_to_gem_state(u32 gem_num, gem_config::gem_controller& con
 		static constexpr f32 PI = 3.14159265f;
 		const auto degree_to_rad = [](f32 degree) -> f32 { return degree * PI / 180.0f; };

-		static constexpr f32 CONE = 10.0f / 2.0f;
-		const f32 roll = -degree_to_rad((image_y - half_height) / half_height * CONE); // This is actually the pitch
-		const f32 pitch = -degree_to_rad((image_x - half_width) / half_width * CONE); // This is actually the yaw
+		const f32 max_angle_per_side = g_cfg.io.fake_move_rotation_cone / 2.0f;
+		const f32 roll = -degree_to_rad((image_y - half_height) / half_height * max_angle_per_side); // This is actually the pitch
+		const f32 pitch = -degree_to_rad((image_x - half_width) / half_width * max_angle_per_side); // This is actually the yaw
 		const f32 yaw = degree_to_rad(0.0f);
 		const f32 cr = std::cos(roll * 0.5f);
 		const f32 sr = std::sin(roll * 0.5f);
--- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.cpp
@ -333,6 +333,10 @@ void lv2_socket_p2p::close()
 	auto& nc = g_fxo->get<p2p_context>();
 	{
 		std::lock_guard lock(nc.list_p2p_ports_mutex);
+
+		if (!nc.list_p2p_ports.contains(port))
+			return;
+
 		auto& p2p_port = ::at32(nc.list_p2p_ports, port);
 		{
 			std::lock_guard lock(p2p_port.bound_p2p_vports_mutex);
--- a/rpcs3/Emu/Cell/lv2/sys_prx.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_prx.cpp
@ -35,7 +35,7 @@ extern const std::map<std::string_view, int> g_prx_list
 	{ "libaacenc_spurs.sprx", 0 },
 	{ "libac3dec.sprx", 0 },
 	{ "libac3dec2.sprx", 0 },
-	{ "libadec.sprx", 0 },
+	{ "libadec.sprx", 1 },
 	{ "libadec2.sprx", 0 },
 	{ "libadec_internal.sprx", 0 },
 	{ "libad_async.sprx", 0 },
--- a/rpcs3/Emu/Io/Null/null_camera_handler.h
+++ b/rpcs3/Emu/Io/Null/null_camera_handler.h
@ -7,10 +7,10 @@ class null_camera_handler final : public camera_handler_base
 public:
 	null_camera_handler() : camera_handler_base() {}

-	void open_camera() override { m_state = camera_handler_state::open; }
-	void close_camera() override { m_state = camera_handler_state::closed; }
-	void start_camera() override { m_state = camera_handler_state::running; }
-	void stop_camera() override { m_state = camera_handler_state::open; }
+	void open_camera() override { set_state(camera_handler_state::open); }
+	void close_camera() override { set_state(camera_handler_state::closed); }
+	void start_camera() override { set_state(camera_handler_state::running); }
+	void stop_camera() override { set_state(camera_handler_state::open); }

 	void set_format(s32 format, u32 bytesize) override
 	{
@ -45,6 +45,6 @@ public:
 		height = 0;
 		frame_number = 0;
 		bytes_read = 0;
-		return m_state;
+		return get_state();
 	}
 };
--- a/rpcs3/Emu/Io/camera_handler_base.h
+++ b/rpcs3/Emu/Io/camera_handler_base.h
@ -30,22 +30,29 @@ public:
 	virtual u64 frame_number() const = 0; // Convenience function to check if there's a new frame.
 	virtual camera_handler_state get_image(u8* buf, u64 size, u32& width, u32& height, u64& frame_number, u64& bytes_read) = 0;

-	camera_handler_state get_state() const { return m_state.load(); };
+	camera_handler_state get_state() const { return m_state.load(); }
+	void set_state(camera_handler_state state) { m_state = m_state_expected = state; }

-	bool mirrored() const { return m_mirrored; };
-	s32 format() const { return m_format; };
-	u32 bytesize() const { return m_bytesize; };
-	u32 width() const { return m_width; };
-	u32 height() const { return m_height; };
-	u32 frame_rate() const { return m_frame_rate; };
+	camera_handler_state get_expected_state() const { return m_state_expected.load(); }
+	void set_expected_state(camera_handler_state state) { m_state_expected = state; }
+
+	bool mirrored() const { return m_mirrored; }
+	s32 format() const { return m_format; }
+	u32 bytesize() const { return m_bytesize; }
+	u32 width() const { return m_width; }
+	u32 height() const { return m_height; }
+	u32 frame_rate() const { return m_frame_rate; }

 protected:
 	std::mutex m_mutex;
-	atomic_t<camera_handler_state> m_state = camera_handler_state::closed;
 	bool m_mirrored = false;
 	s32 m_format = 2; // CELL_CAMERA_RAW8
 	u32 m_bytesize = 0;
 	u32 m_width = 640;
 	u32 m_height = 480;
 	u32 m_frame_rate = 30;
+
+private:
+	atomic_t<camera_handler_state> m_state = camera_handler_state::closed;
+	atomic_t<camera_handler_state> m_state_expected = camera_handler_state::closed;
 };
--- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h
@ -5,6 +5,7 @@
 #include "TextureUtils.h"

 #include "Emu/Memory/vm.h"
+#include "Emu/RSX/Host/MM.h"
 #include "util/vm.hpp"

 #include <list>
@ -29,8 +30,7 @@ namespace rsx
 	{
 		ensure(range.is_page_range());

-		//rsx_log.error("memory_protect(0x%x, 0x%x, %x)", static_cast<u32>(range.start), static_cast<u32>(range.length()), static_cast<u32>(prot));
-		utils::memory_protect(vm::base(range.start), range.length(), prot);
+		rsx::mm_protect(vm::base(range.start), range.length(), prot);

 #ifdef TEXTURE_CACHE_DEBUG
 		tex_cache_checker.set_protection(range, prot);
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@ -7,6 +7,7 @@

 #include "Emu/Memory/vm_locking.h"
 #include "Emu/RSX/rsx_methods.h"
+#include "Emu/RSX/Host/MM.h"
 #include "Emu/RSX/Host/RSXDMAWriter.h"
 #include "Emu/RSX/NV47/HW/context_accessors.define.h"

@ -1082,6 +1083,8 @@ void GLGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou

 bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 {
+	rsx::mm_flush(address);
+
 	const bool can_flush = is_current_thread();
 	const rsx::invalidation_cause cause = is_writing
 		? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
--- a/rpcs3/Emu/RSX/Host/MM.cpp
+++ b/rpcs3/Emu/RSX/Host/MM.cpp
@ -0,0 +1,110 @@
+#include "stdafx.h"
+#include "MM.h"
+#include <Emu/RSX/Common/simple_array.hpp>
+#include <Emu/RSX/RSXOffload.h>
+
+#include <Emu/Memory/vm.h>
+#include <Emu/IdManager.h>
+#include <Emu/system_config.h>
+#include <Utilities/address_range.h>
+#include <Utilities/mutex.h>
+
+namespace rsx
+{
+	rsx::simple_array<MM_block> g_deferred_mprotect_queue;
+	shared_mutex g_mprotect_queue_lock;
+
+	void mm_flush_mprotect_queue_internal()
+	{
+		for (const auto& block : g_deferred_mprotect_queue)
+		{
+			utils::memory_protect(reinterpret_cast<void*>(block.start), block.length, block.prot);
+		}
+
+		g_deferred_mprotect_queue.clear();
+	}
+
+	void mm_defer_mprotect_internal(u64 start, u64 length, utils::protection prot)
+	{
+		// We could stack and merge requests here, but that is more trouble than it is truly worth.
+		// A fresh call to memory_protect only takes a few nanoseconds of setup overhead, it is not worth the risk of hanging because of conflicts.
+		g_deferred_mprotect_queue.push_back({ start, length, prot });
+	}
+
+	void mm_protect(void* ptr, u64 length, utils::protection prot)
+	{
+		if (g_cfg.video.disable_async_host_memory_manager)
+		{
+			utils::memory_protect(ptr, length, prot);
+			return;
+		}
+
+		// Naive merge. Eventually it makes more sense to do conflict resolution, but it's not as important.
+		const auto start = reinterpret_cast<u64>(ptr);
+		const auto end = start + length;
+
+		std::lock_guard lock(g_mprotect_queue_lock);
+
+		if (prot == utils::protection::rw || prot == utils::protection::wx)
+		{
+			// Basically an unlock op. Flush if any overlap is detected
+			for (const auto& block : g_deferred_mprotect_queue)
+			{
+				if (block.overlaps(start, end))
+				{
+					mm_flush_mprotect_queue_internal();
+					break;
+				}
+			}
+
+			utils::memory_protect(ptr, length, prot);
+			return;
+		}
+
+		// No, Ro, etc.
+		mm_defer_mprotect_internal(start, length, prot);
+	}
+
+	void mm_flush()
+	{
+		std::lock_guard lock(g_mprotect_queue_lock);
+		mm_flush_mprotect_queue_internal();
+	}
+
+	void mm_flush(u32 vm_address)
+	{
+		std::lock_guard lock(g_mprotect_queue_lock);
+		if (g_deferred_mprotect_queue.empty())
+		{
+			return;
+		}
+
+		const auto addr = reinterpret_cast<u64>(vm::base(vm_address));
+		for (const auto& block : g_deferred_mprotect_queue)
+		{
+			if (block.overlaps(addr))
+			{
+				mm_flush_mprotect_queue_internal();
+				return;
+			}
+		}
+	}
+
+	void mm_flush_lazy()
+	{
+		if (!g_cfg.video.multithreaded_rsx)
+		{
+			mm_flush();
+			return;
+		}
+
+		std::lock_guard lock(g_mprotect_queue_lock);
+		if (g_deferred_mprotect_queue.empty())
+		{
+			return;
+		}
+
+		auto& rsxdma = g_fxo->get<rsx::dma_manager>();
+		rsxdma.backend_ctrl(mm_backend_ctrl::cmd_mm_flush, nullptr);
+	}
+}
--- a/rpcs3/Emu/RSX/Host/MM.h
+++ b/rpcs3/Emu/RSX/Host/MM.h
@ -0,0 +1,40 @@
+#pragma once
+
+#include <util/types.hpp>
+#include <util/vm.hpp>
+
+namespace rsx
+{
+	struct MM_block
+	{
+		u64 start;
+		u64 length;
+		utils::protection prot;
+
+		inline bool overlaps(u64 start, u64 end) const
+		{
+			// [Start, End] is not a proper closed range, there is an off-by-one by design.
+			// FIXME: Use address_range64
+			const u64 this_end = this->start + this->length;
+			return (this->start < end && start < this_end);
+		}
+
+		inline bool overlaps(u64 addr) const
+		{
+			// [Start, End] is not a proper closed range, there is an off-by-one by design.
+			// FIXME: Use address_range64
+			const u64 this_end = this->start + this->length;
+			return (addr >= start && addr < this_end);
+		}
+	};
+
+	enum mm_backend_ctrl : u32
+	{
+		cmd_mm_flush = 0x81000000,
+	};
+
+	void mm_protect(void* start, u64 length, utils::protection prot);
+	void mm_flush_lazy();
+	void mm_flush(u32 vm_address);
+	void mm_flush();
+}
--- a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
+++ b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
@ -7,6 +7,9 @@

 namespace rsx
 {
+	void mm_flush_lazy();
+	void mm_flush();
+
 	namespace util
 	{
 		template <bool FlushDMA, bool FlushPipe>
@ -24,17 +27,24 @@ namespace rsx
 					return;
 				}

-				if constexpr (FlushDMA)
+				if constexpr (FlushDMA || FlushPipe)
 				{
-					// If the backend handled the request, this call will basically be a NOP
-					g_fxo->get<rsx::dma_manager>().sync();
-				}
+					// Release op must be acoompanied by MM flush.
+					// FlushPipe implicitly does a MM flush but FlushDMA does not. Trigger the flush here
+					rsx::mm_flush();

-				if constexpr (FlushPipe)
-				{
-					// Manually flush the pipeline.
-					// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
-					RSX(ctx)->sync();
+					if constexpr (FlushDMA)
+					{
+						// If the backend handled the request, this call will basically be a NOP
+						g_fxo->get<rsx::dma_manager>().sync();
+					}
+
+					if constexpr (FlushPipe)
+					{
+						// Manually flush the pipeline.
+						// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
+						RSX(ctx)->sync();
+					}
 				}

 				if (handled)
--- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp
+++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp
@ -83,6 +83,7 @@ namespace rsx

 			add_dropdown(&g_cfg.io.pad_mode, localized_string_id::HOME_MENU_SETTINGS_INPUT_PAD_MODE);
 			add_unsigned_slider(&g_cfg.io.pad_sleep, localized_string_id::HOME_MENU_SETTINGS_INPUT_PAD_SLEEP, " µs", 100);
+			add_unsigned_slider(&g_cfg.io.fake_move_rotation_cone, localized_string_id::HOME_MENU_SETTINGS_INPUT_FAKE_MOVE_ROTATION_CONE, "°", 1);

 			apply_layout();
 		}
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@ -9,6 +9,7 @@
 #include "Common/time.hpp"
 #include "Core/RSXReservationLock.hpp"
 #include "Core/RSXEngLock.hpp"
+#include "Host/MM.h"
 #include "Host/RSXDMAWriter.h"
 #include "NV47/HW/context.h"
 #include "Program/GLSLCommon.h"
@ -2603,8 +2604,14 @@ namespace rsx
 						rsx_log.error("Depth texture bound to pipeline with unexpected format 0x%X", format);
 					}
 				}
-				else if (!backend_config.supports_hw_renormalization)
+				else if (!backend_config.supports_hw_renormalization /* &&
+					tex.min_filter() == rsx::texture_minify_filter::nearest &&
+					tex.mag_filter() == rsx::texture_magnify_filter::nearest*/)
 				{
+					// FIXME: This check should only apply to point-sampled textures. However, it severely regresses some games (id tech 5).
+					// This is because even when filtering is active, the error from the PS3 texture expansion still applies.
+					// A proper fix is to expand these formats into BGRA8 when high texture precision is required. That requires different GUI settings and inflation shaders, so it will be handled separately.
+
 					switch (format)
 					{
 					case CELL_GCM_TEXTURE_A1R5G5B5:
@ -3175,6 +3182,8 @@ namespace rsx
 	{
 		m_eng_interrupt_mask.clear(rsx::pipe_flush_interrupt);

+		mm_flush();
+
 		if (zcull_ctrl->has_pending())
 		{
 			zcull_ctrl->sync(this);
@ -3627,10 +3636,25 @@ namespace rsx
 			on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::read);
 		}

+		// Host sync
+		rsx::mm_flush();
+
 		on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::unmap);
 		m_invalidated_memory_range.invalidate();
 	}

+	void thread::renderctl(u32 request_code, void* args)
+	{
+		switch (request_code)
+		{
+		case rsx::mm_backend_ctrl::cmd_mm_flush:
+			rsx::mm_flush();
+			break;
+		default:
+			fmt::throw_exception("Unknown backend request: 0x%x", request_code);
+		}
+	}
+
 	//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
 	void thread::pause()
 	{
@ -3696,6 +3720,9 @@ namespace rsx
 	{
 		bool pause_emulator = false;

+		// MM sync. This is a pre-emptive operation, so we can use a deferred request.
+		rsx::mm_flush_lazy();
+
 		// Marks the end of a frame scope GPU-side
 		if (g_user_asked_for_frame_capture.exchange(false) && !capture_current_frame)
 		{
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@ -404,7 +404,7 @@ namespace rsx
 		virtual void notify_tile_unbound(u32 /*tile*/) {}

 		// control
-		virtual void renderctl(u32 /*request_code*/, void* /*args*/) {}
+		virtual void renderctl(u32 request_code, void* args);

 		// zcull
 		void notify_zcull_info_changed();
--- a/rpcs3/Emu/RSX/VK/VKCommandStream.h
+++ b/rpcs3/Emu/RSX/VK/VKCommandStream.h
@ -9,7 +9,7 @@ namespace vk
 	enum // callback commands
 	{
 		rctrl_queue_submit = 0x80000000,
-		rctrl_run_gc       = 0x80000001
+		rctrl_run_gc       = 0x80000001,
 	};

 	struct submit_packet
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@ -15,6 +15,7 @@
 #include "vkutils/scratch.h"

 #include "Emu/RSX/rsx_methods.h"
+#include "Emu/RSX/Host/MM.h"
 #include "Emu/RSX/Host/RSXDMAWriter.h"
 #include "Emu/RSX/NV47/HW/context_accessors.define.h"
 #include "Emu/Memory/vm_locking.h"
@ -1010,6 +1011,8 @@ VKGSRender::~VKGSRender()

 bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 {
+	rsx::mm_flush(address);
+
 	vk::texture_cache::thrashed_set result;
 	{
 		const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
@ -2460,6 +2463,9 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
 {
 	ensure(!m_queue_status.test_and_set(flush_queue_state::flushing));

+	// Host MM sync before executing anything on the GPU
+	rsx::mm_flush();
+
 	// Workaround for deadlock occuring during RSX offloader fault
 	// TODO: Restructure command submission infrastructure to avoid this condition
 	const bool sync_success = g_fxo->get<rsx::dma_manager>().sync();
@ -2824,7 +2830,7 @@ void VKGSRender::renderctl(u32 request_code, void* args)
 		break;
 	}
 	default:
-		fmt::throw_exception("Unhandled request code 0x%x", request_code);
+		rsx::thread::renderctl(request_code, args);
 	}
 }

--- a/rpcs3/Emu/localized_string_id.h
+++ b/rpcs3/Emu/localized_string_id.h
@ -223,6 +223,7 @@ enum class localized_string_id
 	HOME_MENU_SETTINGS_INPUT_CAMERA_FLIP,
 	HOME_MENU_SETTINGS_INPUT_PAD_MODE,
 	HOME_MENU_SETTINGS_INPUT_PAD_SLEEP,
+	HOME_MENU_SETTINGS_INPUT_FAKE_MOVE_ROTATION_CONE,
 	HOME_MENU_SETTINGS_ADVANCED,
 	HOME_MENU_SETTINGS_ADVANCED_PREFERRED_SPU_THREADS,
 	HOME_MENU_SETTINGS_ADVANCED_MAX_CPU_PREEMPTIONS,
--- a/rpcs3/Emu/scoped_progress_dialog.cpp
+++ b/rpcs3/Emu/scoped_progress_dialog.cpp
@ -91,8 +91,8 @@ scoped_progress_dialog::scoped_progress_dialog(std::string text) noexcept

 scoped_progress_dialog& scoped_progress_dialog::operator=(std::string text) noexcept
 {
-	// Exchange text atomically
-	g_progr_text_queue[m_text_index].exchange(make_single_value(std::move(text)));
+	// Set text atomically
+	g_progr_text_queue[m_text_index].store(make_single_value(std::move(text)));
 	return *this;
 }

--- a/rpcs3/Emu/system_config.h
+++ b/rpcs3/Emu/system_config.h
@ -178,6 +178,7 @@ struct cfg_root : cfg::node
 		cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
 		cfg::_bool host_label_synchronization{ this, "Allow Host GPU Labels", false };
 		cfg::_bool disable_msl_fast_math{ this, "Disable MSL Fast Math", false };
+		cfg::_bool disable_async_host_memory_manager{ this, "Disable Asynchronous Memory Manager", false, true };
 		cfg::_enum<output_scaling_mode> output_scaling{ this, "Output Scaling Mode", output_scaling_mode::bilinear, true };

 		struct node_vk : cfg::node
@ -282,6 +283,7 @@ struct cfg_root : cfg::node
 		cfg::string midi_devices{this, "Emulated Midi devices", "ßßß@@@ßßß@@@ßßß@@@"};
 		cfg::_bool load_sdl_mappings{ this, "Load SDL GameController Mappings", true };
 		cfg::_bool debug_overlay{ this, "IO Debug overlay", false, true };
+		cfg::uint<1, 180> fake_move_rotation_cone{ this, "Fake Move Rotation Cone", 10, true };

 	} io{ this };

--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@ -104,6 +104,7 @@
    <ClCompile Include="Emu\perf_monitor.cpp" />
    <ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
    <ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
+    <ClCompile Include="Emu\RSX\Host\MM.cpp" />
    <ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
    <ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
    <ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp" />
@ -621,6 +622,7 @@
    <ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
    <ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
    <ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
+    <ClInclude Include="Emu\RSX\Host\MM.h" />
    <ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h" />
    <ClInclude Include="Emu\RSX\NV47\FW\draw_call.hpp" />
    <ClInclude Include="Emu\RSX\NV47\FW\draw_call.inc.h" />
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@ -1312,6 +1312,9 @@
    <ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp">
      <Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
    </ClCompile>
+    <ClCompile Include="Emu\RSX\Host\MM.cpp">
+      <Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="Crypto\aes.h">
@ -2644,6 +2647,9 @@
    <ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h">
      <Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
    </ClInclude>
+    <ClInclude Include="Emu\RSX\Host\MM.h">
+      <Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
--- a/rpcs3/rpcs3qt/emu_settings_type.h
+++ b/rpcs3/rpcs3qt/emu_settings_type.h
@ -103,6 +103,7 @@ enum class emu_settings_type
 	DisableMSLFastMath,
 	OutputScalingMode,
 	ForceHwMSAAResolve,
+	DisableAsyncHostMM,

 	// Performance Overlay
 	PerfOverlayEnabled,
@ -294,6 +295,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
 	{ emu_settings_type::DisableMSLFastMath,         { "Video", "Disable MSL Fast Math"}},
 	{ emu_settings_type::OutputScalingMode,          { "Video", "Output Scaling Mode"}},
 	{ emu_settings_type::ForceHwMSAAResolve,         { "Video", "Force Hardware MSAA Resolve"}},
+	{ emu_settings_type::DisableAsyncHostMM,         { "Video", "Disable Asynchronous Memory Manager"}},

 	// Vulkan
 	{ emu_settings_type::VulkanAsyncTextureUploads,           { "Video", "Vulkan", "Asynchronous Texture Streaming 2"}},
--- a/rpcs3/rpcs3qt/flow_layout.cpp
+++ b/rpcs3/rpcs3qt/flow_layout.cpp
@ -79,10 +79,14 @@ flow_layout::~flow_layout()

 void flow_layout::clear()
 {
+	// We can't use a ranged loop here, since deleting the widget will call takeAt on the layout. So let's also use takeAt.
 	while (QLayoutItem* item = takeAt(0))
 	{
-		delete item->widget();
-		delete item;
+		if (item)
+		{
+			delete item->widget();
+			delete item;
+		}
 	}
 	m_item_list.clear();
 	m_positions.clear();
@ -185,8 +189,8 @@ int flow_layout::doLayout(const QRect& rect, bool testOnly) const
 	int x = effectiveRect.x();
 	int y = effectiveRect.y();
 	int lineHeight = 0;
-	int rows = 0;
-	int cols = 0;
+	int row_count = 0;
+	int col_count = 0;

 	if (m_dynamic_spacing)
 	{
@ -259,8 +263,8 @@ int flow_layout::doLayout(const QRect& rect, bool testOnly) const
 		pos.row = row;
 		pos.col = col++;

-		rows = std::max(rows, pos.row + 1);
-		cols = std::max(cols, pos.col + 1);
+		row_count = std::max(row_count, pos.row + 1);
+		col_count = std::max(col_count, pos.col + 1);

 		if (!testOnly)
 			item->setGeometry(QRect(QPoint(x, y), item->sizeHint()));
@ -269,8 +273,8 @@ int flow_layout::doLayout(const QRect& rect, bool testOnly) const
 		lineHeight = qMax(lineHeight, item->sizeHint().height());
 	}

-	m_rows = rows;
-	m_cols = cols;
+	m_rows = row_count;
+	m_cols = col_count;

 	return y + lineHeight - rect.y() + bottom;
 }
--- a/rpcs3/rpcs3qt/gui_save.h
+++ b/rpcs3/rpcs3qt/gui_save.h
@ -11,16 +11,11 @@ struct gui_save

 	gui_save()
 	{
-		key = "";
-		name = "";
-		def = QVariant();
 	}

 	gui_save(const QString& k, const QString& n, const QVariant& d)
+		: key(k), name(n), def(d)
 	{
-		key = k;
-		name = n;
-		def = d;
 	}

 	bool operator==(const gui_save& rhs) const noexcept
--- a/rpcs3/rpcs3qt/localized_emu.h
+++ b/rpcs3/rpcs3qt/localized_emu.h
@ -244,6 +244,7 @@ private:
 		case localized_string_id::HOME_MENU_SETTINGS_INPUT_CAMERA_FLIP: return tr("Camera Flip", "Input");
 		case localized_string_id::HOME_MENU_SETTINGS_INPUT_PAD_MODE: return tr("Pad Handler Mode", "Input");
 		case localized_string_id::HOME_MENU_SETTINGS_INPUT_PAD_SLEEP: return tr("Pad Handler Sleep", "Input");
+		case localized_string_id::HOME_MENU_SETTINGS_INPUT_FAKE_MOVE_ROTATION_CONE: return tr("Fake PS Move Rotation Cone", "Input");
 		case localized_string_id::HOME_MENU_SETTINGS_ADVANCED: return tr("Advanced");
 		case localized_string_id::HOME_MENU_SETTINGS_ADVANCED_PREFERRED_SPU_THREADS: return tr("Preferred SPU Threads", "Advanced");
 		case localized_string_id::HOME_MENU_SETTINGS_ADVANCED_MAX_CPU_PREEMPTIONS: return tr("Max Power Saving CPU-Preemptions", "Advanced");
--- a/rpcs3/rpcs3qt/qt_camera_handler.cpp
+++ b/rpcs3/rpcs3qt/qt_camera_handler.cpp
@ -47,6 +47,7 @@ void qt_camera_handler::set_camera(const QCameraDevice& camera_info)
 {
 	if (camera_info.isNull())
 	{
+		set_expected_state(camera_handler_state::closed);
 		reset();
 		return;
 	}
@ -57,9 +58,9 @@ void qt_camera_handler::set_camera(const QCameraDevice& camera_info)
 	camera_log.success("Using camera: id=\"%s\", description=\"%s\", front_facing=%d", camera_info.id().toStdString(), camera_info.description(), front_facing);

 	// Create camera and video surface
-	m_media_capture_session.reset(new QMediaCaptureSession(nullptr));
-	m_video_sink.reset(new qt_camera_video_sink(front_facing, nullptr));
-	m_camera.reset(new QCamera(camera_info));
+	m_media_capture_session = std::make_unique<QMediaCaptureSession>(nullptr);
+	m_video_sink = std::make_unique<qt_camera_video_sink>(front_facing, nullptr);
+	m_camera = std::make_unique<QCamera>(camera_info);

 	connect(m_camera.get(), &QCamera::activeChanged, this, &qt_camera_handler::handle_camera_active);
 	connect(m_camera.get(), &QCamera::errorOccurred, this, &qt_camera_handler::handle_camera_error);
@ -76,14 +77,37 @@ void qt_camera_handler::handle_camera_active(bool is_active)
 {
 	camera_log.notice("Camera active status changed to %d", is_active);

-	if (is_active)
+	// Check if the camera does what it's supposed to do.
+	const camera_handler_state expected_state = get_expected_state();
+
+	switch (expected_state)
 	{
-		m_state = camera_handler_state::running;
-	}
-	else
+	case camera_handler_state::closed:
+	case camera_handler_state::open:
 	{
-		m_state = camera_handler_state::closed;
+		if (is_active)
+		{
+			// This is not supposed to happen and indicates an unexpected QCamera issue
+			camera_log.error("Camera started unexpectedly");
+			set_state(camera_handler_state::running);
+			return;
+		}
+		break;
 	}
+	case camera_handler_state::running:
+	{
+		if (!is_active)
+		{
+			// This is not supposed to happen and indicates an unexpected QCamera issue
+			camera_log.error("Camera stopped unexpectedly");
+			set_state(camera_handler_state::open);
+			return;
+		}
+		break;
+	}
+	}
+
+	set_state(expected_state);
 }

 void qt_camera_handler::handle_camera_error(QCamera::Error error, const QString& errorString)
@ -100,7 +124,11 @@ void qt_camera_handler::open_camera()
 	{
 		camera_log.notice("Switching camera from %s to %s", m_camera_id, camera_id);
 		camera_log.notice("Stopping old camera...");
-		if (m_camera) m_camera->stop();
+		if (m_camera)
+		{
+			set_expected_state(camera_handler_state::open);
+			m_camera->stop();
+		}
 		m_camera_id = camera_id;
 	}

@ -129,7 +157,7 @@ void qt_camera_handler::open_camera()
 	{
 		if (m_camera_id.empty()) camera_log.notice("Camera disabled");
 		else camera_log.error("No camera found");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return;
 	}

@ -148,7 +176,7 @@ void qt_camera_handler::open_camera()
 	// Update camera and view finder settings
 	update_camera_settings();

-	m_state = camera_handler_state::open;
+	set_state(camera_handler_state::open);
 }

 void qt_camera_handler::close_camera()
@ -159,11 +187,12 @@ void qt_camera_handler::close_camera()
 	{
 		if (m_camera_id.empty()) camera_log.notice("Camera disabled");
 		else camera_log.error("No camera found");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return;
 	}

 	// Unload/close camera
+	set_expected_state(camera_handler_state::closed);
 	m_camera->stop();
 }

@ -175,7 +204,7 @@ void qt_camera_handler::start_camera()
 	{
 		if (m_camera_id.empty()) camera_log.notice("Camera disabled");
 		else camera_log.error("No camera found");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return;
 	}

@ -206,6 +235,7 @@ void qt_camera_handler::start_camera()
 #endif

 	// Start camera. We will start receiving frames now.
+	set_expected_state(camera_handler_state::running);
 	m_camera->start();
 }

@ -217,7 +247,7 @@ void qt_camera_handler::stop_camera()
 	{
 		if (m_camera_id.empty()) camera_log.notice("Camera disabled");
 		else camera_log.error("No camera found");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return;
 	}

@ -228,6 +258,7 @@ void qt_camera_handler::stop_camera()
 	}

 	// Stop camera. The camera will still be drawing power.
+	set_expected_state(camera_handler_state::open);
 	m_camera->stop();
 }

@ -284,26 +315,26 @@ camera_handler_base::camera_handler_state qt_camera_handler::get_image(u8* buf,
 		m_camera_id != camera_id)
 	{
 		camera_log.notice("Switching cameras");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return camera_handler_state::closed;
 	}

 	if (m_camera_id.empty())
 	{
 		camera_log.notice("Camera disabled");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return camera_handler_state::closed;
 	}

 	if (!m_camera || !m_video_sink)
 	{
 		camera_log.fatal("Error: camera invalid");
-		m_state = camera_handler_state::closed;
+		set_state(camera_handler_state::closed);
 		return camera_handler_state::closed;
 	}

 	// Backup current state. State may change through events.
-	const camera_handler_state current_state = m_state;
+	const camera_handler_state current_state = get_state();

 	if (current_state == camera_handler_state::running)
 	{
--- a/rpcs3/rpcs3qt/qt_camera_handler.h
+++ b/rpcs3/rpcs3qt/qt_camera_handler.h
@ -17,8 +17,6 @@ public:
 	qt_camera_handler();
 	virtual ~qt_camera_handler();

-	void set_camera(const QCameraDevice& camera_info);
-
 	void open_camera() override;
 	void close_camera() override;
 	void start_camera() override;
@ -31,11 +29,12 @@ public:
 	camera_handler_state get_image(u8* buf, u64 size, u32& width, u32& height, u64& frame_number, u64& bytes_read) override;

 private:
+	void set_camera(const QCameraDevice& camera_info);
 	void reset();
 	void update_camera_settings();

 	std::string m_camera_id;
-	std::shared_ptr<QCamera> m_camera;
+	std::unique_ptr<QCamera> m_camera;
 	std::unique_ptr<QMediaCaptureSession> m_media_capture_session;
 	std::unique_ptr<qt_camera_video_sink> m_video_sink;

--- a/rpcs3/rpcs3qt/settings_dialog.cpp
+++ b/rpcs3/rpcs3qt/settings_dialog.cpp
@ -94,7 +94,7 @@ void remove_item(QComboBox* box, int data_value, int def_value)

 extern const std::map<std::string_view, int> g_prx_list;

-settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std::shared_ptr<emu_settings> emu_settings, const int& tab_index, QWidget* parent, const GameInfo* game, bool create_cfg_from_global_cfg)
+settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std::shared_ptr<emu_settings> emu_settings, int tab_index, QWidget* parent, const GameInfo* game, bool create_cfg_from_global_cfg)
 	: QDialog(parent)
 	, m_tab_index(tab_index)
 	, ui(new Ui::settings_dialog)
@ -1593,6 +1593,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
 	ui->disableMslFastMath->setVisible(false);
 #endif

+	m_emu_settings->EnhanceCheckBox(ui->disableAsyncHostMM, emu_settings_type::DisableAsyncHostMM);
+	SubscribeTooltip(ui->disableAsyncHostMM, tooltips.settings.disable_async_host_mm);
+
 	// Comboboxes

 	m_emu_settings->EnhanceComboBox(ui->maxSPURSThreads, emu_settings_type::MaxSPURSThreads, true);
@ -2604,14 +2607,11 @@ void settings_dialog::ApplyStylesheet(bool reset)
 	}
 }

-int settings_dialog::exec()
+void settings_dialog::open()
 {
-	// singleShot Hack to fix following bug:
-	// If we use setCurrentIndex now we will miraculously see a resize of the dialog as soon as we
-	// switch to the cpu tab after conjuring the settings_dialog with another tab opened first.
-	// Weirdly enough this won't happen if we change the tab order so that anything else is at index 0.
-	ui->tab_widget_settings->setCurrentIndex(0);
-	QTimer::singleShot(0, [this]{ ui->tab_widget_settings->setCurrentIndex(m_tab_index); });
+	QDialog::open();
+
+	ui->tab_widget_settings->setCurrentIndex(m_tab_index);

 	// Open a dialog if your config file contained invalid entries
 	QTimer::singleShot(10, [this]
@ -2637,8 +2637,6 @@ int settings_dialog::exec()
 			}
 		}
 	});
-
-	return QDialog::exec();
 }

 void settings_dialog::SubscribeDescription(QLabel* description)
--- a/rpcs3/rpcs3qt/settings_dialog.h
+++ b/rpcs3/rpcs3qt/settings_dialog.h
@ -21,9 +21,9 @@ class settings_dialog : public QDialog
 	Q_OBJECT

 public:
-	explicit settings_dialog(std::shared_ptr<gui_settings> gui_settings, std::shared_ptr<emu_settings> emu_settings, const int& tab_index = 0, QWidget* parent = nullptr, const GameInfo* game = nullptr, bool create_cfg_from_global_cfg = true);
+	explicit settings_dialog(std::shared_ptr<gui_settings> gui_settings, std::shared_ptr<emu_settings> emu_settings, int tab_index = 0, QWidget* parent = nullptr, const GameInfo* game = nullptr, bool create_cfg_from_global_cfg = true);
 	~settings_dialog();
-	int exec() override;
+	void open() override;
 Q_SIGNALS:
 	void GuiStylesheetRequest();
 	void GuiRepaintRequest();
--- a/rpcs3/rpcs3qt/settings_dialog.ui
+++ b/rpcs3/rpcs3qt/settings_dialog.ui
@ -2695,6 +2695,13 @@
                 </property>
                </widget>
               </item>
+               <item>
+                <widget class="QCheckBox" name="disableAsyncHostMM">
+                 <property name="text">
+                  <string>Disable Asynchronous Memory Manager</string>
+                 </property>
+                </widget>
+               </item>
              </layout>
             </widget>
            </item>
--- a/rpcs3/rpcs3qt/tooltips.h
+++ b/rpcs3/rpcs3qt/tooltips.h
@ -40,6 +40,7 @@ public:
 		const QString allow_host_labels            = tr("Allows the host GPU to synchronize with CELL directly. This incurs a performance penalty, but exposes the true state of GPU objects to the guest CPU. Can help eliminate visual noise and glitching at the cost of performance. Use with caution.");
 		const QString force_hw_MSAA                = tr("Forces MSAA to use the host GPU's resolve capabilities for all sampling operations.\nThis option incurs a performance penalty as well as the risk of visual artifacts but can yield crisper visuals when MSAA is enabled.");
 		const QString disable_vertex_cache         = tr("Disables the vertex cache.\nMight resolve missing or flickering graphics output.\nMay degrade performance.");
+		const QString disable_async_host_mm        = tr("Force host memory management calls to be inlined instead of handled asynchronously.\nThis can cause severe performance degradation and stuttering in some games.\nThis option is only needed by developers to debug problems with texture cache memory protection.");
 		const QString zcull_operation_mode         = tr("Changes ZCULL report synchronization behaviour. Experiment to find the best option for your game. Approximate mode is recommended for most games.\n· Precise is the most accurate to PS3 behaviour. Required for accurate visuals in some titles such as Demon's Souls and The Darkness.\n· Approximate is a much faster way to generate occlusion data which may not always match what the PS3 would generate. Works well with most PS3 games.\n· Relaxed changes the synchronization method completely and can greatly improve performance in some games or completely break others.");
 		const QString max_spurs_threads            = tr("Limits the maximum number of SPURS threads in each thread group.\nMay improve performance in some cases, especially on systems with limited number of hardware threads.\nLimiting the number of threads is likely to cause crashes; it's recommended to keep this at the default value.");
 		const QString sleep_timers_accuracy        = tr("Changes the sleep period accuracy.\n'As Host' uses default accuracy of the underlying operating system, while 'All Timers' attempts to improve it.\n'Usleep Only' limits the adjustments to usleep syscall only.\nCan affect performance in unexpected ways.");
--- a/rpcs3/rpcs3qt/user_account.h
+++ b/rpcs3/rpcs3qt/user_account.h
@ -14,9 +14,9 @@ class user_account
 public:
 	explicit user_account(const std::string& user_id = "00000001");

-	std::string GetUserId() const { return m_user_id; }
-	std::string GetUserDir() const { return m_user_dir; }
-	std::string GetUsername() const { return m_username; }
+	const std::string& GetUserId() const { return m_user_id; }
+	const std::string& GetUserDir() const { return m_user_dir; }
+	const std::string& GetUsername() const { return m_username; }

 	static std::map<u32, user_account> GetUserAccounts(const std::string& base_dir);

--- a/rpcs3/util/simd.hpp
+++ b/rpcs3/util/simd.hpp
@ -21,6 +21,7 @@
 #include <arm_neon.h>
 #endif

+#include <algorithm>
 #include <cmath>
 #include <math.h>
 #include <cfenv>
@ -1967,6 +1968,15 @@ inline v128 gv_mulfs(const v128& a, const v128& b)
 #endif
 }

+inline v128 gv_mulfs(const v128& a, f32 b)
+{
+#if defined(ARCH_X64)
+	return _mm_mul_ps(a, _mm_set_ps1(b));
+#elif defined(ARCH_ARM64)
+	return vmulq_n_f32(a, b);
+#endif
+}
+
 inline v128 gv_hadds8x2(const v128& a)
 {
 #if defined(__SSSE3__)
@ -2979,6 +2989,23 @@ inline v128 gv_rol16(const v128& a, const v128& b)
 #endif
 }

+// For each 16-bit element, r = rotate a by count
+template <u8 Count>
+inline v128 gv_rol16(const v128& a)
+{
+	constexpr u8 count = Count & 0xf;
+#if defined(ARCH_X64)
+	return _mm_or_si128(_mm_srli_epi16(a, 16 - count), _mm_slli_epi16(a, count));
+#elif defined(ARCH_ARM64)
+	return vorrq_u16(vshrq_n_u16(a, 16 - count), vshlq_n_u16(a, count));
+#else
+	v128 r;
+	for (u32 i = 0; i < 8; i++)
+		r._u16[i] = std::rotl(a._u16[i], count);
+	return r;
+#endif
+}
+
 // For each 32-bit element, r = rotate a by b
 inline v128 gv_rol32(const v128& a, const v128& b)
 {
@ -2997,15 +3024,16 @@ inline v128 gv_rol32(const v128& a, const v128& b)
 }

 // For each 32-bit element, r = rotate a by count
-inline v128 gv_rol32(const v128& a, u32 count)
+template <u8 Count>
+inline v128 gv_rol32(const v128& a)
 {
-	count %= 32;
-#if defined(ARCH_X64)
-	return _mm_or_epi32(_mm_srli_epi32(a, 32 - count), _mm_slli_epi32(a, count));
+	constexpr u8 count = Count & 0x1f;
+#if defined(__AVX512VL__)
+	return _mm_rol_epi32(a, count);
+#elif defined(ARCH_X64)
+	return _mm_or_si128(_mm_srli_epi32(a, 32 - count), _mm_slli_epi32(a, count));
 #elif defined(ARCH_ARM64)
-	const auto amt1 = vdupq_n_s32(count);
-	const auto amt2 = vdupq_n_s32(count - 32);
-	return vorrq_u32(vshlq_u32(a, amt1), vshlq_u32(a, amt2));
+	return vorrq_u32(vshrq_n_u32(a, 32 - count), vshlq_n_u32(a, count));
 #else
 	v128 r;
 	for (u32 i = 0; i < 4; i++)
@ -3107,6 +3135,139 @@ inline auto gv_shuffle_right(A&& a)
 	FOR_X64(unary_op, kIdPsrldq, kIdVpsrldq, std::forward<A>(a), Count);
 }

+// Load 32-bit integer into the first element of a new vector, set other elements to zero
+inline v128 gv_loadu32(const void* ptr)
+{
+#if defined(ARCH_X64)
+	return _mm_loadu_si32(ptr);
+#elif defined(ARCH_ARM64)
+	return vld1q_lane_u32(static_cast<const u32*>(ptr), vdupq_n_u32(0), 0);
+#endif
+}
+
+// Load 16-bit integer into an existing vector at the position specified by Index
+template <u8 Index>
+inline v128 gv_insert16(const v128& vec, u16 value)
+{
+#if defined(ARCH_X64)
+	return _mm_insert_epi16(vec, value, Index);
+#elif defined(ARCH_ARM64)
+	return vsetq_lane_u16(value, vec, Index & 0x7);
+#endif
+}
+
+// For each 8-bit element,
+// if ctrl >= 0 && ctrl < 16 then r = vec[ctrl],
+// else if ctrl < 0 then r = 0
+inline v128 gv_shuffle8(const v128& vec, const v128& ctrl)
+{
+	AUDIT(std::ranges::none_of(ctrl._chars, [](s8 i){ return i >= static_cast<s8>(sizeof(v128)); }), "All indices must be in the range [0, 15] or negative, since PSHUFB and TBL behave differently otherwise");
+#if defined(__SSSE3__)
+	return _mm_shuffle_epi8(vec, ctrl);
+#elif defined(ARCH_ARM64)
+	return vqtbl1q_s8(vec, ctrl);
+#else
+	v128 r;
+	for (s32 i = 0; i < 16; i++)
+		r._s8[i] = ctrl._s8[i] < 0 ? 0 : vec._s8[ctrl._s8[i] & 0xf];
+	return r;
+#endif
+}
+
+// For each 2-bit index in Control, r = vec[index]
+template <u8 Control>
+inline v128 gv_shuffle32(const v128& vec)
+{
+#if defined(ARCH_X64)
+	return _mm_shuffle_epi32(vec, Control);
+#elif defined(ARCH_ARM64)
+	constexpr u8 idx0 = (Control & 3) * sizeof(s32);
+	constexpr u8 idx1 = (Control >> 2 & 3) * sizeof(s32);
+	constexpr u8 idx2 = (Control >> 4 & 3) * sizeof(s32);
+	constexpr u8 idx3 = (Control >> 6 & 3) * sizeof(s32);
+
+	constexpr uint8x16_t idx_vec = { idx0, idx0 + 1, idx0 + 2, idx0 + 3, idx1, idx1 + 1, idx1 + 2, idx1 + 3, idx2, idx2 + 1, idx2 + 2, idx2 + 3, idx3, idx3 + 1, idx3 + 2, idx3 + 3 };
+
+	return vqtbl1q_s8(vec, idx_vec);
+#endif
+}
+
+// For each index, r = vec[index & 3]
+template <u8 Index0, u8 Index1, u8 Index2, u8 Index3>
+inline v128 gv_shuffle32(const v128& vec)
+{
+#if defined(ARCH_X64)
+	return _mm_shuffle_epi32(vec, (Index0 & 3) | (Index1 & 3) << 2 | (Index2 & 3) << 4 | (Index3 & 3) << 6);
+#elif defined(ARCH_ARM64)
+	constexpr u8 idx0 = (Index0 & 3) * sizeof(s32);
+	constexpr u8 idx1 = (Index1 & 3) * sizeof(s32);
+	constexpr u8 idx2 = (Index2 & 3) * sizeof(s32);
+	constexpr u8 idx3 = (Index3 & 3) * sizeof(s32);
+
+	constexpr uint8x16_t idx_vec = { idx0, idx0 + 1, idx0 + 2, idx0 + 3, idx1, idx1 + 1, idx1 + 2, idx1 + 3, idx2, idx2 + 1, idx2 + 2, idx2 + 3, idx3, idx3 + 1, idx3 + 2, idx3 + 3 };
+
+	return vqtbl1q_s8(vec, idx_vec);
+#endif
+}
+
+// For the first two 2-bit indices in Control, r = a[index],
+// for the last two indices, r = b[index]
+template <u8 Control>
+inline v128 gv_shufflefs(const v128& a, const v128& b)
+{
+#if defined(ARCH_X64)
+	return _mm_shuffle_ps(a, b, Control);
+#elif defined(ARCH_ARM64)
+	constexpr u8 idx0 = (Control & 3) * sizeof(s32);
+	constexpr u8 idx1 = (Control >> 2 & 3) * sizeof(s32);
+	constexpr u8 idx2 = (Control >> 4 & 3) * sizeof(s32) + sizeof(v128);
+	constexpr u8 idx3 = (Control >> 6 & 3) * sizeof(s32) + sizeof(v128);
+
+	constexpr uint8x16_t idx_vec = { idx0, idx0 + 1, idx0 + 2, idx0 + 3, idx1, idx1 + 1, idx1 + 2, idx1 + 3, idx2, idx2 + 1, idx2 + 2, idx2 + 3, idx3, idx3 + 1, idx3 + 2, idx3 + 3 };
+
+	return vqtbl2q_s8({ a, b }, idx_vec);
+#endif
+}
+
+// For the first two indices, r = a[index & 3],
+// for the last two indices, r = b[index & 3]
+template <u8 Index0, u8 Index1, u8 Index2, u8 Index3>
+inline v128 gv_shufflefs(const v128& a, const v128& b)
+{
+#if defined(ARCH_X64)
+	return _mm_shuffle_ps(a, b, (Index0 & 3) | (Index1 & 3) << 2 | (Index2 & 3) << 4 | (Index3 & 3) << 6);
+#elif defined(ARCH_ARM64)
+	constexpr u8 idx0 = (Index0 & 3) * sizeof(s32);
+	constexpr u8 idx1 = (Index1 & 3) * sizeof(s32);
+	constexpr u8 idx2 = (Index2 & 3) * sizeof(s32) + sizeof(v128);
+	constexpr u8 idx3 = (Index3 & 3) * sizeof(s32) + sizeof(v128);
+
+	constexpr uint8x16_t idx_vec = { idx0, idx0 + 1, idx0 + 2, idx0 + 3, idx1, idx1 + 1, idx1 + 2, idx1 + 3, idx2, idx2 + 1, idx2 + 2, idx2 + 3, idx3, idx3 + 1, idx3 + 2, idx3 + 3 };
+
+	return vqtbl2q_s8({ a, b }, idx_vec);
+#endif
+}
+
+// For each 32-bit element, reverse byte order
+inline v128 gv_rev32(const v128& vec)
+{
+#if defined(__SSSE3__)
+	return _mm_shuffle_epi8(vec, _mm_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12));
+#elif defined(ARCH_ARM64)
+	return vrev32q_u8(vec);
+#else
+	return gv_rol32<16>(gv_rol16<8>(vec));
+#endif
+}
+
+// For each 32-bit element, convert between big-endian and native-endian
+inline v128 gv_to_be32(const v128& vec)
+{
+	if constexpr (std::endian::native == std::endian::little)
+		return gv_rev32(vec);
+	return vec;
+}
+
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #elif defined(__GNUC__)
--- a/rpcs3/util/types.hpp
+++ b/rpcs3/util/types.hpp
@ -989,14 +989,14 @@ template <typename To, typename From> requires (std::is_integral_v<decltype(std:
 	constexpr bool is_from_signed = std::is_signed_v<CommonFrom>;
 	constexpr bool is_to_signed = std::is_signed_v<CommonTo>;

-	constexpr auto from_mask = is_from_signed > is_to_signed ? UnFrom{umax} >> 1 : UnFrom{umax};
-	constexpr auto to_mask = is_to_signed > is_from_signed ? UnTo{umax} >> 1 : UnTo{umax};
+	constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax};
+	constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax};

 	constexpr auto mask = ~(from_mask & to_mask);

 	// Signed to unsigned always require test
 	// Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size
-	if constexpr (is_from_signed > is_to_signed || to_mask < from_mask)
+	if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask)
 	{
 		// Try to optimize test if both are of the same signedness
 		if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonTo>(value) != value) [[unlikely]]