diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt
index c157140431..38a99264df 100644
--- a/rpcs3/Emu/CMakeLists.txt
+++ b/rpcs3/Emu/CMakeLists.txt
@@ -25,6 +25,7 @@ target_include_directories(rpcs3_emu
# Utilities
target_sources(rpcs3_emu PRIVATE
../util/atomic.cpp
+ ../util/atomic2.cpp
../../Utilities/bin_patch.cpp
../../Utilities/cond.cpp
../../Utilities/Config.cpp
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index 9345092ede..ed2a4af04a 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -73,6 +73,9 @@
NotUsing
+
+ NotUsing
+
NotUsing
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 064c9998f4..b6712e7e23 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -830,6 +830,9 @@
Utilities
+
+ Utilities
+
Crypto
diff --git a/rpcs3/util/atomic2.cpp b/rpcs3/util/atomic2.cpp
new file mode 100644
index 0000000000..2227600c22
--- /dev/null
+++ b/rpcs3/util/atomic2.cpp
@@ -0,0 +1,308 @@
+#include "atomic2.hpp"
+#include "Utilities/JIT.h"
+#include "Utilities/asm.h"
+#include "Utilities/sysinfo.h"
+
+// True when utils::has_rtm() reports RTM support; checked by commit(), whose TSX path is still a TODO
+static const bool s_use_rtm = utils::has_rtm();
+
+// Number of 64-bit allocation groups: 4096 slots in total, 4095 usable because slot 0 is reserved
+static constexpr u64 s_rec_gcount = 4096 / 64;
+
+// Global record pool, indexed by the id returned from rec_alloc()
+static stx::multi_cas_record s_records[s_rec_gcount * 64]{};
+
+// Allocation bitmap, one bit per record (set = in use); bit 0 of group 0 starts set so id 0 is never handed out
+static atomic_t s_rec_bits[s_rec_gcount]{1};
+
+static constexpr u64 s_state_mask = 3; // Low two bits of a record's m_state hold the transaction state
+static constexpr u64 s_state_undef = 0; // Outcome not decided yet
+static constexpr u64 s_state_failure = 1; // Transaction failed or was aborted by another thread
+static constexpr u64 s_state_success = 2; // Transaction acknowledged as successful
+static constexpr u64 s_ref_mask = ~s_state_mask; // Remaining bits of m_state hold the reference count
+static constexpr u64 s_ref_one = s_state_mask + 1; // Adding this value to m_state adds one reference
+
+static u64 rec_alloc() // Reserve a free slot in s_records and return its index; loops until some group has a free bit
+{
+ const u32 start = __rdtsc(); // Timestamp counter (truncated to 32 bits) picks the first group to probe
+
+ for (u32 i = 0;; i++)
+ {
+ const u32 group = (i + start) % s_rec_gcount; // Visit the groups in order, wrapping around
+
+ const auto [bits, ok] = s_rec_bits[group].fetch_op([](u64& bits)
+ {
+ if (~bits)
+ {
+ // Set lowest clear bit
+ bits |= bits + 1;
+ return true;
+ }
+
+ return false; // Every bit in this group is set; the lambda leaves the value unchanged
+ });
+
+ if (ok)
+ {
+ // Find lowest clear bit (presumably `bits` is the pre-update value returned by fetch_op, so this is the bit just set)
+ return group * 64 + utils::cnttz64(~bits, false);
+ }
+ }
+
+ // TODO: unreachable
+ std::abort();
+ return 0;
+}
+
+static bool cmpxchg16(s64(&dest)[2], s64(&cmp_res)[2], s64 exch_high, s64 exch_low) // 128-bit compare-and-swap: if dest equals cmp_res, store {exch_low, exch_high}; returns true when the store happened
+{
+#ifdef _MSC_VER
+ return !!_InterlockedCompareExchange128(dest, exch_high, exch_low, cmp_res); // MSVC intrinsic takes the new value as separate high/low halves
+#else
+ s64 exch[2]{exch_low, exch_high}; // Element 0 is the low half, element 1 the high half
+ return __atomic_compare_exchange(&dest, &cmp_res, &exch, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); // Strong CAS (weak = false) with sequentially consistent ordering
+#endif
+}
+
+bool stx::multi_cas_record::commit() const noexcept // Try to apply every recorded CAS item as one atomic step; returns true on success, false if the caller should retry
+{
+ // Transaction cancelled (or nothing was loaded): nothing to do
+ if (m_count == 0)
+ {
+ return true;
+ }
+
+ // Try TSX if available (not implemented yet, execution falls through to the software path)
+ if (s_use_rtm)
+ {
+ // TODO
+ }
+
+ static auto rec_unref = [](u64 id) // Helper function to drop one reference from record `id`, freeing its slot on the last one
+ {
+ if (id && id < s_rec_gcount * 64)
+ {
+ auto [_, ok] = s_records[id].m_state.fetch_op([](u64& state)
+ {
+ if (state < s_ref_one)
+ {
+ return 0; // No references held: leave the state untouched
+ }
+
+ state -= s_ref_one;
+
+ if (state < s_ref_one)
+ {
+ state = 0; // Last reference gone: also clear the state bits
+ return 2;
+ }
+
+ return 1; // Other references remain
+ });
+
+ if (ok > 1)
+ {
+ s_rec_bits[id / 64] &= ~(u64{1} << (id % 64)); // Clear the allocation bit so rec_alloc() can hand the slot out again
+ }
+ }
+ };
+
+ // Helper function to complete successful transaction
+ static auto rec_complete = [](u64 id)
+ {
+ for (u32 i = 0; i < s_records[id].m_count; i++)
+ {
+ auto& item = s_records[id].m_list[i];
+
+ atomic2 cmp;
+ cmp.m_data[0] = item.m_old;
+ cmp.m_data[1] = id; // Expected contents: old value still tagged with this record
+
+ if (item.m_addr->load() == item.m_old && atomic_storage::load(item.m_addr->m_data[1]) == id)
+ {
+ if (cmpxchg16(item.m_addr->m_data, cmp.m_data, 0, item.m_new)) // Store the new value and clear the tag
+ {
+ }
+ }
+ }
+ };
+
+ // Helper function to deal with existing transaction: returns `id` (with one reference taken) if its tag may be overwritten, 0 otherwise
+ static auto rec_try_abort = [](u64 id) -> u64
+ {
+ if (id >= s_rec_gcount * 64)
+ {
+ std::abort(); // Tag does not name a pool record
+ }
+
+ auto [_old, ok] = s_records[id].m_state.fetch_op([](u64& state)
+ {
+ if (state < s_ref_one)
+ {
+ // Don't reference if no references
+ return false;
+ }
+
+ if ((state & s_state_mask) == s_state_undef)
+ {
+ // Break transaction if possible
+ state |= s_state_failure;
+ }
+
+ state += s_ref_one;
+ return true;
+ });
+
+ if (!ok)
+ {
+ return 0;
+ }
+
+ if ((_old & s_state_mask) != s_state_success)
+ {
+ // Allow to overwrite failing transaction (the caller must rec_unref the returned id)
+ return id;
+ }
+
+ // Help to complete
+ rec_complete(id);
+ rec_unref(id);
+ return 0;
+ };
+
+ // Single CAS path (no global record is allocated)
+ if (m_count == 1)
+ {
+ atomic2 cmp;
+ cmp.m_data[0] = m_list[0].m_old;
+ cmp.m_data[1] = 0;
+
+ while (auto ptr = m_list[0].m_addr)
+ {
+ if (ptr->load() != m_list[0].m_old)
+ {
+ return false; // Value changed since it was loaded
+ }
+
+ cmp.m_data[1] = atomic_storage::load(ptr->m_data[1]);
+
+ if (!cmp.m_data[1] && cmpxchg16(ptr->m_data, cmp.m_data, 0, m_list[0].m_new))
+ {
+ return true; // Untagged atom updated directly
+ }
+ else if (cmp.m_data[0] != m_list[0].m_old)
+ {
+ return false;
+ }
+ else if (cmp.m_data[1])
+ {
+ if (u64 _id = rec_try_abort(cmp.m_data[1])) // Atom is tagged by another transaction
+ {
+ if (cmpxchg16(ptr->m_data, cmp.m_data, 0, m_list[0].m_new))
+ {
+ rec_unref(_id);
+ return true;
+ }
+
+ rec_unref(_id); // Lost the race: release the reference and retry the loop
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ // Unreachable
+ std::abort();
+ }
+
+ // Allocate global record and copy data
+ const u64 id = rec_alloc();
+
+ for (u32 i = 0; i < (m_count + 1) / 2; i++) // Copy in pairs, rounding up; the former `m_count / 2 + 1` bound copied two items past the 10-element m_list when m_count == 10
+ {
+ std::memcpy(s_records[id].m_list + i * 2, m_list + i * 2, sizeof(multi_cas_item) * 2);
+ }
+
+ s_records[id].m_count = m_count;
+ s_records[id].m_state = s_ref_one; // One reference (ours), state undefined
+
+ // Try to install CAS items (stops early once the record is no longer in the undefined state)
+ for (u32 i = 0; i < m_count && (s_records[id].m_state & s_state_mask) == s_state_undef; i++)
+ {
+ atomic2 cmp;
+ cmp.m_data[0] = m_list[i].m_old;
+ cmp.m_data[1] = 0;
+
+ while (auto ptr = m_list[i].m_addr)
+ {
+ if (ptr->load() != m_list[i].m_old)
+ {
+ s_records[id].m_state |= s_state_failure; // Value changed since it was loaded
+ break;
+ }
+
+ cmp.m_data[1] = atomic_storage::load(ptr->m_data[1]);
+
+ if (!cmp.m_data[1] && cmpxchg16(ptr->m_data, cmp.m_data, id, m_list[i].m_old)) // Keep the old value, tag the atom with our record id
+ {
+ break;
+ }
+ else if (cmp.m_data[0] != m_list[i].m_old)
+ {
+ s_records[id].m_state |= s_state_failure;
+ break;
+ }
+ else if (cmp.m_data[1])
+ {
+ if (u64 _id = rec_try_abort(cmp.m_data[1])) // Atom is tagged by another transaction
+ {
+ if (cmpxchg16(ptr->m_data, cmp.m_data, id, m_list[i].m_old))
+ {
+ rec_unref(_id);
+ break;
+ }
+
+ rec_unref(_id); // Lost the race: release the reference and retry the loop
+ }
+ else
+ {
+ s_records[id].m_state |= s_state_failure;
+ break;
+ }
+ }
+ }
+ }
+
+ // Try to acknowledge transaction success (fails if the failure bit was set by us or by another thread)
+ auto [_, ok] = s_records[id].m_state.fetch_op([](u64& state)
+ {
+ if (state & s_state_failure)
+ {
+ return false;
+ }
+
+ state |= s_state_success;
+ return true;
+ });
+
+ // Complete transaction on success, or cleanup on failure
+ for (u32 i = 0; i < m_count; i++)
+ {
+ auto& item = m_list[i];
+
+ atomic2 cmp;
+ cmp.m_data[0] = item.m_old;
+ cmp.m_data[1] = id; // Only touch atoms that still carry our tag
+
+ if (item.m_addr->load() == item.m_old && atomic_storage::load(item.m_addr->m_data[1]) == id)
+ {
+ // Restore old or set new (either way the tag is cleared)
+ cmpxchg16(item.m_addr->m_data, cmp.m_data, 0, ok ? item.m_new : item.m_old);
+ }
+ }
+
+ rec_unref(id); // Drop our own reference; the slot is freed once nobody else holds one
+ return ok;
+}
diff --git a/rpcs3/util/atomic2.hpp b/rpcs3/util/atomic2.hpp
new file mode 100644
index 0000000000..afb5674517
--- /dev/null
+++ b/rpcs3/util/atomic2.hpp
@@ -0,0 +1,156 @@
+#pragma once
+
+#include
+#include "util/atomic.hpp"
+
+namespace stx
+{
+ // Unsigned 64-bit atomic for multi-cas (occupies 128 bits)
+ class alignas(16) atomic2 // 16-byte aligned: multi_cas_record::commit() updates both words with one 128-bit CAS
+ {
+ // m_data[0] is the actual value; m_data[1] is the id of the global record that commit() has installed on this atom (zero if none)
+ std::int64_t m_data[2]{};
+
+ friend class multi_cas_record; // commit() reads and writes m_data directly
+
+ public:
+ // Can't be really uninitialized or it'll be fundamentally broken
+ constexpr atomic2() noexcept = default; // m_data is zeroed by its default member initializer
+
+ atomic2(const atomic2&) = delete;
+
+ atomic2& operator=(const atomic2&) = delete;
+
+ constexpr atomic2(u64 value) noexcept // Start with the given value and no record installed
+ : m_data{static_cast(value), s64{0}}
+ {
+ }
+
+ // Simply observe the state
+ u64 load() const noexcept
+ {
+ return atomic_storage::load(m_data[0]); // Reads only the value word, not the record id
+ }
+
+ // void wait(u64 old_value) const noexcept;
+ // void notify_one() noexcept;
+ // void notify_all() noexcept;
+ };
+
+ // Atomic CAS item: one target atom, the value observed when it was loaded, and the value to store on commit
+ class multi_cas_item
+ {
+ atomic2* m_addr; // Target atom
+ std::uint64_t m_old; // Value read by multi_cas_record::load(); commit() expects the atom to still hold it
+ std::uint64_t m_new; // Value to write if the commit succeeds
+
+ friend class multi_cas_record; // The record fills in and reads these fields
+
+ public:
+ multi_cas_item() noexcept = default; // Members stay uninitialized until multi_cas_record::load() fills them
+
+ multi_cas_item(const multi_cas_item&) = delete;
+
+ multi_cas_item& operator=(const multi_cas_item&) = delete;
+
+ u64 get_old() const noexcept // Value observed when the item was loaded
+ {
+ return m_old;
+ }
+
+ operator u64() const noexcept // Reads the pending new value (equal to the old value until assigned)
+ {
+ return m_new;
+ }
+
+ void operator=(u64 value) noexcept // Sets the value to be written on commit
+ {
+ m_new = value;
+ }
+ };
+
+ // An object passed to multi_cas lambda; collects the atoms touched by one transaction
+ class alignas(64) multi_cas_record
+ {
+ // Ref counter and Multi-CAS state (used by commit() on the global copies of the record)
+ atomic_t m_state;
+
+ // Total number of CASes (no initializer here; multi_cas calls cancel() before every attempt)
+ std::uint64_t m_count;
+
+ // Support up to 10 CASes
+ multi_cas_item m_list[10];
+
+ public:
+ // Read atomic value and allocate "writable" item
+ multi_cas_item& load(atomic2& atom) noexcept
+ {
+ if (m_count >= std::size(m_list))
+ {
+ std::abort(); // Item list is full
+ }
+
+ auto& r = m_list[m_count++];
+ r.m_addr = &atom;
+ r.m_old = atom.load();
+ r.m_new = r.m_old; // Until assigned, the item writes back the value it read
+ return r;
+ }
+
+ // Reset transaction (invalidates item references)
+ void cancel() noexcept
+ {
+ m_count = 0;
+ }
+
+ // Try to commit the transaction; returns true on success (invoked by multi_cas, don't call directly)
+ bool commit() const noexcept;
+ };
+
+ template // Primary template: holds the value returned by the transaction lambda
+ struct multi_cas_result
+ {
+ static constexpr bool is_void = false;
+
+ T ret; // Assigned in the multi_cas constructor on every attempt
+ };
+
+ template <>
+ struct multi_cas_result // Specialization with no stored result; NOTE(review): the template argument is not visible here, presumably void, confirm
+ {
+ static constexpr bool is_void = true; // multi_cas checks this flag to skip storing a result
+ };
+
+ template
+ class multi_cas final : Context, multi_cas_record, public multi_cas_result> // Runs the callable as a multi-CAS transaction; only the result base is public
+ {
+ using result = multi_cas_result>;
+ using record = multi_cas_record;
+
+ public:
+ // Implicit deduction guide candidate constructor (for lambda); performs the whole transaction, retrying until commit succeeds
+ multi_cas(Context&& f) noexcept
+ : Context(std::forward(f))
+ {
+ while (true)
+ {
+ multi_cas_record& rec = *this; // The record base is what the callable receives
+ record::cancel(); // Start every attempt with an empty item list
+
+ if constexpr (result::is_void)
+ {
+ Context::operator()(rec);
+ }
+ else
+ {
+ result::ret = Context::operator()(rec); // Overwritten on each retry
+ }
+
+ if (record::commit())
+ {
+ return; // Committed; otherwise loop and run the callable again
+ }
+ }
+ }
+ };
+}
\ No newline at end of file