diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 79f09e2495..14fc479a04 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -165,13 +165,6 @@ union u128 } _bit; - //operator u64() const { return _u64[0]; } - //operator u32() const { return _u32[0]; } - //operator u16() const { return _u16[0]; } - //operator u8() const { return _u8[0]; } - - //operator bool() const { return _u64[0] != 0 || _u64[1] != 0; } - static u128 from64(u64 _0, u64 _1 = 0) { u128 ret; @@ -443,7 +436,7 @@ static force_inline u128 sync_fetch_and_xor(volatile u128* dest, u128 value) } } -template struct se_t; +template struct se_t; template struct se_t { @@ -501,16 +494,13 @@ template struct se_t } }; -template struct const_se_t; - -template struct const_se_t -{ - static const u8 value = _value; -}; +template struct const_se_t; template struct const_se_t { - static const u16 value = ((_value >> 8) & 0xff) | ((_value << 8) & 0xff00); + static const u16 value = + ((_value >> 8) & 0x00ff) | + ((_value << 8) & 0xff00); }; template struct const_se_t @@ -600,9 +590,9 @@ public: using stype = be_storage_t>; #ifdef IS_LE_MACHINE - stype m_data; + stype m_data; // don't access directly #else - type m_data; + type m_data; // don't access directly #endif static_assert(!std::is_class::value, "be_t<> error: invalid type (class or structure)"); @@ -695,41 +685,41 @@ public: be_t& operator --() { *this -= 1; return *this; } }; -template inline std::enable_if_t::value, bool> operator ==(const be_t& left, const be_t& right) +template inline std::enable_if_t::value && std::is_integral::value, bool> operator ==(const be_t& left, const be_t& right) { return left.data() == right.data(); } -template inline std::enable_if_t::value, bool> operator !=(const be_t& left, const be_t& right) +template inline std::enable_if_t::value && std::is_integral::value, bool> operator !=(const be_t& left, const be_t& right) { return left.data() != right.data(); } -template inline std::enable_if_t::value, be_t> operator &(const be_t& left, const be_t& right) +template inline std::enable_if_t::value && std::is_integral::value, be_t> operator &(const be_t& left, const be_t& right) { be_t result; result.m_data = left.data() & right.data(); return result; } -template inline std::enable_if_t::value, be_t> operator |(const be_t& left, const be_t& right) +template inline std::enable_if_t::value && std::is_integral::value, be_t> operator |(const be_t& left, const be_t& right) { be_t result; result.m_data = left.data() | right.data(); return result; } -template inline std::enable_if_t::value, be_t> operator ^(const be_t& left, const be_t& right) +template inline std::enable_if_t::value && std::is_integral::value, be_t> operator ^(const be_t& left, const be_t& right) { be_t result; result.m_data = left.data() ^ right.data(); return result; } -template inline std::enable_if_t> operator ~(const be_t& other) +template inline std::enable_if_t::value, be_t> operator ~(const be_t& arg) { be_t result; - result.m_data = ~other.data(); + result.m_data = ~arg.data(); return result; } @@ -782,7 +772,7 @@ public: using type = std::remove_cv_t; using stype = be_storage_t>; - type m_data; + type m_data; // don't access directly static_assert(!std::is_class::value, "le_t<> error: invalid type (class or structure)"); static_assert(!std::is_union::value || std::is_same::value, "le_t<> error: invalid type (union)"); diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index d160a46198..4449cbb12a 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -749,7 
+749,7 @@ size_t get_x64_access_size(x64_context* context, x64_op_t op, x64_reg_t reg, siz
 	if (op == X64OP_CMPXCHG)
 	{
 		// detect whether this instruction can't actually modify memory to avoid breaking reservation;
-		// this may theoretically cause an endless loop, but it shouldn't be a problem if only read_sync() generates such an instruction
+		// this may theoretically cause an endless loop, but it shouldn't be a problem if only load_sync() generates such an instruction
 		u64 cmp, exch;
 		if (!get_x64_reg_value(context, reg, d_size, i_size, cmp) || !get_x64_reg_value(context, X64R_RAX, d_size, i_size, exch))
 		{
@@ -1480,16 +1480,22 @@ bool thread_t::joinable() const
 	return m_state == TS_JOINABLE;
 }
 
-bool waiter_map_t::is_stopped(u64 signal_id)
+bool waiter_map_t::is_stopped(u32 addr)
 {
 	if (Emu.IsStopped())
 	{
-		LOG_WARNING(Log::HLE, "%s: waiter_op() aborted (signal_id=0x%llx)", name.c_str(), signal_id);
+		LOG_WARNING(Log::HLE, "%s: waiter_op() aborted (addr=0x%x)", name.c_str(), addr);
 		return true;
 	}
 
 	return false;
 }
 
+void waiter_map_t::notify(u32 addr)
+{
+	// signal appropriate condition variable
+	cv[get_hash(addr)].notify_all();
+}
+
 const std::function<bool()> SQUEUE_ALWAYS_EXIT = [](){ return true; };
 const std::function<bool()> SQUEUE_NEVER_EXIT = [](){ return false; };
diff --git a/Utilities/Thread.h b/Utilities/Thread.h
index 783f66176c..fbc528b58d 100644
--- a/Utilities/Thread.h
+++ b/Utilities/Thread.h
@@ -103,7 +103,7 @@ class slw_shared_mutex_t
 
 struct waiter_map_t
 {
-	static const size_t size = 32;
+	static const size_t size = 16;
 
 	std::array<std::mutex, size> mutex;
 	std::array<std::condition_variable, size> cv;
@@ -115,40 +115,43 @@ struct waiter_map_t
 	{
 	}
 
-	bool is_stopped(u64 signal_id);
+	// generate simple "hash" for mutex/cv distribution
+	u32 get_hash(u32 addr)
+	{
+		addr ^= addr >> 16;
+		addr ^= addr >> 24;
+		addr ^= addr >> 28;
+		return addr % size;
+	}
+
+	// check emu status
+	bool is_stopped(u32 addr);
 
 	// wait until pred() returns true; addr is an arbitrary number used to pick a mutex/cv pair
-	template<typename S, typename WT> force_inline safe_buffers void wait_op(const S& signal_id, const WT waiter_func)
+	template<typename F, typename... Args> safe_buffers auto wait_op(u32 addr, F pred, Args&&... args) -> decltype(static_cast<void>(pred(args...)))
 	{
-		// generate hash
-		const auto hash = std::hash<S>()(signal_id) % size;
+		const u32 hash = get_hash(addr);
 
 		// set mutex locker
-		std::unique_lock<std::mutex> locker(mutex[hash], std::defer_lock);
+		std::unique_lock<std::mutex> lock(mutex[hash], std::defer_lock);
 
-		// check the condition or if the emulator is stopped
-		while (!waiter_func() && !is_stopped(signal_id))
+		while (true)
 		{
+			// check the condition
+			if (pred(args...)) return;
+
 			// lock the mutex and initialize waiter (only once)
-			if (!locker.owns_lock())
-			{
-				locker.lock();
-			}
+			if (!lock) lock.lock();
 
 			// wait on appropriate condition variable for 1 ms or until signal arrived
-			cv[hash].wait_for(locker, std::chrono::milliseconds(1));
+			cv[hash].wait_for(lock, std::chrono::milliseconds(1));
+
+			if (is_stopped(addr)) return;
 		}
 	}
 
 	// signal all threads waiting on waiter_op() with the same addr (signaling only hints to those threads that the corresponding conditions are *probably* met)
-	template<typename S> force_inline void notify(const S& signal_id)
-	{
-		// generate hash
-		const auto hash = std::hash<S>()(signal_id) % size;
-
-		// signal appropriate condition variable
-		cv[hash].notify_all();
-	}
+	void notify(u32 addr);
 };
 
 extern const std::function<bool()> SQUEUE_ALWAYS_EXIT;
@@ -209,7 +212,7 @@ public:
 	{
 		u32 pos = 0;
 
-		while (u32 res = m_sync.atomic_op_sync(SQSVR_OK, [&pos](squeue_sync_var_t& sync) -> u32
+		while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32
 		{
 			assert(sync.count <= sq_size);
 			assert(sync.position < sq_size);
@@ -272,7 +275,7 @@ public:
 	{
 		u32 pos = 0;
 
-		while (u32 res = m_sync.atomic_op_sync(SQSVR_OK, [&pos](squeue_sync_var_t& sync) -> u32
+		while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32
 		{
 			assert(sync.count <= sq_size);
 			assert(sync.position < sq_size);
@@ -341,7 +344,7 @@ public:
 		assert(start_pos < sq_size);
 		u32 pos = 0;
 
-		while (u32 res = m_sync.atomic_op_sync(SQSVR_OK, [&pos, start_pos](squeue_sync_var_t& sync) -> u32
+		while (u32 res = m_sync.atomic_op([&pos, start_pos](squeue_sync_var_t& sync) -> u32
 		{
 			assert(sync.count <= sq_size);
 			assert(sync.position < sq_size);
@@ -425,7 +428,7 @@ public:
 	{
 		u32 pos, count;
 
-		while (m_sync.atomic_op_sync(SQSVR_OK, [&pos, &count](squeue_sync_var_t& sync) -> u32
+		while (m_sync.atomic_op([&pos, &count](squeue_sync_var_t& sync) -> u32
 		{
 			assert(sync.count <= sq_size);
 			assert(sync.position < sq_size);
@@ -463,7 +466,7 @@ public:
 
 	void clear()
 	{
-		while (m_sync.atomic_op_sync(SQSVR_OK, [](squeue_sync_var_t& sync) -> u32
+		while (m_sync.atomic_op([](squeue_sync_var_t& sync) -> u32
 		{
 			assert(sync.count <= sq_size);
 			assert(sync.position < sq_size);
diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp
index 52c0442956..0ca0f45ca1 100644
--- a/rpcs3/Emu/Cell/RawSPUThread.cpp
+++ b/rpcs3/Emu/Cell/RawSPUThread.cpp
@@ -76,7 +76,7 @@ bool RawSPUThread::ReadReg(const u32 addr, u32& value)
 
 	case SPU_Status_offs:
 	{
-		value = status.read_relaxed();
+		value = status.load();
 		return true;
 	}
 	}
@@ -185,7 +185,7 @@ bool RawSPUThread::WriteReg(const u32 addr, const u32 value)
 			break;
 		}
 
-		run_ctrl.write_relaxed(value);
+		run_ctrl.store(value);
 		return true;
 	}
 
@@ -196,7 +196,7 @@
 			break;
 		}
 
-		npc.write_relaxed(value);
+		npc.store(value);
 		return true;
 	}
 
@@ -223,5 +223,5 @@ void RawSPUThread::Task()
 
 	SPUThread::Task();
 
-	npc.write_relaxed(PC | 1);
+	npc.store(PC | 1);
 }
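For illustration only, a minimal sketch (not part of the patch) of how the reworked waiter_map_t above is meant to be used: the pred callable replaces the old waiter_func/signal_id pair, and notify() is now an out-of-line function keyed by address. It assumes the atomic<> alias from the atomic.h changes further below; g_example_wm, wait_for_flag and set_flag are hypothetical names.

#include "Utilities/Thread.h"  // waiter_map_t, as patched above
#include "Emu/Memory/atomic.h" // atomic<>, as patched below

waiter_map_t g_example_wm("example_wm"); // the name only appears in abort logging

void wait_for_flag(atomic<u32>& flag, u32 addr)
{
	// sleeps up to 1 ms per iteration on cv[get_hash(addr)] until pred returns true
	g_example_wm.wait_op(addr, [&]{ return flag.load() != 0; });
}

void set_flag(atomic<u32>& flag, u32 addr)
{
	flag.store(1);

	// only a hint that the condition for addr is *probably* met now
	g_example_wm.notify(addr);
}

diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 7b4a7476fa..dde01fde9c 100644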
--- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -495,7 +495,7 @@ u32 SPUThread::get_ch_count(u32 ch) case SPU_RdSigNotify1: return ch_snr1.get_count(); break; case SPU_RdSigNotify2: return ch_snr2.get_count(); break; case MFC_RdAtomicStat: return ch_atomic_stat.get_count(); break; - case SPU_RdEventStat: return ch_event_stat.read_relaxed() & ch_event_mask ? 1 : 0; break; + case SPU_RdEventStat: return ch_event_stat.load() & ch_event_mask ? 1 : 0; break; } LOG_ERROR(SPU, "get_ch_count(ch=%d [%s]): unknown/illegal channel", ch, ch < 128 ? spu_ch_name[ch] : "???"); @@ -603,7 +603,7 @@ u32 SPUThread::get_ch_value(u32 ch) case SPU_RdEventStat: { u32 result; - while (!(result = ch_event_stat.read_relaxed() & ch_event_mask) && !Emu.IsStopped()) + while (!(result = ch_event_stat.load() & ch_event_mask) && !Emu.IsStopped()) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index b06963e24c..07035add3c 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -199,7 +199,7 @@ public: void set_value(u32 value, u32 count = 1) { - sync_var.write_relaxed({ count, value }); + sync_var.store({ count, value }); } u32 get_value() volatile @@ -254,7 +254,7 @@ public: { bool out_result; - const u32 last_value = value3.read_sync(); + const u32 last_value = value3.load_sync(); sync_var.atomic_op([&out_result, &out_value, &out_count, last_value](sync_var_t& data) { @@ -292,7 +292,7 @@ public: void set(u64 ints) { // leave only enabled interrupts - ints &= mask.read_relaxed(); + ints &= mask.load(); if (ints && ~stat._or(ints) & ints) { diff --git a/rpcs3/Emu/Memory/atomic.h b/rpcs3/Emu/Memory/atomic.h index 6eea856d7c..86bd6cc741 100644 --- a/rpcs3/Emu/Memory/atomic.h +++ b/rpcs3/Emu/Memory/atomic.h @@ -32,6 +32,43 @@ template struct _to_atomic_subtype template using atomic_subtype_t = typename _to_atomic_subtype::type; +// result wrapper to deal with void result type +template struct atomic_op_result_t +{ + RT result; + + template inline atomic_op_result_t(T func, Args&&... args) + : result(std::move(func(std::forward(args)...))) + { + } + + inline RT move() + { + return std::move(result); + } +}; + +// void specialization +template<> struct atomic_op_result_t +{ + template inline atomic_op_result_t(T func, Args&&... 
args) + { + func(std::forward(args)...); + } + + inline void move() + { + } +}; + +struct break_never_t +{ + template inline bool operator()(const atomic_op_result_t&) const + { + return false; + } +}; + template union _atomic_base { using type = std::remove_cv_t; @@ -55,6 +92,27 @@ template union _atomic_base return reinterpret_cast(value); } +private: + template force_inline static void write_relaxed(volatile T2& data, const T2& value) + { + data = value; + } + + force_inline static void write_relaxed(volatile u128& data, const u128& value) + { + sync_lock_test_and_set(&data, value); + } + + template force_inline static T2 read_relaxed(const volatile T2& data) + { + return data; + } + + force_inline static u128 read_relaxed(const volatile u128& value) + { + return sync_val_compare_and_swap(const_cast(&value), {}, {}); + } + public: // atomically compare data with cmp, replace with exch if equal, return previous data value anyway force_inline const type compare_and_swap(const type& cmp, const type& exch) volatile @@ -69,7 +127,7 @@ public: } // read data with memory barrier - force_inline const type read_sync() const volatile + force_inline const type load_sync() const volatile { const subtype zero = {}; return from_subtype(sync_val_compare_and_swap(const_cast(&sub_data), zero, zero)); @@ -81,73 +139,42 @@ public: return from_subtype(sync_lock_test_and_set(&sub_data, to_subtype(exch))); } - // read data without memory barrier - force_inline const type read_relaxed() const volatile + // read data without memory barrier (works as load_sync() for 128 bit) + force_inline const type load() const volatile { - const subtype value = const_cast(sub_data); - return from_subtype(value); + return from_subtype(read_relaxed(sub_data)); } - // write data without memory barrier - force_inline void write_relaxed(const type& value) volatile + // write data without memory barrier (works as exchange() for 128 bit, discarding result) + force_inline void store(const type& value) volatile { - const_cast(sub_data) = to_subtype(value); + write_relaxed(sub_data, to_subtype(value)); } - // perform atomic operation on data - template force_inline void atomic_op(const FT atomic_proc) volatile + // perform an atomic operation on data (callable object version, first arg is a reference to atomic type) + template auto atomic_op(F func, Args&&... args) volatile -> decltype(func(std::declval(), args...)) { while (true) { - const subtype old = const_cast(sub_data); + // read the old value from memory + const subtype old = read_relaxed(sub_data); + + // copy the old value subtype _new = old; - atomic_proc(to_type(_new)); // function should accept reference to T type - if (sync_bool_compare_and_swap(&sub_data, old, _new)) return; + + // call atomic op for the local copy of the old value and save the return value of the function + atomic_op_result_t> result(func, to_type(_new), args...); + + // 1) check return value using callable object of Break_if type, return if condition met + // 2) atomically compare value with `old`, replace with `_new` and return on success + if (Break_if()(result) || sync_bool_compare_and_swap(&sub_data, old, _new)) return result.move(); } } - // perform atomic operation on data with special exit condition (if intermediate result != proceed_value) - template force_inline RT atomic_op(const RT proceed_value, const FT atomic_proc) volatile + // perform an atomic operation on data (member function version) + template::value>> auto atomic_op(RT(CT::* func)(FArgs...), Args&&... 
args) volatile -> decltype((std::declval().*func)(args...)) { - while (true) - { - const subtype old = const_cast(sub_data); - subtype _new = old; - auto res = static_cast(atomic_proc(to_type(_new))); // function should accept reference to T type and return some value - if (res != proceed_value) return res; - if (sync_bool_compare_and_swap(&sub_data, old, _new)) return proceed_value; - } - } - - // perform atomic operation on data with additional memory barrier - template force_inline void atomic_op_sync(const FT atomic_proc) volatile - { - const subtype zero = {}; - subtype old = sync_val_compare_and_swap(&sub_data, zero, zero); - while (true) - { - subtype _new = old; - atomic_proc(to_type(_new)); // function should accept reference to T type - const subtype val = sync_val_compare_and_swap(&sub_data, old, _new); - if (val == old) return; - old = val; - } - } - - // perform atomic operation on data with additional memory barrier and special exit condition (if intermediate result != proceed_value) - template force_inline RT atomic_op_sync(const RT proceed_value, const FT atomic_proc) volatile - { - const subtype zero = {}; - subtype old = sync_val_compare_and_swap(&sub_data, zero, zero); - while (true) - { - subtype _new = old; - auto res = static_cast(atomic_proc(to_type(_new))); // function should accept reference to T type and return some value - if (res != proceed_value) return res; - const subtype val = sync_val_compare_and_swap(&sub_data, old, _new); - if (val == old) return proceed_value; - old = val; - } + return atomic_op(std::mem_fn(func), std::forward(args)...); } // atomic bitwise OR, returns previous data @@ -174,17 +201,17 @@ public: return from_subtype(sync_fetch_and_xor(&sub_data, to_subtype(right))); } - force_inline const type operator |= (const type& right) volatile + force_inline const type operator |=(const type& right) volatile { return from_subtype(sync_fetch_and_or(&sub_data, to_subtype(right)) | to_subtype(right)); } - force_inline const type operator &= (const type& right) volatile + force_inline const type operator &=(const type& right) volatile { return from_subtype(sync_fetch_and_and(&sub_data, to_subtype(right)) & to_subtype(right)); } - force_inline const type operator ^= (const type& right) volatile + force_inline const type operator ^=(const type& right) volatile { return from_subtype(sync_fetch_and_xor(&sub_data, to_subtype(right)) ^ to_subtype(right)); } @@ -225,74 +252,50 @@ template inline if_integral_le_t operator -=(_at template inline if_integral_be_t operator ++(_atomic_base>& left) { - be_t result; - - left.atomic_op([&result](be_t& value) + return left.atomic_op([](be_t& value) -> be_t { - result = ++value; + return ++value; }); - - return result; } template inline if_integral_be_t operator --(_atomic_base>& left) { - be_t result; - - left.atomic_op([&result](be_t& value) + return left.atomic_op([](be_t& value) -> be_t { - result = --value; + return --value; }); - - return result; } template inline if_integral_be_t operator ++(_atomic_base>& left, int) { - be_t result; - - left.atomic_op([&result](be_t& value) + return left.atomic_op([](be_t& value) -> be_t { - result = value++; + return value++; }); - - return result; } template inline if_integral_be_t operator --(_atomic_base>& left, int) { - be_t result; - - left.atomic_op([&result](be_t& value) + return left.atomic_op([](be_t& value) -> be_t { - result = value--; + return value--; }); - - return result; } template inline if_integral_be_t operator +=(_atomic_base>& left, T2 right) { - be_t 
result; - - left.atomic_op([&result, right](be_t& value) + return left.atomic_op([right](be_t& value) -> be_t { - result = (value += right); + return value += right; }); - - return result; } template inline if_integral_be_t operator -=(_atomic_base>& left, T2 right) { - be_t result; - - left.atomic_op([&result, right](be_t& value) + return left.atomic_op([right](be_t& value) -> be_t { - result = (value -= right); + return value -= right; }); - - return result; } template using atomic = _atomic_base; // Atomic Type with native endianness (for emulator memory) diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 769e63dacd..0774c3a5cf 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -208,7 +208,7 @@ namespace vm { std::lock_guard lock(g_reservation_mutex); - u8 flags = g_page_info[addr >> 12].read_relaxed(); + u8 flags = g_page_info[addr >> 12].load(); if (!(flags & page_writable) || !(flags & page_allocated) || (flags & page_no_reservations)) { throw fmt::format("vm::reservation_acquire(addr=0x%x, size=0x%x) failed (invalid page flags: 0x%x)", addr, size, flags); @@ -355,7 +355,7 @@ namespace vm for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++) { - if (g_page_info[i].read_relaxed()) + if (g_page_info[i].load()) { throw fmt::format("vm::page_map(addr=0x%x, size=0x%x, flags=0x%x) failed (already mapped at 0x%x)", addr, size, flags, i * 4096); } @@ -398,7 +398,7 @@ namespace vm for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++) { - if ((g_page_info[i].read_relaxed() & flags_test) != (flags_test | page_allocated)) + if ((g_page_info[i].load() & flags_test) != (flags_test | page_allocated)) { return false; } @@ -447,7 +447,7 @@ namespace vm for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++) { - if (!(g_page_info[i].read_relaxed() & page_allocated)) + if (!(g_page_info[i].load() & page_allocated)) { throw fmt::format("vm::page_unmap(addr=0x%x, size=0x%x) failed (not mapped at 0x%x)", addr, size, i * 4096); } @@ -491,7 +491,7 @@ namespace vm for (u32 i = addr / 4096; i <= (addr + size - 1) / 4096; i++) { - if ((g_page_info[i].read_sync() & page_allocated) != page_allocated) + if ((g_page_info[i].load() & page_allocated) != page_allocated) { return false; } diff --git a/rpcs3/Emu/Memory/vm_ref.h b/rpcs3/Emu/Memory/vm_ref.h index 47ce252b32..8ab50c30df 100644 --- a/rpcs3/Emu/Memory/vm_ref.h +++ b/rpcs3/Emu/Memory/vm_ref.h @@ -5,7 +5,7 @@ namespace vm template struct _ref_base { - AT m_addr; + AT m_addr; // don't access directly static_assert(!std::is_pointer::value, "vm::_ref_base<> error: invalid type (pointer)"); static_assert(!std::is_reference::value, "vm::_ref_base<> error: invalid type (reference)"); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 07da68791d..a4557b57b7 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2494,8 +2494,8 @@ void RSXThread::Task() inc = 1; - u32 get = m_ctrl->get.read_sync(); - u32 put = m_ctrl->put.read_sync(); + u32 put = m_ctrl->put.load(); + u32 get = m_ctrl->get.load(); if (put == get || !Emu.IsRunning()) { diff --git a/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp b/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp index dd03c80d11..84c3fe857b 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellAudio.cpp @@ -31,7 +31,7 @@ s32 cellAudioInit() // clear ports for (auto& port : g_audio.ports) { - port.state.write_relaxed(AUDIO_PORT_STATE_CLOSED); + port.state.store(AUDIO_PORT_STATE_CLOSED); } // reset variables 
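For illustration only, a minimal sketch (not part of the patch) of the renamed atomic interface that call sites like the ones in this file now use, assuming the atomic<> alias from the atomic.h hunks above; g_example_state and example() are hypothetical, and the AUDIO_STATE_* constants are treated as plain u32 values here.

#include "Emu/Memory/atomic.h" // atomic<>, as patched above

atomic<u32> g_example_state;

u32 example()
{
	g_example_state.store(AUDIO_STATE_INITIALIZED); // replaces write_relaxed()

	if (g_example_state.load() != AUDIO_STATE_INITIALIZED) // replaces read_relaxed()
	{
		return 0;
	}

	// atomic_op() now returns whatever the callable returns, so a
	// read-modify-write that yields the old value needs no out-parameter
	return g_example_state.atomic_op([](u32& value) -> u32
	{
		const u32 prev = value;
		value = AUDIO_STATE_NOT_INITIALIZED;
		return prev;
	});
}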
@@ -82,7 +82,7 @@ s32 cellAudioInit() bool opened = false; float* buffer; - while (out_queue.pop(buffer, [](){ return g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED; })) + while (out_queue.pop(buffer, [](){ return g_audio.state.load() != AUDIO_STATE_INITIALIZED; })) { if (use_u16) { @@ -146,7 +146,7 @@ s32 cellAudioInit() } }); - while (g_audio.state.read_relaxed() == AUDIO_STATE_INITIALIZED && !Emu.IsStopped()) + while (g_audio.state.load() == AUDIO_STATE_INITIALIZED && !Emu.IsStopped()) { if (Emu.IsPaused()) { @@ -193,7 +193,7 @@ s32 cellAudioInit() // mixing: for (auto& port : g_audio.ports) { - if (port.state.read_relaxed() != AUDIO_PORT_STATE_STARTED) continue; + if (port.state.load() != AUDIO_PORT_STATE_STARTED) continue; const u32 block_size = port.channel * AUDIO_SAMPLES; const u32 position = port.tag % port.block; // old value @@ -206,7 +206,7 @@ s32 cellAudioInit() auto step_volume = [](AudioPortConfig& port) // part of cellAudioSetPortLevel functionality { - const auto param = port.level_set.read_sync(); + const auto param = port.level_set.load(); if (param.inc != 0.0f) { @@ -357,7 +357,7 @@ s32 cellAudioInit() memset(out_buffer[out_pos].get(), 0, out_buffer_size * sizeof(float)); } - if (!out_queue.push(out_buffer[out_pos].get(), [](){ return g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED; })) + if (!out_queue.push(out_buffer[out_pos].get(), [](){ return g_audio.state.load() != AUDIO_STATE_INITIALIZED; })) { break; } @@ -375,7 +375,7 @@ s32 cellAudioInit() { AudioPortConfig& port = g_audio.ports[i]; - if (port.state.read_relaxed() != AUDIO_PORT_STATE_STARTED) continue; + if (port.state.load() != AUDIO_PORT_STATE_STARTED) continue; u32 position = port.tag % port.block; // old value port.counter = g_audio.counter; @@ -447,7 +447,7 @@ s32 cellAudioPortOpen(vm::ptr audioParam, vm::ptr portN { cellAudio.Warning("cellAudioPortOpen(audioParam=*0x%x, portNum=*0x%x)", audioParam, portNum); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -551,7 +551,7 @@ s32 cellAudioGetPortConfig(u32 portNum, vm::ptr portConfig) { cellAudio.Warning("cellAudioGetPortConfig(portNum=%d, portConfig=*0x%x)", portNum, portConfig); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -565,7 +565,7 @@ s32 cellAudioGetPortConfig(u32 portNum, vm::ptr portConfig) portConfig->readIndexAddr = port.read_index_addr; - switch (auto state = port.state.read_sync()) + switch (auto state = port.state.load()) { case AUDIO_PORT_STATE_CLOSED: portConfig->status = CELL_AUDIO_STATUS_CLOSE; break; case AUDIO_PORT_STATE_OPENED: portConfig->status = CELL_AUDIO_STATUS_READY; break; @@ -584,7 +584,7 @@ s32 cellAudioPortStart(u32 portNum) { cellAudio.Warning("cellAudioPortStart(portNum=%d)", portNum); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -607,7 +607,7 @@ s32 cellAudioPortClose(u32 portNum) { cellAudio.Warning("cellAudioPortClose(portNum=%d)", portNum); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -630,7 +630,7 @@ s32 cellAudioPortStop(u32 portNum) { cellAudio.Warning("cellAudioPortStop(portNum=%d)", portNum); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) 
+ if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -653,7 +653,7 @@ s32 cellAudioGetPortTimestamp(u32 portNum, u64 tag, vm::ptr stamp) { cellAudio.Log("cellAudioGetPortTimestamp(portNum=%d, tag=0x%llx, stamp=*0x%x)", portNum, tag, stamp); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -665,7 +665,7 @@ s32 cellAudioGetPortTimestamp(u32 portNum, u64 tag, vm::ptr stamp) AudioPortConfig& port = g_audio.ports[portNum]; - if (port.state.read_relaxed() == AUDIO_PORT_STATE_CLOSED) + if (port.state.load() == AUDIO_PORT_STATE_CLOSED) { return CELL_AUDIO_ERROR_PORT_NOT_OPEN; } @@ -683,7 +683,7 @@ s32 cellAudioGetPortBlockTag(u32 portNum, u64 blockNo, vm::ptr tag) { cellAudio.Log("cellAudioGetPortBlockTag(portNum=%d, blockNo=0x%llx, tag=*0x%x)", portNum, blockNo, tag); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -695,7 +695,7 @@ s32 cellAudioGetPortBlockTag(u32 portNum, u64 blockNo, vm::ptr tag) AudioPortConfig& port = g_audio.ports[portNum]; - if (port.state.read_relaxed() == AUDIO_PORT_STATE_CLOSED) + if (port.state.load() == AUDIO_PORT_STATE_CLOSED) { return CELL_AUDIO_ERROR_PORT_NOT_OPEN; } @@ -726,7 +726,7 @@ s32 cellAudioSetPortLevel(u32 portNum, float level) { cellAudio.Log("cellAudioSetPortLevel(portNum=%d, level=%f)", portNum, level); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -738,7 +738,7 @@ s32 cellAudioSetPortLevel(u32 portNum, float level) AudioPortConfig& port = g_audio.ports[portNum]; - if (port.state.read_relaxed() == AUDIO_PORT_STATE_CLOSED) + if (port.state.load() == AUDIO_PORT_STATE_CLOSED) { return CELL_AUDIO_ERROR_PORT_NOT_OPEN; } @@ -796,7 +796,7 @@ s32 cellAudioSetNotifyEventQueue(u64 key) { cellAudio.Warning("cellAudioSetNotifyEventQueue(key=0x%llx)", key); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -829,7 +829,7 @@ s32 cellAudioRemoveNotifyEventQueue(u64 key) { cellAudio.Warning("cellAudioRemoveNotifyEventQueue(key=0x%llx)", key); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -862,7 +862,7 @@ s32 cellAudioAddData(u32 portNum, vm::ptr src, u32 samples, float volume) { cellAudio.Log("cellAudioAddData(portNum=%d, src=*0x%x, samples=%d, volume=%f)", portNum, src, samples, volume); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -895,7 +895,7 @@ s32 cellAudioAdd2chData(u32 portNum, vm::ptr src, u32 samples, float volu { cellAudio.Log("cellAudioAdd2chData(portNum=%d, src=*0x%x, samples=%d, volume=%f)", portNum, src, samples, volume); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -958,7 +958,7 @@ s32 cellAudioAdd6chData(u32 portNum, vm::ptr src, float volume) { cellAudio.Log("cellAudioAdd6chData(portNum=%d, src=*0x%x, volume=%f)", portNum, src, volume); - if (g_audio.state.read_relaxed() != AUDIO_STATE_INITIALIZED) + if (g_audio.state.load() != 
AUDIO_STATE_INITIALIZED) { return CELL_AUDIO_ERROR_NOT_INIT; } @@ -1024,7 +1024,7 @@ s32 cellAudioUnsetPersonalDevice(s32 iPersonalStream) Module cellAudio("cellAudio", []() { - g_audio.state.write_relaxed(AUDIO_STATE_NOT_INITIALIZED); + g_audio.state.store(AUDIO_STATE_NOT_INITIALIZED); g_audio.buffer = 0; g_audio.indexes = 0; diff --git a/rpcs3/Emu/SysCalls/Modules/cellFs.cpp b/rpcs3/Emu/SysCalls/Modules/cellFs.cpp index c0703e216c..8fa066ec55 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellFs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellFs.cpp @@ -398,7 +398,7 @@ s32 cellFsStReadGetRingBuf(u32 fd, vm::ptr ringbuf) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED) + if (file->st_status.load() == SSS_NOT_INITIALIZED) { return CELL_FS_ENXIO; } @@ -422,7 +422,7 @@ s32 cellFsStReadGetStatus(u32 fd, vm::ptr status) return CELL_FS_EBADF; } - switch (file->st_status.read_sync()) + switch (file->st_status.load()) { case SSS_INITIALIZED: case SSS_STOPPED: @@ -456,7 +456,7 @@ s32 cellFsStReadGetRegid(u32 fd, vm::ptr regid) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED) + if (file->st_status.load() == SSS_NOT_INITIALIZED) { return CELL_FS_ENXIO; } @@ -500,7 +500,7 @@ s32 cellFsStReadStart(u32 fd, u64 offset, u64 size) { std::unique_lock lock(file->mutex); - while (file->st_status.read_relaxed() == SSS_STARTED && !Emu.IsStopped()) + while (file->st_status.load() == SSS_STARTED && !Emu.IsStopped()) { // check free space in buffer and available data in stream if (file->st_total_read - file->st_copied <= file->st_ringbuf_size - file->st_block_size && file->st_total_read < file->st_read_size) @@ -590,7 +590,7 @@ s32 cellFsStRead(u32 fd, vm::ptr buf, u64 size, vm::ptr rsize) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED || file->st_copyless) + if (file->st_status.load() == SSS_NOT_INITIALIZED || file->st_copyless) { return CELL_FS_ENXIO; } @@ -624,7 +624,7 @@ s32 cellFsStReadGetCurrentAddr(u32 fd, vm::ptr addr, vm::ptr size) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED || !file->st_copyless) + if (file->st_status.load() == SSS_NOT_INITIALIZED || !file->st_copyless) { return CELL_FS_ENXIO; } @@ -657,7 +657,7 @@ s32 cellFsStReadPutCurrentAddr(u32 fd, vm::ptr addr, u64 size) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED || !file->st_copyless) + if (file->st_status.load() == SSS_NOT_INITIALIZED || !file->st_copyless) { return CELL_FS_ENXIO; } @@ -684,7 +684,7 @@ s32 cellFsStReadWait(u32 fd, u64 size) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED) + if (file->st_status.load() == SSS_NOT_INITIALIZED) { return CELL_FS_ENXIO; } @@ -718,7 +718,7 @@ s32 cellFsStReadWaitCallback(u32 fd, u64 size, fs_st_cb_t func) return CELL_FS_EBADF; } - if (file->st_status.read_sync() == SSS_NOT_INITIALIZED) + if (file->st_status.load() == SSS_NOT_INITIALIZED) { return CELL_FS_ENXIO; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp index 607c81fbc8..82f5efce57 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp @@ -379,9 +379,9 @@ s32 _cellGcmInitBody(vm::ptr context, u32 cmdSize, u32 ioSiz vm::write32(context.addr(), gcm_info.context_addr); auto& ctrl = vm::get_ref(gcm_info.control_addr); - ctrl.put.write_relaxed(0); - ctrl.get.write_relaxed(0); - ctrl.ref.write_relaxed(-1); + ctrl.put.store(0); + ctrl.get.store(0); + 
ctrl.ref.store(-1); auto& render = Emu.GetGSManager().GetRender(); render.m_ctxt_addr = context.addr(); @@ -1220,7 +1220,7 @@ s32 cellGcmCallback(vm::ptr context, u32 count) // Wait for rsx to "release" the new command buffer while (!Emu.IsStopped()) { - u32 getPos = ctrl.get.read_sync().value(); + u32 getPos = ctrl.get.load().value(); if (isInCommandBufferExcept(getPos, newCommandBuffer.first, newCommandBuffer.second)) break; std::chrono::time_point waitPoint = std::chrono::system_clock::now(); @@ -1235,7 +1235,7 @@ s32 cellGcmCallback(vm::ptr context, u32 count) //if (0) //{ // auto& ctrl = vm::get_ref(gcm_info.control_addr); - // be_t res = context->current - context->begin - ctrl.put.read_relaxed(); + // be_t res = context->current - context->begin - ctrl.put.load(); // if (res != 0) // { @@ -1245,8 +1245,8 @@ s32 cellGcmCallback(vm::ptr context, u32 count) // memmove(vm::get_ptr(context->begin), vm::get_ptr(context->current - res), res); // context->current = context->begin + res; - // ctrl.put.write_relaxed(res); - // ctrl.get.write_relaxed(0); + // ctrl.put.store(res); + // ctrl.get.store(0); // return CELL_OK; //} diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 791c1de37f..d5b875f4ea 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -411,7 +411,7 @@ s32 spursDetachLv2EventQueue(vm::ptr spurs, u8 spuPort, bool spursCre auto mask = 1ull << spuPort; if (sdkVer >= 0x180000) { - if ((spurs->spuPortBits.read_relaxed() & mask) == 0) + if ((spurs->spuPortBits.load() & mask) == 0) { return CELL_SPURS_CORE_ERROR_SRCH; } @@ -438,7 +438,7 @@ void spursHandlerWaitReady(PPUThread& CPU, vm::ptr spurs) spursPpuThreadExit(CPU, 0); } - if (spurs->handlerExiting.read_relaxed()) + if (spurs->handlerExiting.load()) { if (s32 rc = sys_lwmutex_unlock(CPU, spurs.of(&CellSpurs::mutex))) { @@ -449,20 +449,20 @@ void spursHandlerWaitReady(PPUThread& CPU, vm::ptr spurs) } // Find a runnable workload - spurs->handlerDirty.write_relaxed(0); + spurs->handlerDirty.store(0); if (spurs->exception == 0) { bool foundRunnableWorkload = false; for (u32 i = 0; i < 16; i++) { - if (spurs->wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE && + if (spurs->wklState1[i].load() == SPURS_WKL_STATE_RUNNABLE && *((u64*)spurs->wklInfo1[i].priority) != 0 && - spurs->wklMaxContention[i].read_relaxed() & 0x0F) + spurs->wklMaxContention[i].load() & 0x0F) { - if (spurs->wklReadyCount1[i].read_relaxed() || - spurs->wklSignal1.read_relaxed() & (0x8000u >> i) || - (spurs->wklFlag.flag.read_relaxed() == 0 && - spurs->wklFlagReceiver.read_relaxed() == (u8)i)) + if (spurs->wklReadyCount1[i].load() || + spurs->wklSignal1.load() & (0x8000u >> i) || + (spurs->wklFlag.flag.load() == 0 && + spurs->wklFlagReceiver.load() == (u8)i)) { foundRunnableWorkload = true; break; @@ -474,14 +474,14 @@ void spursHandlerWaitReady(PPUThread& CPU, vm::ptr spurs) { for (u32 i = 0; i < 16; i++) { - if (spurs->wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE && + if (spurs->wklState2[i].load() == SPURS_WKL_STATE_RUNNABLE && *((u64*)spurs->wklInfo2[i].priority) != 0 && - spurs->wklMaxContention[i].read_relaxed() & 0xF0) + spurs->wklMaxContention[i].load() & 0xF0) { - if (spurs->wklIdleSpuCountOrReadyCount2[i].read_relaxed() || - spurs->wklSignal2.read_relaxed() & (0x8000u >> i) || - (spurs->wklFlag.flag.read_relaxed() == 0 && - spurs->wklFlagReceiver.read_relaxed() == (u8)i + 0x10)) + if (spurs->wklIdleSpuCountOrReadyCount2[i].load() || + 
spurs->wklSignal2.load() & (0x8000u >> i) || + (spurs->wklFlag.flag.load() == 0 && + spurs->wklFlagReceiver.load() == (u8)i + 0x10)) { foundRunnableWorkload = true; break; @@ -497,8 +497,8 @@ void spursHandlerWaitReady(PPUThread& CPU, vm::ptr spurs) // If we reach it means there are no runnable workloads in this SPURS instance. // Wait until some workload becomes ready. - spurs->handlerWaiting.write_relaxed(1); - if (spurs->handlerDirty.read_relaxed() == 0) + spurs->handlerWaiting.store(1); + if (spurs->handlerDirty.load() == 0) { if (s32 rc = sys_lwcond_wait(CPU, spurs.of(&CellSpurs::cond), 0)) { @@ -506,7 +506,7 @@ void spursHandlerWaitReady(PPUThread& CPU, vm::ptr spurs) } } - spurs->handlerWaiting.write_relaxed(0); + spurs->handlerWaiting.store(0); } // If we reach here then a runnable workload was found @@ -557,7 +557,7 @@ void spursHandlerEntry(PPUThread& CPU) if ((spurs->flags1 & SF1_EXIT_IF_NO_WORK) == 0) { - assert(spurs->handlerExiting.read_relaxed() == 1 || Emu.IsStopped()); + assert(spurs->handlerExiting.load() == 1 || Emu.IsStopped()); spursPpuThreadExit(CPU, 0); } } @@ -609,12 +609,12 @@ s32 spursWakeUpShutdownCompletionWaiter(PPUThread& CPU, vm::ptr spurs return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } - const u8 wklState = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->wklState1[wid].read_relaxed() : spurs->wklState2[wid & 0x0F].read_relaxed(); + const u8 wklState = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->wklState1[wid].load() : spurs->wklState2[wid & 0x0F].load(); if (wklState != SPURS_WKL_STATE_REMOVABLE) { @@ -628,14 +628,14 @@ s32 spursWakeUpShutdownCompletionWaiter(PPUThread& CPU, vm::ptr spurs { wklF.hook(CPU, spurs, wid, wklF.hookArg); - assert(wklEvent.read_relaxed() & 0x01); - assert(wklEvent.read_relaxed() & 0x02); - assert((wklEvent.read_relaxed() & 0x20) == 0); + assert(wklEvent.load() & 0x01); + assert(wklEvent.load() & 0x02); + assert((wklEvent.load() & 0x20) == 0); wklEvent |= 0x20; } s32 rc = CELL_OK; - if (!wklF.hook || wklEvent.read_relaxed() & 0x10) + if (!wklF.hook || wklEvent.load() & 0x10) { assert(wklF.x28 == 2); rc = sys_semaphore_post((u32)wklF.sem, 1); @@ -1028,7 +1028,7 @@ s32 spursInit( if (!isSecond) { - spurs->wklEnabled.write_relaxed(0xffff); + spurs->wklEnabled.store(0xffff); } // Initialise trace @@ -1043,7 +1043,7 @@ s32 spursInit( spurs->wklInfoSysSrv.addr.set(SPURS_IMG_ADDR_SYS_SRV_WORKLOAD); spurs->wklInfoSysSrv.size = 0x2200; spurs->wklInfoSysSrv.arg = 0; - spurs->wklInfoSysSrv.uniqueId.write_relaxed(0xff); + spurs->wklInfoSysSrv.uniqueId.store(0xff); auto sys_semaphore_attribute_initialize = [](vm::ptr attr) { @@ -1221,11 +1221,11 @@ s32 spursInit( } spurs->flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? 
SF1_32_WORKLOADS : 0); - spurs->wklFlagReceiver.write_relaxed(0xff); - spurs->wklFlag.flag.write_relaxed(-1); - spurs->handlerDirty.write_relaxed(0); - spurs->handlerWaiting.write_relaxed(0); - spurs->handlerExiting.write_relaxed(0); + spurs->wklFlagReceiver.store(0xff); + spurs->wklFlag.flag.store(-1); + spurs->handlerDirty.store(0); + spurs->handlerWaiting.store(0); + spurs->handlerExiting.store(0); spurs->ppuPriority = ppuPriority; // Create the SPURS event helper thread @@ -1586,12 +1586,12 @@ s32 cellSpursFinalize(vm::ptr spurs) return CELL_SPURS_CORE_ERROR_ALIGN; } - if (spurs->handlerExiting.read_relaxed()) + if (spurs->handlerExiting.load()) { return CELL_SPURS_CORE_ERROR_STAT; } - u32 wklEnabled = spurs->wklEnabled.read_relaxed(); + u32 wklEnabled = spurs->wklEnabled.load(); if (spurs->flags1 & SF1_32_WORKLOADS) { @@ -1690,7 +1690,7 @@ s32 cellSpursSetMaxContention(vm::ptr spurs, u32 wid, u32 maxContenti return CELL_SPURS_CORE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_CORE_ERROR_SRCH; } @@ -1734,7 +1734,7 @@ s32 cellSpursSetPriorities(vm::ptr spurs, u32 wid, vm::cptr prior return CELL_SPURS_CORE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_CORE_ERROR_SRCH; } @@ -1764,8 +1764,8 @@ s32 cellSpursSetPriorities(vm::ptr spurs, u32 wid, vm::cptr prior auto& wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? spurs->wklInfo1[wid] : spurs->wklInfo2[wid]; *((be_t*)wklInfo.priority) = prio; - spurs->sysSrvMsgUpdateWorkload.write_relaxed(0xFF); - spurs->sysSrvMessage.write_relaxed(0xFF); + spurs->sysSrvMsgUpdateWorkload.store(0xFF); + spurs->sysSrvMessage.store(0xFF); return CELL_OK; } @@ -1907,7 +1907,7 @@ void spursTraceStatusUpdate(vm::ptr spurs) if (init) { - spurs->sysSrvMessage.write_relaxed(0xFF); + spurs->sysSrvMessage.store(0xFF); if (s32 rc = sys_semaphore_wait((u32)spurs->semPrv, 0)) { @@ -2241,9 +2241,9 @@ s32 spursAddWorkload( { assert((spurs->wklCurrentContention[wnum] & 0xf) == 0); assert((spurs->wklPendingContention[wnum] & 0xf) == 0); - spurs->wklState1[wnum].write_relaxed(1); + spurs->wklState1[wnum].store(1); spurs->wklStatus1[wnum] = 0; - spurs->wklEvent1[wnum].write_relaxed(0); + spurs->wklEvent1[wnum].store(0); spurs->wklInfo1[wnum].addr = pm; spurs->wklInfo1[wnum].arg = data; spurs->wklInfo1[wnum].size = size; @@ -2267,19 +2267,19 @@ s32 spursAddWorkload( if ((spurs->flags1 & SF1_32_WORKLOADS) == 0) { - spurs->wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); + spurs->wklIdleSpuCountOrReadyCount2[wnum].store(0); spurs->wklMinContention[wnum] = minContention > 8 ? 
8 : minContention; } - spurs->wklReadyCount1[wnum].write_relaxed(0); + spurs->wklReadyCount1[wnum].store(0); } else { assert((spurs->wklCurrentContention[index] & 0xf0) == 0); assert((spurs->wklPendingContention[index] & 0xf0) == 0); - spurs->wklState2[index].write_relaxed(1); + spurs->wklState2[index].store(1); spurs->wklStatus2[index] = 0; - spurs->wklEvent2[index].write_relaxed(0); + spurs->wklEvent2[index].store(0); spurs->wklInfo2[index].addr = pm; spurs->wklInfo2[index].arg = data; spurs->wklInfo2[index].size = size; @@ -2301,7 +2301,7 @@ s32 spursAddWorkload( spurs->wklEvent2[index] |= 2; } - spurs->wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0); + spurs->wklIdleSpuCountOrReadyCount2[wnum].store(0); } if (wnum <= 15) @@ -2327,7 +2327,7 @@ s32 spursAddWorkload( u32 res_wkl; CellSpurs::WorkloadInfo& wkl = wnum <= 15 ? spurs->wklInfo1[wnum] : spurs->wklInfo2[wnum & 0xf]; - spurs->wklMskB.atomic_op_sync([spurs, &wkl, wnum, &res_wkl](be_t& v) + spurs->wklMskB.atomic_op([spurs, &wkl, wnum, &res_wkl](be_t& v) { const u32 mask = v & ~(0x80000000u >> wnum); res_wkl = 0; @@ -2340,12 +2340,12 @@ s32 spursAddWorkload( if (current.addr == wkl.addr) { // if a workload with identical policy module found - res_wkl = current.uniqueId.read_relaxed(); + res_wkl = current.uniqueId.load(); break; } else { - k |= 0x80000000 >> current.uniqueId.read_relaxed(); + k |= 0x80000000 >> current.uniqueId.load(); res_wkl = cntlz32(~k); } } @@ -2437,7 +2437,7 @@ s32 cellSpursWakeUp(PPUThread& CPU, vm::ptr spurs) spurs->handlerDirty.exchange(1); - if (spurs->handlerWaiting.read_sync()) + if (spurs->handlerWaiting.load()) { spursSignalToHandlerThread(CPU, spurs); } @@ -2465,7 +2465,7 @@ s32 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 wid) return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } @@ -2475,7 +2475,7 @@ s32 cellSpursSendWorkloadSignal(vm::ptr spurs, u32 wid) return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } - if (spurs->wklState(wid).read_relaxed() != SPURS_WKL_STATE_RUNNABLE) + if (spurs->wklState(wid).load() != SPURS_WKL_STATE_RUNNABLE) { return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } @@ -2531,12 +2531,12 @@ s32 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } - if (spurs->exception.data() || spurs->wklState(wid).read_relaxed() != 2) + if (spurs->exception.data() || spurs->wklState(wid).load() != 2) { return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } @@ -2594,7 +2594,7 @@ s32 cellSpursGetWorkloadData(vm::ptr spurs, vm::ptr data, u32 wi return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } @@ -2657,7 +2657,7 @@ s32 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set return CELL_SPURS_POLICY_MODULE_ERROR_INVAL; } - if ((spurs->wklEnabled.read_relaxed() & (0x80000000u >> wid)) == 0) + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0) { return CELL_SPURS_POLICY_MODULE_ERROR_SRCH; } @@ -2667,18 +2667,20 @@ s32 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set return 
CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 	}
 
-	if (s32 res = spurs->wklFlag.flag.atomic_op_sync(0, [spurs, wid, is_set](be_t<u32>& flag) -> s32
+	_mm_mfence();
+
+	if (s32 res = spurs->wklFlag.flag.atomic_op([spurs, wid, is_set](be_t<u32>& flag) -> s32
 	{
 		if (is_set)
 		{
-			if (spurs->wklFlagReceiver.read_relaxed() != 0xff)
+			if (spurs->wklFlagReceiver.load() != 0xff)
 			{
 				return CELL_SPURS_POLICY_MODULE_ERROR_BUSY;
 			}
 		}
 		else
 		{
-			if (spurs->wklFlagReceiver.read_relaxed() != wid)
+			if (spurs->wklFlagReceiver.load() != wid)
 			{
 				return CELL_SPURS_POLICY_MODULE_ERROR_PERM;
 			}
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp
index 323da36754..970ecd6099 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp
@@ -194,21 +194,21 @@ bool spursKernel1SelectWorkload(SPUThread & spu) {
     // The system service has the highest priority. Select the system service if
     // the system service message bit for this SPU is set.
-    if (spurs->sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) {
+    if (spurs->sysSrvMessage.load() & (1 << ctxt->spuNum)) {
         ctxt->spuIdling = 0;
         if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
             // Clear the message bit
-            spurs->sysSrvMessage.write_relaxed(spurs->sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum));
+            spurs->sysSrvMessage.store(spurs->sysSrvMessage.load() & ~(1 << ctxt->spuNum));
         }
     } else {
         // Calculate the scheduling weight for each workload
         u16 maxWeight = 0;
         for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
             u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i);
-            u16 wklSignal = spurs->wklSignal1.read_relaxed() & (0x8000 >> i);
-            u8 wklFlag = spurs->wklFlag.flag.read_relaxed() == 0 ? spurs->wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
-            u8 readyCount = spurs->wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklReadyCount1[i].read_relaxed();
-            u8 idleSpuCount = spurs->wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+            u16 wklSignal = spurs->wklSignal1.load() & (0x8000 >> i);
+            u8 wklFlag = spurs->wklFlag.flag.load() == 0 ? spurs->wklFlagReceiver.load() == i ? 1 : 0 : 0;
+            u8 readyCount = spurs->wklReadyCount1[i].load() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklReadyCount1[i].load();
+            u8 idleSpuCount = spurs->wklIdleSpuCountOrReadyCount2[i].load() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklIdleSpuCountOrReadyCount2[i].load();
             u8 requestCount = readyCount + idleSpuCount;
 
             // For a workload to be considered for scheduling:
@@ -218,7 +218,7 @@ bool spursKernel1SelectWorkload(SPUThread & spu) {
             // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
             //    OR the workload must be signalled
             //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
-            if (runnable && ctxt->priority[i] != 0 && spurs->wklMaxContention[i].read_relaxed() > contention[i]) {
+            if (runnable && ctxt->priority[i] != 0 && spurs->wklMaxContention[i].load() > contention[i]) {
                 if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
                     // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
                     // 1. Workload signal set or workload flag or ready count > contention
@@ -253,12 +253,12 @@ bool spursKernel1SelectWorkload(SPUThread & spu) {
         if (!isPoll || wklSelectedId == ctxt->wklCurrentId) {
             // Clear workload signal for the selected workload
-            spurs->wklSignal1.write_relaxed(spurs->wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId));
-            spurs->wklSignal2.write_relaxed(spurs->wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId));
+            spurs->wklSignal1.store(spurs->wklSignal1.load() & ~(0x8000 >> wklSelectedId));
+            spurs->wklSignal2.store(spurs->wklSignal1.load() & ~(0x80000000u >> wklSelectedId));
 
             // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
-            if (wklSelectedId == spurs->wklFlagReceiver.read_relaxed()) {
-                spurs->wklFlag.flag.write_relaxed(0xFFFFFFFF);
+            if (wklSelectedId == spurs->wklFlagReceiver.load()) {
+                spurs->wklFlag.flag.store(0xFFFFFFFF);
             }
         }
     }
@@ -353,12 +353,12 @@ bool spursKernel2SelectWorkload(SPUThread & spu) {
     // The system service has the highest priority. Select the system service if
     // the system service message bit for this SPU is set.
-    if (spurs->sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) {
+    if (spurs->sysSrvMessage.load() & (1 << ctxt->spuNum)) {
         // Not sure what this does. Possibly marks the SPU as in use.
         ctxt->spuIdling = 0;
         if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
             // Clear the message bit
-            spurs->sysSrvMessage.write_relaxed(spurs->sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum));
+            spurs->sysSrvMessage.store(spurs->sysSrvMessage.load() & ~(1 << ctxt->spuNum));
         }
     } else {
         // Calculate the scheduling weight for each workload
@@ -367,10 +367,10 @@ bool spursKernel2SelectWorkload(SPUThread & spu) {
             auto j = i & 0x0F;
             u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j);
             u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4;
-            u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklMaxContention[j].read_relaxed() & 0x0F : spurs->wklMaxContention[j].read_relaxed() >> 4;
-            u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->wklSignal2.read_relaxed() & (0x8000 >> j);
-            u8 wklFlag = spurs->wklFlag.flag.read_relaxed() == 0 ? spurs->wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
-            u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[j].read_relaxed() : spurs->wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+            u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklMaxContention[j].load() & 0x0F : spurs->wklMaxContention[j].load() >> 4;
+            u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklSignal1.load() & (0x8000 >> j) : spurs->wklSignal2.load() & (0x8000 >> j);
+            u8 wklFlag = spurs->wklFlag.flag.load() == 0 ? spurs->wklFlagReceiver.load() == i ? 1 : 0 : 0;
+            u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[j].load() : spurs->wklIdleSpuCountOrReadyCount2[j].load();
 
             // For a workload to be considered for scheduling:
             // 1. 
Its priority must be greater than 0 @@ -405,12 +405,12 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload - spurs->wklSignal1.write_relaxed(spurs->wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)); - spurs->wklSignal2.write_relaxed(spurs->wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)); + spurs->wklSignal1.store(spurs->wklSignal1.load() & ~(0x8000 >> wklSelectedId)); + spurs->wklSignal2.store(spurs->wklSignal1.load() & ~(0x80000000u >> wklSelectedId)); // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s - if (wklSelectedId == spurs->wklFlagReceiver.read_relaxed()) { - spurs->wklFlag.flag.write_relaxed(0xFFFFFFFF); + if (wklSelectedId == spurs->wklFlagReceiver.load()) { + spurs->wklFlag.flag.store(0xFFFFFFFF); } } } @@ -492,7 +492,7 @@ void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { } ctxt->wklCurrentAddr = wklInfo->addr; - ctxt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); + ctxt->wklCurrentUniqueId = wklInfo->uniqueId.load(); } if (!isKernel2) { @@ -624,7 +624,7 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { // Check if any workloads can be scheduled bool foundReadyWorkload = false; - if (spurs->sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + if (spurs->sysSrvMessage.load() & (1 << ctxt->spuNum)) { foundReadyWorkload = true; } else { if (spurs->flags1 & SF1_32_WORKLOADS) { @@ -632,11 +632,11 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { u32 j = i & 0x0F; u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklMaxContention[j].read_relaxed() & 0x0F : spurs->wklMaxContention[j].read_relaxed() >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklMaxContention[j].load() & 0x0F : spurs->wklMaxContention[j].load() >> 4; u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklCurrentContention[j] & 0x0F : spurs->wklCurrentContention[j] >> 4; - u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = spurs->wklFlag.flag.read_relaxed() == 0 ? spurs->wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[j].read_relaxed() : spurs->wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklSignal1.load() & (0x8000 >> j) : spurs->wklSignal2.load() & (0x8000 >> j); + u8 wklFlag = spurs->wklFlag.flag.load() == 0 ? spurs->wklFlagReceiver.load() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[j].load() : spurs->wklIdleSpuCountOrReadyCount2[j].load(); if (runnable && priority > 0 && maxContention > contention) { if (wklFlag || wklSignal || readyCount > contention) { @@ -648,13 +648,13 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { } else { for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i); - u16 wklSignal = spurs->wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = spurs->wklFlag.flag.read_relaxed() == 0 ? spurs->wklFlagReceiver.read_relaxed() == i ? 
1 : 0 : 0; - u8 readyCount = spurs->wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = spurs->wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u16 wklSignal = spurs->wklSignal1.load() & (0x8000 >> i); + u8 wklFlag = spurs->wklFlag.flag.load() == 0 ? spurs->wklFlagReceiver.load() == i ? 1 : 0 : 0; + u8 readyCount = spurs->wklReadyCount1[i].load() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklReadyCount1[i].load(); + u8 idleSpuCount = spurs->wklIdleSpuCountOrReadyCount2[i].load() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->wklIdleSpuCountOrReadyCount2[i].load(); u8 requestCount = readyCount + idleSpuCount; - if (runnable && ctxt->priority[i] != 0 && spurs->wklMaxContention[i].read_relaxed() > spurs->wklCurrentContention[i]) { + if (runnable && ctxt->priority[i] != 0 && spurs->wklMaxContention[i].load() > spurs->wklCurrentContention[i]) { if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->wklCurrentContention[i])) { foundReadyWorkload = true; break; @@ -802,7 +802,7 @@ void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) } // Update workload message - if (spurs->sysSrvMsgUpdateWorkload.read_relaxed() & (1 << ctxt->spuNum)) { + if (spurs->sysSrvMsgUpdateWorkload.load() & (1 << ctxt->spuNum)) { spurs->sysSrvMsgUpdateWorkload &= ~(1 << ctxt->spuNum); updateWorkload = true; } @@ -847,7 +847,7 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) // Copy the priority of the workload for this SPU and its unique id to the LS ctxt->priority[i] = wklInfo1[i].priority[ctxt->spuNum] == 0 ? 0 : 0x10 - wklInfo1[i].priority[ctxt->spuNum]; - ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); + ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.load(); if (spurs->flags1 & SF1_32_WORKLOADS) { auto wklInfo2 = vm::get_ptr(spu.offset + 0x30200); @@ -865,7 +865,7 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { // Update workload status and runnable flag based on the workload state auto wklStatus = spurs->wklStatus1[i]; - if (spurs->wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + if (spurs->wklState1[i].load() == SPURS_WKL_STATE_RUNNABLE) { spurs->wklStatus1[i] |= 1 << ctxt->spuNum; ctxt->wklRunnable1 |= 0x8000 >> i; } else { @@ -874,9 +874,9 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) // If the workload is shutting down and if this is the last SPU from which it is being removed then // add it to the shutdown bit set - if (spurs->wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (spurs->wklState1[i].load() == SPURS_WKL_STATE_SHUTTING_DOWN) { if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->wklStatus1[i] == 0)) { - spurs->wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + spurs->wklState1[i].store(SPURS_WKL_STATE_REMOVABLE); wklShutdownBitSet |= 0x80000000u >> i; } } @@ -884,7 +884,7 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) if (spurs->flags1 & SF1_32_WORKLOADS) { // Update workload status and runnable flag based on the workload state wklStatus = spurs->wklStatus2[i]; - if (spurs->wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + if (spurs->wklState2[i].load() == SPURS_WKL_STATE_RUNNABLE) { spurs->wklStatus2[i] |= 1 << 
ctxt->spuNum; ctxt->wklRunnable2 |= 0x8000 >> i; } else { @@ -893,9 +893,9 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) // If the workload is shutting down and if this is the last SPU from which it is being removed then // add it to the shutdown bit set - if (spurs->wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (spurs->wklState2[i].load() == SPURS_WKL_STATE_SHUTTING_DOWN) { if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->wklStatus2[i] == 0)) { - spurs->wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + spurs->wklState2[i].store(SPURS_WKL_STATE_REMOVABLE); wklShutdownBitSet |= 0x8000 >> i; } } @@ -924,14 +924,14 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelC for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { if (wklShutdownBitSet & (0x80000000u >> i)) { spurs->wklEvent1[i] |= 0x01; - if (spurs->wklEvent1[i].read_relaxed() & 0x02 || spurs->wklEvent1[i].read_relaxed() & 0x10) { + if (spurs->wklEvent1[i].load() & 0x02 || spurs->wklEvent1[i].load() & 0x10) { wklNotifyBitSet |= 0x80000000u >> i; } } if (wklShutdownBitSet & (0x8000 >> i)) { spurs->wklEvent2[i] |= 0x01; - if (spurs->wklEvent2[i].read_relaxed() & 0x02 || spurs->wklEvent2[i].read_relaxed() & 0x10) { + if (spurs->wklEvent2[i].load() & 0x02 || spurs->wklEvent2[i].load() & 0x10) { wklNotifyBitSet |= 0x8000 >> i; } } @@ -1035,10 +1035,10 @@ void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelConte if (wklId >= CELL_SPURS_MAX_WORKLOAD) { spurs->wklCurrentContention[wklId & 0x0F] -= 0x10; - spurs->wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); + spurs->wklReadyCount1[wklId & 0x0F].store(spurs->wklReadyCount1[wklId & 0x0F].load() - 1); } else { spurs->wklCurrentContention[wklId & 0x0F] -= 0x01; - spurs->wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + spurs->wklIdleSpuCountOrReadyCount2[wklId & 0x0F].store(spurs->wklIdleSpuCountOrReadyCount2[wklId & 0x0F].load() - 1); } memcpy(vm::get_ptr(spu.offset + 0x100), spurs, 128); @@ -1317,14 +1317,14 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * vm::reservation_op(vm::cast(kernelCtxt->spurs.addr()), 128, [&]() { auto spurs = kernelCtxt->spurs.priv_ptr(); - s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed(); + s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->wklReadyCount1[kernelCtxt->wklCurrentId].load() : spurs->wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].load(); readyCount += numNewlyReadyTasks; readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 
0xFF : readyCount; if (kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) { - spurs->wklReadyCount1[kernelCtxt->wklCurrentId].write_relaxed(readyCount); + spurs->wklReadyCount1[kernelCtxt->wklCurrentId].store(readyCount); } else { - spurs->wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount); + spurs->wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].store(readyCount); } memcpy(vm::get_ptr(spu.offset + 0x100), spurs, 128); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp index ab17450231..c424fe59b2 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp @@ -19,8 +19,10 @@ waiter_map_t g_sync_rwm_read_wm("sync_rwm_read_wm"); waiter_map_t g_sync_rwm_write_wm("sync_rwm_write_wm"); waiter_map_t g_sync_queue_wm("sync_queue_wm"); -s32 syncMutexInitialize(vm::ptr mutex) +s32 cellSyncMutexInitialize(vm::ptr mutex) { + cellSync.Log("cellSyncMutexInitialize(mutex=*0x%x)", mutex); + if (!mutex) { return CELL_SYNC_ERROR_NULL_POINTER; @@ -31,18 +33,11 @@ s32 syncMutexInitialize(vm::ptr mutex) return CELL_SYNC_ERROR_ALIGN; } - mutex->sync_var.exchange({}); + mutex->exchange({}); return CELL_OK; } -s32 cellSyncMutexInitialize(vm::ptr mutex) -{ - cellSync.Log("cellSyncMutexInitialize(mutex=*0x%x)", mutex); - - return syncMutexInitialize(mutex); -} - s32 cellSyncMutexLock(vm::ptr mutex) { cellSync.Log("cellSyncMutexLock(mutex=*0x%x)", mutex); @@ -57,16 +52,13 @@ s32 cellSyncMutexLock(vm::ptr mutex) return CELL_SYNC_ERROR_ALIGN; } - // prx: increase acquire_count and remember its old value - const auto order = mutex->cnt.acq++; + // increase acq value and remember its old value + const auto order = mutex->atomic_op(&sync_mutex_t::acquire); - // prx: wait until release_count is equal to old acquire_count - g_sync_mutex_wm.wait_op(mutex.addr(), [mutex, order]() - { - return order == mutex->cnt.rel.read_relaxed(); - }); + // wait until rel value is equal to old acq value + g_sync_mutex_wm.wait_op(mutex.addr(), WRAP_EXPR(mutex->load().rel == order)); - mutex->sync_var.read_sync(); + _mm_mfence(); return CELL_OK; } @@ -85,11 +77,12 @@ s32 cellSyncMutexTryLock(vm::ptr mutex) return CELL_SYNC_ERROR_ALIGN; } - // prx: lock only if acquire_count and release_count are equal - return mutex->sync_var.atomic_op(CELL_OK, [](CellSyncMutex::sync_t& mutex) -> s32 + if (!mutex->atomic_op(&sync_mutex_t::try_lock)) { - return (mutex.cnt_acq++ != mutex.cnt_rel) ? 
CELL_SYNC_ERROR_BUSY : CELL_OK; - }); + return CELL_SYNC_ERROR_BUSY; + } + + return CELL_OK; } s32 cellSyncMutexUnlock(vm::ptr mutex) @@ -100,66 +93,43 @@ s32 cellSyncMutexUnlock(vm::ptr mutex) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!mutex.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - // prx: increase release count - mutex->cnt.rel++; + mutex->atomic_op(&sync_mutex_t::unlock); g_sync_mutex_wm.notify(mutex.addr()); return CELL_OK; } -s32 syncBarrierInitialize(vm::ptr barrier, u16 total_count) -{ - if (!barrier) - { - return CELL_SYNC_ERROR_NULL_POINTER; - } - if (!barrier.aligned()) - { - return CELL_SYNC_ERROR_ALIGN; - } - if (!total_count || total_count > 32767) - { - return CELL_SYNC_ERROR_INVAL; - } - - // prx: zeroize first u16, write total_count in second u16 and sync - barrier->data.exchange({ 0, total_count }); - - return CELL_OK; -} - s32 cellSyncBarrierInitialize(vm::ptr barrier, u16 total_count) { cellSync.Log("cellSyncBarrierInitialize(barrier=*0x%x, total_count=%d)", barrier, total_count); - return syncBarrierInitialize(barrier, total_count); -} - -s32 syncBarrierTryNotifyOp(CellSyncBarrier::data_t& barrier) -{ - // prx: extract m_value (repeat if < 0), increase, compare with second s16, set sign bit if equal, insert it back - s16 value = barrier.m_value; - - if (value < 0) + if (!barrier) { - return CELL_SYNC_ERROR_BUSY; + return CELL_SYNC_ERROR_NULL_POINTER; } - if (++value == barrier.m_count) + if (!barrier.aligned()) { - value |= 0x8000; + return CELL_SYNC_ERROR_ALIGN; } - barrier.m_value = value; + if (!total_count || total_count > 32767) + { + return CELL_SYNC_ERROR_INVAL; + } + + // clear current value, write total_count and sync + barrier->exchange({ 0, total_count }); return CELL_OK; -}; +} s32 cellSyncBarrierNotify(vm::ptr barrier) { @@ -169,15 +139,13 @@ s32 cellSyncBarrierNotify(vm::ptr barrier) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!barrier.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - g_sync_barrier_notify_wm.wait_op(barrier.addr(), [barrier]() - { - return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp) == CELL_OK; - }); + g_sync_barrier_notify_wm.wait_op(barrier.addr(), WRAP_EXPR(barrier->atomic_op(&sync_barrier_t::try_notify))); g_sync_barrier_wait_wm.notify(barrier.addr()); @@ -192,37 +160,20 @@ s32 cellSyncBarrierTryNotify(vm::ptr barrier) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!barrier.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - if (s32 res = barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp)) - { - return res; - } + _mm_mfence(); - g_sync_barrier_wait_wm.notify(barrier.addr()); - - return CELL_OK; -} - -s32 syncBarrierTryWaitOp(CellSyncBarrier::data_t& barrier) -{ - // prx: extract m_value (repeat if >= 0), decrease it, set 0 if == 0x8000, insert it back - s16 value = barrier.m_value; - - if (value >= 0) + if (!barrier->atomic_op(&sync_barrier_t::try_notify)) { return CELL_SYNC_ERROR_BUSY; } - if (--value == -0x8000) - { - value = 0; - } - - barrier.m_value = value; + g_sync_barrier_wait_wm.notify(barrier.addr()); return CELL_OK; } @@ -235,15 +186,15 @@ s32 cellSyncBarrierWait(vm::ptr barrier) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!barrier.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - g_sync_barrier_wait_wm.wait_op(barrier.addr(), [barrier]() - { - return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp) == CELL_OK; - }); + _mm_mfence(); + + g_sync_barrier_wait_wm.wait_op(barrier.addr(), WRAP_EXPR(barrier->atomic_op(&sync_barrier_t::try_wait))); 
g_sync_barrier_notify_wm.notify(barrier.addr()); @@ -258,14 +209,17 @@ s32 cellSyncBarrierTryWait(vm::ptr barrier) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!barrier.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - if (s32 res = barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp)) + _mm_mfence(); + + if (!barrier->atomic_op(&sync_barrier_t::try_wait)) { - return res; + return CELL_SYNC_ERROR_BUSY; } g_sync_barrier_notify_wm.notify(barrier.addr()); @@ -273,56 +227,31 @@ s32 cellSyncBarrierTryWait(vm::ptr barrier) return CELL_OK; } -s32 syncRwmInitialize(vm::ptr rwm, vm::ptr buffer, u32 buffer_size) -{ - if (!rwm || !buffer) - { - return CELL_SYNC_ERROR_NULL_POINTER; - } - if (!rwm.aligned() || buffer % 128) - { - return CELL_SYNC_ERROR_ALIGN; - } - if (buffer_size % 128 || buffer_size > 0x4000) - { - return CELL_SYNC_ERROR_INVAL; - } - - // prx: zeroize first u16 and second u16, write buffer_size in second u32, write buffer_addr in second u64 and sync - rwm->m_size = buffer_size; - rwm->m_buffer = buffer; - rwm->data.exchange({}); - - return CELL_OK; -} - s32 cellSyncRwmInitialize(vm::ptr rwm, vm::ptr buffer, u32 buffer_size) { cellSync.Log("cellSyncRwmInitialize(rwm=*0x%x, buffer=*0x%x, buffer_size=0x%x)", rwm, buffer, buffer_size); - return syncRwmInitialize(rwm, buffer, buffer_size); -} - -s32 syncRwmTryReadBeginOp(CellSyncRwm::data_t& rwm) -{ - if (rwm.m_writers.data()) + if (!rwm || !buffer) { - return CELL_SYNC_ERROR_BUSY; + return CELL_SYNC_ERROR_NULL_POINTER; } - rwm.m_readers++; - - return CELL_OK; -} - -s32 syncRwmReadEndOp(CellSyncRwm::data_t& rwm) -{ - if (!rwm.m_readers.data()) + if (!rwm.aligned() || buffer % 128) { - return CELL_SYNC_ERROR_ABORT; + return CELL_SYNC_ERROR_ALIGN; } - rwm.m_readers--; + if (buffer_size % 128 || buffer_size > 0x4000) + { + return CELL_SYNC_ERROR_INVAL; + } + + // clear readers and writers, write buffer_size, buffer addr and sync + rwm->ctrl.store({}); + rwm->size = buffer_size; + rwm->buffer = buffer; + + _mm_mfence(); return CELL_OK; } @@ -335,25 +264,22 @@ s32 cellSyncRwmRead(vm::ptr rwm, vm::ptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!rwm.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - // prx: increase m_readers, wait until m_writers is zero - g_sync_rwm_read_wm.wait_op(rwm.addr(), [rwm]() - { - return rwm->data.atomic_op(CELL_OK, syncRwmTryReadBeginOp) == CELL_OK; - }); + // wait until `writers` is zero, increase `readers` + g_sync_rwm_read_wm.wait_op(rwm.addr(), WRAP_EXPR(rwm->ctrl.atomic_op(&sync_rwm_t::try_read_begin))); - // copy data to buffer_addr - memcpy(buffer.get_ptr(), rwm->m_buffer.get_ptr(), rwm->m_size); + // copy data to buffer + std::memcpy(buffer.get_ptr(), rwm->buffer.get_ptr(), rwm->size); - // prx: decrease m_readers (return 0x8041010C if already zero) - if (s32 res = rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp)) + // decrease `readers`, return error if already zero + if (!rwm->ctrl.atomic_op(&sync_rwm_t::try_read_end)) { - cellSync.Error("syncRwmReadEndOp(rwm=0x%x) failed: m_readers == 0", rwm); - return res; + return CELL_SYNC_ERROR_ABORT; } g_sync_rwm_write_wm.notify(rwm.addr()); @@ -369,36 +295,28 @@ s32 cellSyncRwmTryRead(vm::ptr rwm, vm::ptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!rwm.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - if (s32 res = rwm->data.atomic_op(CELL_OK, syncRwmTryReadBeginOp)) - { - return res; - } - - memcpy(buffer.get_ptr(), rwm->m_buffer.get_ptr(), rwm->m_size); - - if (s32 res = rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp)) - { - return res; - } - 
- g_sync_rwm_write_wm.notify(rwm.addr()); - - return CELL_OK; -} - -s32 syncRwmTryWriteBeginOp(CellSyncRwm::data_t& rwm) -{ - if (rwm.m_writers.data()) + // increase `readers` if `writers` is zero + if (!rwm->ctrl.atomic_op(&sync_rwm_t::try_read_begin)) { return CELL_SYNC_ERROR_BUSY; } - rwm.m_writers = 1; + // copy data to buffer + std::memcpy(buffer.get_ptr(), rwm->buffer.get_ptr(), rwm->size); + + // decrease `readers`, return error if already zero + if (!rwm->ctrl.atomic_op(&sync_rwm_t::try_read_end)) + { + return CELL_SYNC_ERROR_ABORT; + } + + g_sync_rwm_write_wm.notify(rwm.addr()); return CELL_OK; } @@ -411,27 +329,23 @@ s32 cellSyncRwmWrite(vm::ptr rwm, vm::cptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!rwm.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - g_sync_rwm_read_wm.wait_op(rwm.addr(), [rwm]() - { - return rwm->data.atomic_op(CELL_OK, syncRwmTryWriteBeginOp) == CELL_OK; - }); + // wait until `writers` is zero, set to 1 + g_sync_rwm_read_wm.wait_op(rwm.addr(), WRAP_EXPR(rwm->ctrl.atomic_op(&sync_rwm_t::try_write_begin))); - // prx: wait until m_readers == 0 - g_sync_rwm_write_wm.wait_op(rwm.addr(), [rwm]() - { - return rwm->data.read_relaxed().m_readers.data() == 0; - }); + // wait until `readers` is zero + g_sync_rwm_write_wm.wait_op(rwm.addr(), WRAP_EXPR(!rwm->ctrl.load().readers.data())); - // prx: copy data from buffer_addr - memcpy(rwm->m_buffer.get_ptr(), buffer.get_ptr(), rwm->m_size); + // copy data from buffer + std::memcpy(rwm->buffer.get_ptr(), buffer.get_ptr(), rwm->size); - // prx: sync and zeroize m_readers and m_writers - rwm->data.exchange({}); + // sync and clear `readers` and `writers` + rwm->ctrl.exchange({}); g_sync_rwm_read_wm.notify(rwm.addr()); @@ -446,81 +360,60 @@ s32 cellSyncRwmTryWrite(vm::ptr rwm, vm::cptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!rwm.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - // prx: compare m_readers | m_writers with 0, return if not zero, set m_writers to 1 - if (!rwm->data.compare_and_swap_test({ 0, 0 }, { 0, 1 })) + // set `writers` to 1 if `readers` and `writers` are zero + if (!rwm->ctrl.compare_and_swap_test({ 0, 0 }, { 0, 1 })) { return CELL_SYNC_ERROR_BUSY; } - // prx: copy data from buffer_addr - memcpy(rwm->m_buffer.get_ptr(), buffer.get_ptr(), rwm->m_size); + // copy data from buffer + std::memcpy(rwm->buffer.get_ptr(), buffer.get_ptr(), rwm->size); - // prx: sync and zeroize m_readers and m_writers - rwm->data.exchange({}); + // sync and clear `readers` and `writers` + rwm->ctrl.exchange({}); g_sync_rwm_read_wm.notify(rwm.addr()); return CELL_OK; } -s32 syncQueueInitialize(vm::ptr queue, vm::ptr buffer, u32 size, u32 depth) -{ - if (!queue) - { - return CELL_SYNC_ERROR_NULL_POINTER; - } - if (size && !buffer) - { - return CELL_SYNC_ERROR_NULL_POINTER; - } - if (!queue.aligned() || buffer % 16) - { - return CELL_SYNC_ERROR_ALIGN; - } - if (!depth || size % 16) - { - return CELL_SYNC_ERROR_INVAL; - } - - // prx: zeroize first u64, write size in third u32, write depth in fourth u32, write address in third u64 and sync - queue->m_size = size; - queue->m_depth = depth; - queue->m_buffer = buffer; - queue->data.exchange({}); - - return CELL_OK; -} - s32 cellSyncQueueInitialize(vm::ptr queue, vm::ptr buffer, u32 size, u32 depth) { cellSync.Log("cellSyncQueueInitialize(queue=*0x%x, buffer=*0x%x, size=0x%x, depth=0x%x)", queue, buffer, size, depth); - return syncQueueInitialize(queue, buffer, size, depth); -} - -s32 syncQueueTryPushOp(CellSyncQueue::data_t& queue, u32 depth, u32& position) -{ - 
const u32 v1 = queue.m_v1; - const u32 v2 = queue.m_v2; - - // prx: compare 5th u8 with zero (break if not zero) - // prx: compare (second u32 (u24) + first u8) with depth (break if greater or equal) - if ((v2 >> 24) || ((v2 & 0xffffff) + (v1 >> 24)) >= depth) + if (!queue) { - return CELL_SYNC_ERROR_BUSY; + return CELL_SYNC_ERROR_NULL_POINTER; } - // prx: extract first u32 (u24) (-> position), calculate (position + 1) % depth, insert it back - // prx: insert 1 in 5th u8 - // prx: extract second u32 (u24), increase it, insert it back - position = (v1 & 0xffffff); - queue.m_v1 = (v1 & 0xff000000) | ((position + 1) % depth); - queue.m_v2 = (1 << 24) | ((v2 & 0xffffff) + 1); + if (size && !buffer) + { + return CELL_SYNC_ERROR_NULL_POINTER; + } + + if (!queue.aligned() || buffer % 16) + { + return CELL_SYNC_ERROR_ALIGN; + } + + if (!depth || size % 16) + { + return CELL_SYNC_ERROR_INVAL; + } + + // clear sync var, write size, depth, buffer addr and sync + queue->ctrl.store({}); + queue->size = size; + queue->depth = depth; + queue->buffer = buffer; + + _mm_mfence(); return CELL_OK; } @@ -533,30 +426,23 @@ s32 cellSyncQueuePush(vm::ptr queue, vm::cptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const u32 size = queue->m_size; - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); u32 position; - g_sync_queue_wm.wait_op(queue.addr(), [queue, depth, &position]() - { - return CELL_OK == queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 - { - return syncQueueTryPushOp(queue, depth, position); - }); - }); - // prx: memcpy(position * m_size + m_addr, buffer_addr, m_size), sync - memcpy(&queue->m_buffer[position * size], buffer.get_ptr(), size); + g_sync_queue_wm.wait_op(queue.addr(), WRAP_EXPR(queue->ctrl.atomic_op(&sync_queue_t::try_push, depth, position))); - // prx: atomically insert 0 in 5th u8 - queue->data &= { 0xffffffffu, 0x00ffffff }; + // copy data from the buffer into the queue at the acquired position + std::memcpy(&queue->buffer[position * queue->size], buffer.get_ptr(), queue->size); + + // clear 5th byte + queue->ctrl &= { 0xffffffff, 0x00ffffff }; g_sync_queue_wm.notify(queue.addr()); @@ -571,53 +457,28 @@ s32 cellSyncQueueTryPush(vm::ptr queue, vm::cptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const u32 size = queue->m_size; - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); u32 position; - s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 - { - return syncQueueTryPushOp(queue, depth, position); - }); - if (res) - { - return res; - } - memcpy(&queue->m_buffer[position * size], buffer.get_ptr(), size); - - queue->data &= { 0xffffffffu, 0x00ffffff }; - - g_sync_queue_wm.notify(queue.addr()); - - return CELL_OK; - } - -s32 syncQueueTryPopOp(CellSyncQueue::data_t& queue, u32 depth, u32& position) -{ - const u32 v1 = queue.m_v1; - const u32 v2 = queue.m_v2; - - // prx: extract first u8, repeat if not zero - // prx: extract second u32 (u24), subtract 5th u8, compare with zero, repeat if less or equal - if ((v1 >> 24) || ((v2 & 0xffffff) <= (v2 >> 24))) + if (!queue->ctrl.atomic_op(&sync_queue_t::try_push,
depth, position)) { return CELL_SYNC_ERROR_BUSY; } - // prx: insert 1 in first u8 - // prx: extract first u32 (u24), add depth, subtract second u32 (u24), calculate (% depth), save to position - // prx: extract second u32 (u24), decrease it, insert it back - queue.m_v1 = 0x1000000 | v1; - position = ((v1 & 0xffffff) + depth - (v2 & 0xffffff)) % depth; - queue.m_v2 = (v2 & 0xff000000) | ((v2 & 0xffffff) - 1); + // copy data from the buffer into the queue at the acquired position + std::memcpy(&queue->buffer[position * queue->size], buffer.get_ptr(), queue->size); + + // clear 5th byte + queue->ctrl &= { 0xffffffff, 0x00ffffff }; + + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -630,30 +491,23 @@ s32 cellSyncQueuePop(vm::ptr queue, vm::ptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const u32 size = queue->m_size; - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); u32 position; - g_sync_queue_wm.wait_op(queue.addr(), [queue, depth, &position]() - { - return CELL_OK == queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 - { - return syncQueueTryPopOp(queue, depth, position); - }); - }); - // prx: (sync), memcpy(buffer_addr, position * m_size + m_addr, m_size) - memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); + g_sync_queue_wm.wait_op(queue.addr(), WRAP_EXPR(queue->ctrl.atomic_op(&sync_queue_t::try_pop, depth, position))); - // prx: atomically insert 0 in first u8 - queue->data &= { 0x00ffffff, 0xffffffffu }; + // copy data at the position to the buffer + std::memcpy(buffer.get_ptr(), &queue->buffer[position * queue->size], queue->size); + + // clear first byte + queue->ctrl &= { 0x00ffffff, 0xffffffffu }; g_sync_queue_wm.notify(queue.addr()); @@ -668,47 +522,28 @@ s32 cellSyncQueueTryPop(vm::ptr queue, vm::ptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const u32 size = queue->m_size; - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); u32 position; - s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 - { - return syncQueueTryPopOp(queue, depth, position); - }); - if (res) - { - return res; - } - - memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); - - queue->data &= { 0x00ffffff, 0xffffffffu }; - - g_sync_queue_wm.notify(queue.addr()); - - return CELL_OK; - } - -s32 syncQueueTryPeekOp(CellSyncQueue::data_t& queue, u32 depth, u32& position) -{ - const u32 v1 = queue.m_v1; - const u32 v2 = queue.m_v2; - - if ((v1 >> 24) || ((v2 & 0xffffff) <= (v2 >> 24))) + + if (!queue->ctrl.atomic_op(&sync_queue_t::try_pop, depth, position)) { return CELL_SYNC_ERROR_BUSY; } - queue.m_v1 = 0x1000000 | v1; - position = ((v1 & 0xffffff) + depth - (v2 & 0xffffff)) % depth; + // copy data at the position to the buffer + std::memcpy(buffer.get_ptr(), &queue->buffer[position * queue->size], queue->size); + + // clear first byte + queue->ctrl &= { 0x00ffffff, 0xffffffffu }; + + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -721,28 +556,23 @@ s32 cellSyncQueuePeek(vm::ptr queue, vm::ptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return
CELL_SYNC_ERROR_ALIGN; } - const u32 size = queue->m_size; - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); u32 position; - g_sync_queue_wm.wait_op(queue.addr(), [queue, depth, &position]() - { - return CELL_OK == queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 - { - return syncQueueTryPeekOp(queue, depth, position); - }); - }); - memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); + g_sync_queue_wm.wait_op(queue.addr(), WRAP_EXPR(queue->ctrl.atomic_op(&sync_queue_t::try_peek, depth, position))); - queue->data &= { 0x00ffffff, 0xffffffffu }; + // copy data at the position to the buffer + std::memcpy(buffer.get_ptr(), &queue->buffer[position * queue->size], queue->size); + + // clear first byte + queue->ctrl &= { 0x00ffffff, 0xffffffffu }; g_sync_queue_wm.notify(queue.addr()); @@ -757,29 +587,26 @@ s32 cellSyncQueueTryPeek(vm::ptr queue, vm::ptr buffer) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const u32 size = queue->m_size; - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); u32 position; - s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 + + if (!queue->ctrl.atomic_op(&sync_queue_t::try_peek, depth, position)) { - return syncQueueTryPeekOp(queue, depth, position); - }); - if (res) - { - return res; + return CELL_SYNC_ERROR_BUSY; } - memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); + // copy data at the position to the buffer + std::memcpy(buffer.get_ptr(), &queue->buffer[position * queue->size], queue->size); - queue->data &= { 0x00ffffff, 0xffffffffu }; + // clear first byte + queue->ctrl &= { 0x00ffffff, 0xffffffffu }; g_sync_queue_wm.notify(queue.addr()); @@ -794,17 +621,15 @@ s32 cellSyncQueueSize(vm::ptr queue) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const auto data = queue->data.read_relaxed(); - const u32 count = data.m_v2 & 0xffffff; - const u32 depth = queue->m_depth; - assert((data.m_v1 & 0xffffff) <= depth && count <= depth); + queue->check_depth(); - return count; + return queue->ctrl.load().m_v2 & 0xffffff; } s32 cellSyncQueueClear(vm::ptr queue) @@ -815,23 +640,22 @@ s32 cellSyncQueueClear(vm::ptr queue) { return CELL_SYNC_ERROR_NULL_POINTER; } + if (!queue.aligned()) { return CELL_SYNC_ERROR_ALIGN; } - const u32 depth = queue->m_depth; - const auto data = queue->data.read_relaxed(); - assert((data.m_v1 & 0xffffff) <= depth && (data.m_v2 & 0xffffff) <= depth); + const u32 depth = queue->check_depth(); // TODO: optimize if possible g_sync_queue_wm.wait_op(queue.addr(), [queue, depth]() { - return CELL_OK == queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32 + return CELL_OK == queue->ctrl.atomic_op([depth](sync_queue_t& queue) -> s32 { const u32 v1 = queue.m_v1; - // prx: extract first u8, repeat if not zero, insert 1 + // extract first byte, repeat if not zero, insert 1 if (v1 >> 24) { return CELL_SYNC_ERROR_BUSY; @@ -845,11 +669,11 @@ s32 cellSyncQueueClear(vm::ptr queue) g_sync_queue_wm.wait_op(queue.addr(), [queue, depth]() { - return CELL_OK == queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& 
queue) -> s32 + return CELL_OK == queue->ctrl.atomic_op([depth](sync_queue_t& queue) -> s32 { const u32 v2 = queue.m_v2; - // prx: extract 5th u8, repeat if not zero, insert 1 + // extract 5th byte, repeat if not zero, insert 1 if (v2 >> 24) { return CELL_SYNC_ERROR_BUSY; @@ -861,7 +685,7 @@ s32 cellSyncQueueClear(vm::ptr queue) }); }); - queue->data.exchange({}); + queue->ctrl.exchange({}); g_sync_queue_wm.notify(queue.addr()); @@ -895,8 +719,8 @@ void syncLFQueueInit(vm::ptr queue, vm::ptr buffer, u32 siz } else { - queue->pop1 = { { 0, 0, queue->pop1.read_relaxed().m_h3, 0 } }; - queue->push1 = { { 0, 0, queue->push1.read_relaxed().m_h7, 0 } }; + queue->pop1 = { { 0, 0, queue->pop1.load().m_h3, 0 } }; + queue->push1 = { { 0, 0, queue->push1.load().m_h7, 0 } }; queue->m_bs[0] = -1; // written as u32 queue->m_bs[1] = -1; queue->m_bs[2] = -1; @@ -952,7 +776,7 @@ s32 syncLFQueueInitialize(vm::ptr queue, vm::ptr buffer, u3 u32 old_value; while (true) { - const auto old = queue->init.read_relaxed(); + const auto old = queue->init.load(); auto init = old; if (old.data()) @@ -1002,12 +826,12 @@ s32 syncLFQueueInitialize(vm::ptr queue, vm::ptr buffer, u3 // prx: call internal function with same arguments syncLFQueueInit(queue, buffer, size, depth, direction, eaSignal); - // prx: sync, zeroize u32 at 0x2c offset + // prx: sync, clear u32 at 0x2c offset queue->init.exchange({}); } // prx: sync - queue->init.read_sync(); + _mm_mfence(); return CELL_OK; } @@ -1038,7 +862,7 @@ s32 syncLFQueueGetPushPointer(PPUThread& CPU, vm::ptr queue, s3 return -1; } - const auto old = queue->push1.read_sync(); + const auto old = queue->push1.load_sync(); auto push = old; if (var1) @@ -1058,7 +882,7 @@ s32 syncLFQueueGetPushPointer(PPUThread& CPU, vm::ptr queue, s3 } else { - var2 -= (s32)(u16)queue->pop1.read_relaxed().m_h1; + var2 -= (s32)(u16)queue->pop1.load().m_h1; if (var2 < 0) { var2 += depth * 2; @@ -1155,10 +979,10 @@ s32 syncLFQueueCompletePushPointer(PPUThread& CPU, vm::ptr queu while (true) { - const auto old = queue->push2.read_sync(); + const auto old = queue->push2.load_sync(); auto push2 = old; - const auto old2 = queue->push3.read_relaxed(); + const auto old2 = queue->push3.load(); auto push3 = old2; s32 var1 = pointer - (u16)push3.m_h5; @@ -1167,7 +991,7 @@ s32 syncLFQueueCompletePushPointer(PPUThread& CPU, vm::ptr queu var1 += depth * 2; } - s32 var2 = (s32)(s16)queue->pop1.read_relaxed().m_h4 - (s32)(u16)queue->pop1.read_relaxed().m_h1; + s32 var2 = (s32)(s16)queue->pop1.load().m_h4 - (s32)(u16)queue->pop1.load().m_h1; if (var2 < 0) { var2 += depth * 2; @@ -1265,7 +1089,7 @@ s32 syncLFQueueCompletePushPointer(PPUThread& CPU, vm::ptr queu } else { - pack = queue->push2.read_relaxed().pack; + pack = queue->push2.load().pack; if ((pack & 0x1f) == ((pack >> 10) & 0x1f)) { if (queue->push3.compare_and_swap_test(old2, push3)) @@ -1384,7 +1208,7 @@ s32 syncLFQueueGetPopPointer(PPUThread& CPU, vm::ptr queue, s32 return -1; } - const auto old = queue->pop1.read_sync(); + const auto old = queue->pop1.load_sync(); auto pop = old; if (var1) @@ -1404,7 +1228,7 @@ s32 syncLFQueueGetPopPointer(PPUThread& CPU, vm::ptr queue, s32 } else { - var2 = (s32)(u16)queue->push1.read_relaxed().m_h5 - var2; + var2 = (s32)(u16)queue->push1.load().m_h5 - var2; if (var2 < 0) { var2 += depth * 2; @@ -1501,10 +1325,10 @@ s32 syncLFQueueCompletePopPointer(PPUThread& CPU, vm::ptr queue while (true) { - const auto old = queue->pop2.read_sync(); + const auto old = queue->pop2.load_sync(); auto pop2 = old; - const auto old2 = 
queue->pop3.read_relaxed(); + const auto old2 = queue->pop3.load(); auto pop3 = old2; s32 var1 = pointer - (u16)pop3.m_h1; @@ -1513,7 +1337,7 @@ s32 syncLFQueueCompletePopPointer(PPUThread& CPU, vm::ptr queue var1 += depth * 2; } - s32 var2 = (s32)(s16)queue->push1.read_relaxed().m_h8 - (s32)(u16)queue->push1.read_relaxed().m_h5; + s32 var2 = (s32)(s16)queue->push1.load().m_h8 - (s32)(u16)queue->push1.load().m_h5; if (var2 < 0) { var2 += depth * 2; @@ -1610,7 +1434,7 @@ s32 syncLFQueueCompletePopPointer(PPUThread& CPU, vm::ptr queue } else { - pack = queue->pop2.read_relaxed().pack; + pack = queue->pop2.load().pack; if ((pack & 0x1f) == ((pack >> 10) & 0x1f)) { if (queue->pop3.compare_and_swap_test(old2, pop3)) @@ -1724,15 +1548,15 @@ s32 cellSyncLFQueueClear(vm::ptr queue) while (true) { - const auto old = queue->pop1.read_sync(); + const auto old = queue->pop1.load_sync(); auto pop = old; - const auto push = queue->push1.read_relaxed(); + const auto push = queue->push1.load(); s32 var1, var2; if (queue->m_direction != CELL_SYNC_QUEUE_ANY2ANY) { - var1 = var2 = (u16)queue->pop2.read_relaxed().pack; + var1 = var2 = (u16)queue->pop2.load().pack; } else { @@ -1774,10 +1598,10 @@ s32 cellSyncLFQueueSize(vm::ptr queue, vm::ptr size) while (true) { - const auto old = queue->pop3.read_sync(); + const auto old = queue->pop3.load_sync(); - u32 var1 = (u16)queue->pop1.read_relaxed().m_h1; - u32 var2 = (u16)queue->push1.read_relaxed().m_h5; + u32 var1 = (u16)queue->pop1.load().m_h1; + u32 var2 = (u16)queue->push1.load().m_h5; if (queue->pop3.compare_and_swap_test(old, old)) { diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.h b/rpcs3/Emu/SysCalls/Modules/cellSync.h index 858f3cb6d7..124d8cfd39 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSync.h @@ -31,67 +31,216 @@ enum CELL_SYNC_ERROR_NO_SPU_CONTEXT_STORAGE = 0x80410114, // ??? 
}; -union set_alignment(4) CellSyncMutex +struct set_alignment(4) sync_mutex_t // CellSyncMutex sync var { - struct sync_t - { - be_t cnt_rel; // increased when mutex is unlocked - be_t cnt_acq; // increased when mutex is locked - }; + be_t rel; + be_t acq; - struct + be_t acquire() { - atomic_be_t rel; - atomic_be_t acq; + return acq++; } - cnt; - atomic_be_t sync_var; + bool try_lock() + { + return acq++ == rel; + } + + void unlock() + { + rel++; + } }; +using CellSyncMutex = atomic_be_t; + CHECK_SIZE_ALIGN(CellSyncMutex, 4, 4); -struct set_alignment(4) CellSyncBarrier +struct set_alignment(4) sync_barrier_t // CellSyncBarrier sync var { - struct data_t + be_t value; + be_t count; + + bool try_notify() { - be_t m_value; - be_t m_count; + // extract value (repeat if < 0), increase, compare with count, set sign bit if equal, insert it back + s16 v = value; + + if (v < 0) + { + return false; + } + + if (++v == count) + { + v |= 0x8000; + } + + value = v; + + return true; }; - atomic_be_t data; + bool try_wait() + { + // extract value (repeat if >= 0), decrease it, set 0 if it reaches -0x8000, insert it back + s16 v = value; + + if (v >= 0) + { + return false; + } + + if (--v == -0x8000) + { + v = 0; + } + + value = v; + + return true; + } }; +using CellSyncBarrier = atomic_be_t; + CHECK_SIZE_ALIGN(CellSyncBarrier, 4, 4); +struct sync_rwm_t // CellSyncRwm sync var +{ + be_t readers; + be_t writers; + + bool try_read_begin() + { + if (writers.data()) + { + return false; + } + + readers++; + return true; + } + + bool try_read_end() + { + if (!readers.data()) + { + return false; + } + + readers--; + return true; + } + + bool try_write_begin() + { + if (writers.data()) + { + return false; + } + + writers = 1; + return true; + } +}; + struct set_alignment(16) CellSyncRwm { - struct data_t - { - be_t m_readers; - be_t m_writers; - }; + atomic_be_t ctrl; // sync var - atomic_be_t data; - be_t m_size; - vm::bptr m_buffer; + be_t size; + vm::bptr buffer; }; CHECK_SIZE_ALIGN(CellSyncRwm, 16, 16); +struct sync_queue_t // CellSyncQueue sync var +{ + be_t m_v1; + be_t m_v2; + + bool try_push(u32 depth, u32& position) + { + const u32 v1 = m_v1; + const u32 v2 = m_v2; + + // compare 5th byte with zero (break if not zero) + // compare (second u32 (u24) + first byte) with depth (break if greater or equal) + if ((v2 >> 24) || ((v2 & 0xffffff) + (v1 >> 24)) >= depth) + { + return false; + } + + // extract first u32 (u24) (-> position), calculate (position + 1) % depth, insert it back + // insert 1 in 5th u8 + // extract second u32 (u24), increase it, insert it back + position = (v1 & 0xffffff); + m_v1 = (v1 & 0xff000000) | ((position + 1) % depth); + m_v2 = (1 << 24) | ((v2 & 0xffffff) + 1); + + return true; + } + + bool try_pop(u32 depth, u32& position) + { + const u32 v1 = m_v1; + const u32 v2 = m_v2; + + // extract first u8, repeat if not zero + // extract second u32 (u24), subtract 5th u8, compare with zero, repeat if less or equal + if ((v1 >> 24) || ((v2 & 0xffffff) <= (v2 >> 24))) + { + return false; + } + + // insert 1 in first u8 + // extract first u32 (u24), add depth, subtract second u32 (u24), calculate (% depth), save to position + // extract second u32 (u24), decrease it, insert it back + m_v1 = 0x1000000 | v1; + position = ((v1 & 0xffffff) + depth - (v2 & 0xffffff)) % depth; + m_v2 = (v2 & 0xff000000) | ((v2 & 0xffffff) - 1); + + return true; + } + + bool try_peek(u32 depth, u32& position) + { + const u32 v1 = m_v1; + const u32 v2 = m_v2; + + if ((v1 >> 24) || ((v2 & 0xffffff) <= (v2 >> 24))) +
{ + return false; + } + + m_v1 = 0x1000000 | v1; + position = ((v1 & 0xffffff) + depth - (v2 & 0xffffff)) % depth; + + return true; + } +}; + struct set_alignment(32) CellSyncQueue { - struct data_t - { - be_t m_v1; - be_t m_v2; - }; + atomic_be_t ctrl; - atomic_be_t data; - be_t m_size; - be_t m_depth; - vm::bptr m_buffer; + be_t size; + be_t depth; + vm::bptr buffer; be_t reserved; + + u32 check_depth() + { + const auto data = ctrl.load(); + + if ((data.m_v1 & 0xffffff) > depth || (data.m_v2 & 0xffffff) > depth) + { + throw __FUNCTION__; + } + + return depth; + } }; CHECK_SIZE_ALIGN(CellSyncQueue, 32, 32); @@ -191,14 +340,6 @@ struct set_alignment(128) CellSyncLFQueue CHECK_SIZE_ALIGN(CellSyncLFQueue, 128, 128); -s32 syncMutexInitialize(vm::ptr mutex); - -s32 syncBarrierInitialize(vm::ptr barrier, u16 total_count); - -s32 syncRwmInitialize(vm::ptr rwm, vm::ptr buffer, u32 buffer_size); - -s32 syncQueueInitialize(vm::ptr queue, vm::ptr buffer, u32 size, u32 depth); - s32 syncLFQueueInitialize(vm::ptr queue, vm::ptr buffer, u32 size, u32 depth, CellSyncQueueDirection direction, vm::ptr eaSignal); s32 syncLFQueueGetPushPointer(PPUThread& CPU, vm::ptr queue, s32& pointer, u32 isBlocking, u32 useEventQueue); s32 syncLFQueueGetPushPointer2(PPUThread& CPU, vm::ptr queue, s32& pointer, u32 isBlocking, u32 useEventQueue); diff --git a/rpcs3/Emu/SysCalls/Modules/libmixer.cpp b/rpcs3/Emu/SysCalls/Modules/libmixer.cpp index dac5fbe273..a9ef23eca8 100644 --- a/rpcs3/Emu/SysCalls/Modules/libmixer.cpp +++ b/rpcs3/Emu/SysCalls/Modules/libmixer.cpp @@ -343,7 +343,7 @@ int cellSurMixerCreate(vm::cptr config) ppu.InitRegs(); ppu.DoRun(); - while (port.state.read_relaxed() != AUDIO_PORT_STATE_CLOSED && !Emu.IsStopped()) + while (port.state.load() != AUDIO_PORT_STATE_CLOSED && !Emu.IsStopped()) { if (mixcount > (port.tag + 0)) // adding positive value (1-15): preemptive buffer filling (hack) { @@ -351,7 +351,7 @@ int cellSurMixerCreate(vm::cptr config) continue; } - if (port.state.read_relaxed() == AUDIO_PORT_STATE_STARTED) + if (port.state.load() == AUDIO_PORT_STATE_STARTED) { //u64 stamp0 = get_system_time(); diff --git a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp index 091dcf4e60..8feea2d7dc 100644 --- a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp +++ b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp @@ -125,7 +125,7 @@ s32 sys_lwmutex_destroy(PPUThread& CPU, vm::ptr lwmutex) sysPrxForUser.Log("sys_lwmutex_destroy(lwmutex=*0x%x)", lwmutex); // check to prevent recursive locking in the next call - if (lwmutex->vars.owner.read_relaxed() == CPU.GetId()) + if (lwmutex->vars.owner.load() == CPU.GetId()) { return CELL_EBUSY; } @@ -184,7 +184,7 @@ s32 sys_lwmutex_lock(PPUThread& CPU, vm::ptr lwmutex, u64 timeout // recursive locking succeeded lwmutex->recursive_count++; - lwmutex->lock_var.read_sync(); + _mm_mfence(); return CELL_OK; } @@ -197,7 +197,7 @@ s32 sys_lwmutex_lock(PPUThread& CPU, vm::ptr lwmutex, u64 timeout for (u32 i = 0; i < 300; i++) { - if (lwmutex->vars.owner.read_relaxed() == lwmutex_free) + if (lwmutex->vars.owner.load() == lwmutex_free) { if (lwmutex->vars.owner.compare_and_swap_test(lwmutex_free, tid)) { @@ -278,7 +278,7 @@ s32 sys_lwmutex_trylock(PPUThread& CPU, vm::ptr lwmutex) // recursive locking succeeded lwmutex->recursive_count++; - lwmutex->lock_var.read_sync(); + _mm_mfence(); return CELL_OK; } @@ -319,7 +319,7 @@ s32 sys_lwmutex_unlock(PPUThread& CPU, vm::ptr lwmutex) const be_t tid = CPU.GetId(); // check owner - if 
(lwmutex->vars.owner.read_relaxed() != tid) + if (lwmutex->vars.owner.load() != tid) { return CELL_EPERM; } @@ -392,7 +392,7 @@ s32 sys_lwcond_signal(PPUThread& CPU, vm::ptr lwcond) //return _sys_lwcond_signal(lwcond->lwcond_queue, 0, -1, 2); } - if (lwmutex->vars.owner.read_relaxed() == CPU.GetId()) + if (lwmutex->vars.owner.load() == CPU.GetId()) { // if owns the mutex lwmutex->all_info++; @@ -450,7 +450,7 @@ s32 sys_lwcond_signal_all(PPUThread& CPU, vm::ptr lwcond) //return _sys_lwcond_signal_all(lwcond->lwcond_queue, lwmutex->sleep_queue, 2); } - if (lwmutex->vars.owner.read_relaxed() == CPU.GetId()) + if (lwmutex->vars.owner.load() == CPU.GetId()) { // if owns the mutex, call the syscall const s32 res = _sys_lwcond_signal_all(lwcond->lwcond_queue, lwmutex->sleep_queue, 1); @@ -507,7 +507,7 @@ s32 sys_lwcond_signal_to(PPUThread& CPU, vm::ptr lwcond, u32 ppu_t //return _sys_lwcond_signal(lwcond->lwcond_queue, 0, ppu_thread_id, 2); } - if (lwmutex->vars.owner.read_relaxed() == CPU.GetId()) + if (lwmutex->vars.owner.load() == CPU.GetId()) { // if owns the mutex lwmutex->all_info++; @@ -561,7 +561,7 @@ s32 sys_lwcond_wait(PPUThread& CPU, vm::ptr lwcond, u64 timeout) const vm::ptr lwmutex = lwcond->lwmutex; - if (lwmutex->vars.owner.read_relaxed() != tid) + if (lwmutex->vars.owner.load() != tid) { // if not owner of the mutex return CELL_EPERM; @@ -1189,7 +1189,7 @@ void sys_spinlock_lock(vm::ptr> lock) // prx: exchange with 0xabadcafe, repeat until exchanged with 0 while (lock->exchange(0xabadcafe).data()) { - g_sys_spinlock_wm.wait_op(lock.addr(), [lock](){ return lock->read_relaxed().data() == 0; }); + g_sys_spinlock_wm.wait_op(lock.addr(), [lock](){ return lock->load().data() == 0; }); if (Emu.IsStopped()) { diff --git a/rpcs3/Emu/SysCalls/lv2/sys_interrupt.cpp b/rpcs3/Emu/SysCalls/lv2/sys_interrupt.cpp index 7ce6a93aef..711a8ddcbc 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_interrupt.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_interrupt.cpp @@ -88,7 +88,7 @@ s32 sys_interrupt_thread_establish(vm::ptr ih, u32 intrtag, u64 intrthread, return CELL_EAGAIN; } - if (s32 res = tag.assigned.atomic_op(CELL_OK, [](s32& value) -> s32 + if (s32 res = tag.assigned.atomic_op([](s32& value) -> s32 { if (value < 0) { @@ -113,7 +113,7 @@ s32 sys_interrupt_thread_establish(vm::ptr ih, u32 intrtag, u64 intrthread, while (!Emu.IsStopped()) { // call interrupt handler until int status is clear - if (tag.stat.read_relaxed()) + if (tag.stat.load()) { //func(CPU, arg); CPU.GPR[3] = arg; diff --git a/rpcs3/Emu/SysCalls/lv2/sys_mutex.cpp b/rpcs3/Emu/SysCalls/lv2/sys_mutex.cpp index 6616f0dd71..8f70bedc3d 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_mutex.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_mutex.cpp @@ -33,7 +33,7 @@ s32 sys_mutex_create(vm::ptr mutex_id, vm::ptr attr) const bool recursive = attr->recursive == SYS_SYNC_RECURSIVE; - if ((!recursive && attr->recursive != SYS_SYNC_NOT_RECURSIVE) || attr->pshared.data() != SYS_SYNC_NOT_PROCESS_SHARED || attr->adaptive != SYS_SYNC_NOT_ADAPTIVE || attr->ipc_key.data() || attr->flags.data()) + if ((!recursive && attr->recursive != SYS_SYNC_NOT_RECURSIVE) || attr->pshared != SYS_SYNC_NOT_PROCESS_SHARED || attr->adaptive != SYS_SYNC_NOT_ADAPTIVE || attr->ipc_key.data() || attr->flags.data()) { sys_mutex.Error("sys_mutex_create(): unknown attributes (recursive=0x%x, pshared=0x%x, adaptive=0x%x, ipc_key=0x%llx, flags=0x%x)", attr->recursive, attr->pshared, attr->adaptive, attr->ipc_key, attr->flags); diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp 
index 68e9ca41c5..d540b167d8 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp @@ -566,7 +566,7 @@ s32 sys_spu_thread_group_join(u32 id, vm::ptr cause, vm::ptr status) { auto& spu = static_cast(*t); - if (!(spu.status.read_relaxed() & SPU_STATUS_STOPPED_BY_STOP)) + if (!(spu.status.load() & SPU_STATUS_STOPPED_BY_STOP)) { stopped = false; break; @@ -1253,7 +1253,7 @@ s32 sys_raw_spu_get_int_mask(u32 id, u32 class_id, vm::ptr mask) auto& spu = static_cast(*t); - *mask = (class_id ? spu.int2 : spu.int0).mask.read_sync(); + *mask = (class_id ? spu.int2 : spu.int0).mask.load(); return CELL_OK; } @@ -1299,7 +1299,7 @@ s32 sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr stat) auto& spu = static_cast(*t); - *stat = (class_id ? spu.int2 : spu.int0).stat.read_sync(); + *stat = (class_id ? spu.int2 : spu.int0).stat.load(); return CELL_OK; } diff --git a/rpcs3/Gui/RSXDebugger.cpp b/rpcs3/Gui/RSXDebugger.cpp index 3102cbf738..7a34aa581c 100644 --- a/rpcs3/Gui/RSXDebugger.cpp +++ b/rpcs3/Gui/RSXDebugger.cpp @@ -333,7 +333,7 @@ void RSXDebugger::GoToGet(wxCommandEvent& event) if (!RSXReady()) return; auto ctrl = vm::get_ptr(Emu.GetGSManager().GetRender().m_ctrlAddress); u32 realAddr; - if (Memory.RSXIOMem.getRealAddr(ctrl->get.read_relaxed(), realAddr)) { + if (Memory.RSXIOMem.getRealAddr(ctrl->get.load(), realAddr)) { m_addr = realAddr; t_addr->SetValue(wxString::Format("%08x", m_addr)); UpdateInformation(); @@ -347,7 +347,7 @@ void RSXDebugger::GoToPut(wxCommandEvent& event) if (!RSXReady()) return; auto ctrl = vm::get_ptr(Emu.GetGSManager().GetRender().m_ctrlAddress); u32 realAddr; - if (Memory.RSXIOMem.getRealAddr(ctrl->put.read_relaxed(), realAddr)) { + if (Memory.RSXIOMem.getRealAddr(ctrl->put.load(), realAddr)) { m_addr = realAddr; t_addr->SetValue(wxString::Format("%08x", m_addr)); UpdateInformation(); diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index 98b7a0a845..82fa7215b2 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -108,6 +108,8 @@ template struct ID_type; #define CHECK_MAX_SIZE(type, size) static_assert(sizeof(type) <= size, #type " type size is too big") #define CHECK_SIZE_ALIGN(type, size, align) CHECK_SIZE(type, size); CHECK_ALIGN(type, align) +#define WRAP_EXPR(expr) [&]{ return (expr); } + #define _PRGNAME_ "RPCS3" #define _PRGVER_ "0.0.0.5"
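Note on sync_mutex_t (cellSync.h): the acq/rel pair implements a ticket lock. cellSyncMutexLock takes a ticket from acq via atomic_op(&sync_mutex_t::acquire), waits on the waiter map until rel catches up to that ticket, and cellSyncMutexUnlock advances rel. A minimal host-side sketch of the same protocol using std::atomic instead of the emulator's atomic_be_t/atomic_op machinery (the ticket_mutex name and this exact shape are illustrative, not part of the patch):

#include <atomic>
#include <cstdint>
#include <thread>

// Illustrative ticket lock mirroring sync_mutex_t's acq/rel counters.
struct ticket_mutex
{
    std::atomic<std::uint16_t> rel{0}; // advanced by unlock()
    std::atomic<std::uint16_t> acq{0}; // advanced when a ticket is taken

    void lock()
    {
        // take a ticket (the old acq value), then wait until rel reaches it;
        // cellSyncMutexLock performs the same wait through g_sync_mutex_wm
        const std::uint16_t order = acq.fetch_add(1);

        while (rel.load() != order)
        {
            std::this_thread::yield();
        }
    }

    bool try_lock()
    {
        // succeed only when no ticket is outstanding; note that the emulated
        // sync_mutex_t::try_lock increments acq even on failure (mirroring the
        // firmware behaviour), while this conventional CAS variant does not
        std::uint16_t expected = rel.load();
        return acq.compare_exchange_strong(expected, static_cast<std::uint16_t>(expected + 1));
    }

    void unlock()
    {
        rel.fetch_add(1); // hand the lock to the next ticket holder
    }
};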
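Note on sync_barrier_t: `value` counts arrivals while non-negative and flips into a release phase through the sign bit. With count = 3, three try_notify calls take value through 1 and 2 to 3 == count, at which point it becomes 3 | 0x8000 (negative as s16) and further notifies return false. Each try_wait then requires a negative value and decrements it, 0x8003 -> 0x8002 -> 0x8001; the third wait reaches -0x8000 and resets value to 0, reopening the barrier for the next round.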
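Note on sync_queue_t: the two u32 control words each pack a busy byte over a 24-bit field, m_v1 = [pop-in-progress : 8 | next push position : 24] and m_v2 = [push-in-progress : 8 | element count : 24]. The busy byte keeps the queue reserved while the non-atomic memcpy runs and is cleared afterwards by the `ctrl &= { ... }` masks. A host-endian, single-threaded sketch of the same field manipulation (plain integers; in the patch these bodies run inside atomic_be_t::atomic_op, which commits them as one atomic read-modify-write):

#include <cstdint>

// Illustrative view of sync_queue_t's packed control words.
// v1 = [pop-in-progress : 8][next push position : 24]
// v2 = [push-in-progress: 8][element count      : 24]
struct queue_ctrl
{
    std::uint32_t v1 = 0;
    std::uint32_t v2 = 0;

    bool try_push(std::uint32_t depth, std::uint32_t& position)
    {
        // busy if another push is in flight, or the queue is full
        // (an in-flight pop still reserves its slot via the v1 busy byte)
        if ((v2 >> 24) || ((v2 & 0xffffff) + (v1 >> 24)) >= depth)
        {
            return false;
        }

        position = v1 & 0xffffff;                          // slot to write
        v1 = (v1 & 0xff000000) | ((position + 1) % depth); // advance write index
        v2 = (1u << 24) | ((v2 & 0xffffff) + 1);           // set push flag, count++
        return true;
    }

    bool try_pop(std::uint32_t depth, std::uint32_t& position)
    {
        // busy if another pop is in flight, or no fully pushed element exists
        if ((v1 >> 24) || ((v2 & 0xffffff) <= (v2 >> 24)))
        {
            return false;
        }

        v1 |= 1u << 24;                                                 // set pop flag
        position = ((v1 & 0xffffff) + depth - (v2 & 0xffffff)) % depth; // oldest slot
        v2 = (v2 & 0xff000000) | ((v2 & 0xffffff) - 1);                 // count--
        return true;
    }
};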
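Note on WRAP_EXPR (stdafx.h): it only wraps an expression in a zero-argument lambda capturing by reference, so waiter_map_t::wait_op can re-evaluate the condition every time it polls. The call in cellSyncMutexLock, for example, is shorthand for writing the lambda out by hand:

// WRAP_EXPR(expr) expands to [&]{ return (expr); }, so
//   g_sync_mutex_wm.wait_op(mutex.addr(), WRAP_EXPR(mutex->load().rel == order));
// is equivalent to:
//   g_sync_mutex_wm.wait_op(mutex.addr(), [&]{ return mutex->load().rel == order; });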
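Note on the _mm_mfence() calls: where the old code issued read_sync() purely for its ordering side effect, the patch now emits a full x86 load/store fence (_mm_mfence, declared in <emmintrin.h>) instead; loads whose value is actually consumed become load() or, where acquire semantics are still needed, load_sync().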