diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index c4f22b9969..a4b3f66125 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1719,10 +1719,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) auto& cline_data = vm::_ref(addr); data += 0; - - const auto render = rsx::get_rsx_if_needs_res_pause(addr); - - if (render) render->pause(); + rsx::reservation_lock rsx_lock(addr, 128); auto& super_data = *vm::get_super_ptr(addr); const bool success = [&]() @@ -1742,7 +1739,6 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) return false; }(); - if (render) render->unpause(); return success; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index b8b567d5ef..7711c2c128 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -2126,6 +2126,9 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) const auto& to_write = _ref(args.lsa & 0x3ff80); auto& res = vm::reservation_acquire(addr, 128); + // TODO: Limit scope!! 
+ rsx::reservation_lock rsx_lock(addr, 128); + if (!g_use_rtm && rtime != res) { return false; @@ -2143,10 +2146,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) { case UINT32_MAX: { - const auto render = rsx::get_rsx_if_needs_res_pause(addr); - - if (render) render->pause(); - const bool ok = cpu_thread::suspend_all(this, [&]() { if ((res & -128) == rtime) @@ -2165,7 +2164,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) return false; }); - if (render) render->unpause(); return ok; } case 0: return false; @@ -2200,10 +2198,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) vm::_ref>(addr) += 0; - const auto render = rsx::get_rsx_if_needs_res_pause(addr); - - if (render) render->pause(); - auto& super_data = *vm::get_super_ptr(addr); const bool success = [&]() { @@ -2222,7 +2216,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) return false; }(); - if (render) render->unpause(); return success; }()) { @@ -2258,15 +2251,12 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write) perf_meter<"STORE128"_u64> perf0; const auto cpu = get_current_cpu_thread(); + rsx::reservation_lock rsx_lock(addr, 128); if (g_use_rtm) [[likely]] { const u32 result = spu_putlluc_tx(addr, to_write, cpu); - const auto render = result != 1 ? 
rsx::get_rsx_if_needs_res_pause(addr) : nullptr; - - if (render) render->pause(); - if (result == 0) { // Execute with increased priority @@ -2281,7 +2271,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write) perf_log.warning("STORE128: took too long: %u", result); } - if (render) render->unpause(); static_cast(cpu->test_stopped()); } else @@ -2291,10 +2280,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write) *reinterpret_cast*>(&data) += 0; - const auto render = rsx::get_rsx_if_needs_res_pause(addr); - - if (render) render->pause(); - auto& super_data = *vm::get_super_ptr(addr); { // Full lock (heavyweight) @@ -2303,8 +2288,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write) mov_rdata(super_data, *static_cast(to_write)); res += 64; } - - if (render) render->unpause(); } } @@ -2498,6 +2481,7 @@ bool spu_thread::process_mfc_cmd() alignas(64) spu_rdata_t temp; u64 ntime; + rsx::reservation_lock rsx_lock(addr, 128); if (raddr) { diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp index d6e29af4e9..f20503faac 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp @@ -314,6 +314,10 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f sys_rsx.warning("sys_rsx_context_iomap(): RSX is not idle while mapping io"); } + // Wait until we have no active RSX locks and reserve iomap for use. 
Must do so before acquiring vm lock to avoid deadlocks + vm::temporary_unlock(*get_current_cpu_thread()); + rsx::reservation_lock rsx_lock(ea, size); + vm::reader_lock rlock; for (u32 addr = ea, end = ea + size; addr < end; addr += 0x100000) @@ -326,6 +330,7 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f io >>= 20, ea >>= 20, size >>= 20; + render->pause(); std::scoped_lock lock(g_fxo->get()->mutex); for (u32 i = 0; i < size; i++) @@ -339,6 +344,7 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f table.io[ea + i].release((io + i) << 20); } + render->unpause(); return CELL_OK; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index baa1f0774a..3991f786bb 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2397,6 +2397,7 @@ namespace rsx } } + rsx::reservation_lock lock(sink, 16); vm::_ref>(sink).store({ timestamp(), value, 0}); } @@ -3258,6 +3259,7 @@ namespace rsx break; } + rsx::reservation_lock lock(sink, 16); vm::_ref>(sink).store({ timestamp, value, 0}); } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 02b0cf279c..4346b3f5eb 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -44,6 +44,7 @@ namespace rsx { std::array, 4096> ea; std::array, 4096> io; + std::array rs; rsx_iomap_table() noexcept { @@ -57,6 +58,49 @@ namespace rsx { return this->ea[offs >> 20] | (offs & 0xFFFFF); } + + // Locks rs[block] for every 1 MiB block (addr >> 20) touched by the len bytes starting at addr: exclusive lock when IsFullLock, shared lock otherwise. + // Returns false without locking anything when len <= 1, true after all touched blocks are locked. + template + bool lock(u32 addr, u32 len) noexcept + { + if (len <= 1) return false; + const u32 end = addr + len - 1; + + for (u32 block = (addr >> 20); block <= (end >> 20); ++block) + { + if constexpr (IsFullLock) + { + rs[block].lock(); + } + else + { + rs[block].lock_shared(); + } + } + + return true; + } + + // Unlocks rs[block] for every block touched by the len bytes starting at addr: exclusive unlock when IsFullLock, shared unlock otherwise. + template + void unlock(u32 addr, u32 len) noexcept + { + ASSERT(len >= 1); + const u32 end = addr + len - 1; + + for (u32 block = (addr >> 20); block <= (end >> 20); ++block) + { + if constexpr (IsFullLock) + 
{ + rs[block].unlock(); + } + else + { + rs[block].unlock_shared(); + } + } + } }; enum framebuffer_creation_context : u8 @@ -966,23 +1010,70 @@ namespace rsx return g_fxo->get(); } - // Returns nullptr if rsx does not need pausing on reservations op, rsx ptr otherwise - inline thread* get_rsx_if_needs_res_pause(u32 addr) + // Scoped guard: a constructor may lock a range in the current renderer's iomap_table; the destructor unlocks that range only if a lock was taken. + template + class reservation_lock { - if (!g_cfg.core.rsx_accurate_res_access) [[likely]] + u32 addr = 0, length = 0; + bool locked = false; + + inline void lock_range(u32 addr, u32 length) { - return {}; + this->addr = addr; + this->length = length; + + auto renderer = get_current_renderer(); + this->locked = renderer->iomap_table.lock(addr, length); } - const auto render = get_current_renderer(); - - ASSUME(render); - - if (render->iomap_table.io[addr >> 20].load() == umax) [[likely]] + public: + // Single-range lock: taken only when g_cfg.core.rsx_accurate_res_access is set and addr is below constants::local_mem_base. + reservation_lock(u32 addr, u32 length) { - return {}; + if (g_cfg.core.rsx_accurate_res_access && + addr < constants::local_mem_base) + { + lock_range(addr, length); + } } - return render; - } + // Multi-range lock. If ranges overlap, the combined range will be acquired. + // If ranges do not overlap, the first range that is in main memory will be acquired. + reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length) + { + if (g_cfg.core.rsx_accurate_res_access) + { + const auto range1 = utils::address_range::start_length(dst_addr, dst_length); + const auto range2 = utils::address_range::start_length(src_addr, src_length); + utils::address_range target_range; + + if (!range1.overlaps(range2)) [[likely]] + { + target_range = (dst_addr < constants::local_mem_base) ? 
range1 : range2; + } + else + { + // Very unlikely + target_range = range1.get_min_max(range2); + } + + if (target_range.start < constants::local_mem_base) + { + lock_range(target_range.start, target_range.length()); + } + } + } + + // Non-copyable: a copy would carry locked == true and unlock the same range a second time in its own destructor. + reservation_lock(const reservation_lock&) = delete; + reservation_lock& operator=(const reservation_lock&) = delete; + + ~reservation_lock() + { + if (locked) + { + get_current_renderer()->iomap_table.unlock(addr, length); + } + } + }; } diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 68ec9ec259..2d9812e94f 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -940,10 +940,13 @@ namespace rsx { // Bit cast - optimize to mem copy - const auto dst = vm::_ptr(get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, HERE)); - const auto src = vm::_ptr(get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE)); + const auto dst_address = get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, HERE); + const auto src_address = get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE); + const auto dst = vm::_ptr(dst_address); + const auto src = vm::_ptr(src_address); const u32 data_length = count * 4; + auto res = rsx::reservation_lock(dst_address, data_length, src_address, data_length); if (rsx->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] { @@ -971,8 +974,13 @@ namespace rsx } case blit_engine::transfer_destination_format::r5g6b5: { - const auto dst = vm::_ptr(get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, HERE)); - const auto src = vm::_ptr(get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE)); + const auto dst_address = get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, HERE); + const auto src_address = get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE); + const auto dst = vm::_ptr(dst_address); + const auto src = vm::_ptr(src_address); + + const auto data_length = count * 2; + auto res = rsx::reservation_lock(dst_address, data_length, src_address, data_length); auto convert = [](u32 input) -> u16 { @@ -1162,8 +1170,6 @@ namespace rsx const 
u32 src_line_length = (in_w * in_bpp); - //auto res = vm::passive_lock(dst_address, dst_address + (in_pitch * (in_h - 1) + src_line_length)); - if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch))) { const u32 nb_lines = std::min(clip_h, in_h); @@ -1223,6 +1229,9 @@ namespace rsx return; } + // Lock here. RSX cannot execute any locking operations from this point, including ZCULL read barriers + auto res = ::rsx::reservation_lock(dst_address, out_pitch * out_h, src_address, in_pitch * in_h); + if (!g_cfg.video.force_cpu_blit_processing && (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER)) { blit_src_info src_info = {}; @@ -1522,29 +1531,30 @@ namespace rsx const bool is_block_transfer = (in_pitch == out_pitch && out_pitch + 0u == line_length); const auto read_address = get_address(src_offset, src_dma, HERE); const auto write_address = get_address(dst_offset, dst_dma, HERE); - const auto data_length = in_pitch * (line_count - 1) + line_length; + const auto read_length = in_pitch * (line_count - 1) + line_length; + const auto write_length = out_pitch * (line_count - 1) + line_length; - rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length); - - if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer); + rsx->invalidate_fragment_program(dst_dma, dst_offset, write_length); + + if (const auto result = rsx->read_barrier(read_address, read_length, !is_block_transfer); result == rsx::result_zcull_intr) { // This transfer overlaps will zcull data pool - if (rsx->copy_zcull_stats(read_address, data_length, write_address) == data_length) + if (rsx->copy_zcull_stats(read_address, read_length, write_address) == write_length) { // All writes deferred return; } } - //auto res = vm::passive_lock(write_address, data_length + write_address); + auto res = ::rsx::reservation_lock(write_address, write_length, read_address, read_length); u8 
*dst = vm::_ptr(write_address); const u8 *src = vm::_ptr(read_address); const bool is_overlapping = dst_dma == src_dma && [&]() -> bool { - const u32 src_max = src_offset + data_length; + const u32 src_max = src_offset + read_length; const u32 dst_max = dst_offset + (out_pitch * (line_count - 1) + line_length); return (src_offset >= dst_offset && src_offset < dst_max) || (dst_offset >= src_offset && dst_offset < src_max); @@ -1554,7 +1564,7 @@ namespace rsx { if (is_block_transfer) { - std::memmove(dst, src, line_length * line_count); + std::memmove(dst, src, read_length); } else { @@ -1582,7 +1592,7 @@ namespace rsx { if (is_block_transfer) { - std::memcpy(dst, src, line_length * line_count); + std::memcpy(dst, src, read_length); } else {