From f66256cc13d9e936b40ff8dea0d17002f721fe2d Mon Sep 17 00:00:00 2001 From: Eladash Date: Sat, 21 May 2022 19:39:14 +0300 Subject: [PATCH] rsx: PS3 Native frame limiter improvements, add Infinite frame limiter * Do not wait on the DEVICE 0x30 semaphore; it seems to be related to queue command synchronization. - This also fixes cellGcmSetFlipWithWaitLabel, which is built specifically to enable accurate RSX flipping time; its waiting command is confirmed to be placed **AFTER** the DEVICE 0x30 wait. * Fix default vsync state to be enabled (and set it to enabled in cellGcmSetVBlankFrequency as well). * Add experimental "Infinite" frame limiter mode. * Fix spurious enabling of second vblank. --- rpcs3/Emu/Cell/Modules/cellVideoOut.cpp | 3 ++ rpcs3/Emu/Cell/lv2/sys_rsx.cpp | 34 ++++++++++--- rpcs3/Emu/Cell/lv2/sys_rsx.h | 2 +- rpcs3/Emu/RSX/RSXThread.cpp | 67 +++++++++++++++---------- rpcs3/Emu/RSX/RSXThread.h | 6 +-- rpcs3/Emu/RSX/rsx_decode.h | 5 +- rpcs3/Emu/RSX/rsx_methods.cpp | 20 +++----- rpcs3/Emu/system_config_types.cpp | 2 +- rpcs3/Emu/system_config_types.h | 2 +- rpcs3/rpcs3qt/emu_settings.cpp | 2 +- rpcs3/rpcs3qt/tooltips.h | 2 +- 11 files changed, 88 insertions(+), 57 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellVideoOut.cpp b/rpcs3/Emu/Cell/Modules/cellVideoOut.cpp index de2764f3eb..7fe6beb7b2 100644 --- a/rpcs3/Emu/Cell/Modules/cellVideoOut.cpp +++ b/rpcs3/Emu/Cell/Modules/cellVideoOut.cpp @@ -4,6 +4,7 @@ #include "Emu/Cell/PPUModule.h" #include "Emu/IdManager.h" #include "Emu/RSX/rsx_utils.h" +#include "Emu/RSX/RSXThread.h" #include "cellVideoOut.h" @@ -211,6 +212,8 @@ error_code cellVideoOutConfigure(u32 videoOut, vm::ptr0x%x, format=0x%x", config->resolutionId, config->aspect, conf.aspect, config->format); + // This function resets VSYNC to be enabled + rsx::get_current_renderer()->requested_vsync = true; return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp index 86adbba504..d5bab1f0b5 100644 
--- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp @@ -501,6 +501,11 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64 render->on_frame_end(static_cast(a4)); render->send_event(0, SYS_RSX_EVENT_QUEUE_BASE << a3, 0); + + if (g_cfg.video.frame_limit == frame_limit_type::infinite) + { + render->post_vblank_event(get_system_time()); + } } break; @@ -536,12 +541,24 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64 case 0x106: // ? (Used by cellGcmInitPerfMon) break; - case 0x108: // cellGcmSetSecondVFrequency + case 0x108: // cellGcmSetVBlankFrequency, cellGcmSetSecondVFrequency // a4 == 3, CELL_GCM_DISPLAY_FREQUENCY_59_94HZ // a4 == 2, CELL_GCM_DISPLAY_FREQUENCY_SCANOUT // a4 == 4, CELL_GCM_DISPLAY_FREQUENCY_DISABLE - // Note: Scanout/59_94 is ignored currently as we report refresh rate of 59_94hz as it is, so the difference doesnt matter - render->enable_second_vhandler.store(a4 != 4); + + if (a5 == 1u) + { + // This function resets vsync state to enabled + render->requested_vsync = true; + + // TODO: Set vblank frequency + } + else if (ensure(a5 == 2u)) + { + // TODO: Implement its frequency as well + render->enable_second_vhandler.store(a4 != 4); + } + break; case 0x10a: // ? 
Involved in managing flip status through cellGcmResetFlipStatus @@ -743,9 +760,9 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64 case 0xFED: // hack: vblank command { - if (get_current_cpu_thread()) + if (cpu_thread::get_current()) { - // VBLANK thread only + // VBLANK/RSX thread only return CELL_EINVAL; } @@ -753,12 +770,15 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64 ensure(a3 < 2); // todo: this is wrong and should be 'second' vblank handler and freq, but since currently everything is reported as being 59.94, this should be fine - vm::_ref(render->device_addr + 0x30) = 1; + driverInfo.head[a3].lastSecondVTime.atomic_op([&](be_t& time) + { + a4 = std::max(a4, time + 1); + time = a4; + }); // Time point is supplied in argument 4 (todo: convert it to MFTB rate and use it) const u64 current_time = rsxTimeStamp(); - driverInfo.head[a3].lastSecondVTime = current_time; // Note: not atomic driverInfo.head[a3].lastVTimeLow = static_cast(current_time); diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.h b/rpcs3/Emu/Cell/lv2/sys_rsx.h index 632cfd0ce0..2e574e2272 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.h +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.h @@ -32,7 +32,7 @@ struct RsxDriverInfo be_t lastQueuedBufferId; // 0x14 todo: this is definately not this variable but its 'unused' so im using it for queueId to pass to flip handler be_t unk3; // 0x18 be_t lastVTimeLow; // 0x1C last time for first vhandler freq (low 32-bits) - be_t lastSecondVTime; // 0x20 last time for second vhandler freq + atomic_be_t lastSecondVTime; // 0x20 last time for second vhandler freq be_t unk4; // 0x28 atomic_be_t vBlankCount; // 0x30 be_t unk; // 0x38 possible u32, 'flip field', top/bottom for interlaced diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index ee17726a60..7958f7e300 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -595,6 +595,31 @@ namespace rsx } } + void 
thread::post_vblank_event(u64 post_event_time) + { + vblank_count++; + + if (isHLE) + { + if (auto ptr = vblank_handler) + { + intr_thread->cmd_list + ({ + { ppu_cmd::set_args, 1 }, u64{1}, + { ppu_cmd::lle_call, ptr }, + { ppu_cmd::sleep, 0 } + }); + + intr_thread->cmd_notify++; + intr_thread->cmd_notify.notify_one(); + } + } + else + { + sys_rsx_context_attribute(0x55555555, 0xFED, 1, get_guest_system_time(post_event_time), 0, 0); + } + } + void thread::on_task() { g_tls_log_prefix = [] @@ -686,7 +711,6 @@ namespace rsx { { local_vblank_count++; - vblank_count++; if (local_vblank_count == vblank_rate) { @@ -701,25 +725,7 @@ namespace rsx vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000; } - if (isHLE) - { - if (auto ptr = vblank_handler) - { - intr_thread->cmd_list - ({ - { ppu_cmd::set_args, 1 }, u64{1}, - { ppu_cmd::lle_call, ptr }, - { ppu_cmd::sleep, 0 } - }); - - intr_thread->cmd_notify++; - intr_thread->cmd_notify.notify_one(); - } - } - else - { - sys_rsx_context_attribute(0x55555555, 0xFED, 1, get_guest_system_time(post_event_time), 0, 0); - } + post_vblank_event(post_event_time); } } else if (wait_sleep) @@ -3203,15 +3209,17 @@ namespace rsx } double limit = 0.; - switch (g_disable_frame_limit ? frame_limit_type::none : g_cfg.video.frame_limit) + const auto frame_limit = g_disable_frame_limit ? 
frame_limit_type::none : g_cfg.video.frame_limit; + + switch (frame_limit) { case frame_limit_type::none: limit = 0.; break; - case frame_limit_type::_59_94: limit = 59.94; break; case frame_limit_type::_50: limit = 50.; break; case frame_limit_type::_60: limit = 60.; break; case frame_limit_type::_30: limit = 30.; break; case frame_limit_type::_auto: limit = static_cast(g_cfg.video.vblank_rate); break; case frame_limit_type::_ps3: limit = 0.; break; + case frame_limit_type::infinite: limit = 0.; break; default: break; } @@ -3247,10 +3255,17 @@ namespace rsx } } } - else if (wait_for_flip_sema) + else if (frame_limit == frame_limit_type::_ps3) { - const auto& value = vm::_ref(device_addr + 0x30).val; - if (value != flip_sema_wait_val) + bool exit = false; + + if (vblank_at_flip == umax) + { + vblank_at_flip = +vblank_count; + exit = true; + } + + if (requested_vsync && (exit || vblank_at_flip == vblank_count)) { // Not yet signaled, handle it later async_flip_requested |= flip_request::emu_requested; @@ -3258,7 +3273,7 @@ namespace rsx return; } - wait_for_flip_sema = false; + vblank_at_flip = umax; } int_flip_index++; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index f3449d9d88..814d30449d 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -564,7 +564,7 @@ namespace rsx u32 flip_status; int debug_level; - atomic_t requested_vsync{false}; + atomic_t requested_vsync{true}; atomic_t enable_second_vhandler{false}; RsxDisplayInfo display_buffers[8]; @@ -645,8 +645,8 @@ namespace rsx atomic_t vblank_count{0}; bool capture_current_frame = false; - bool wait_for_flip_sema = false; - u32 flip_sema_wait_val = 0; + u64 vblank_at_flip = umax; + void post_vblank_event(u64 post_event_time); public: atomic_t sync_point_request = false; diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h index 19820dba79..6874968163 100644 --- a/rpcs3/Emu/RSX/rsx_decode.h +++ b/rpcs3/Emu/RSX/rsx_decode.h @@ -1874,11 +1874,10 @@ struct 
registers_decoder static std::string dump(const decoded_type& decoded) { - return fmt::format("NV409E semaphore: release: 0x%x", decoded.value); + return fmt::format("NV406E semaphore: release: 0x%x", decoded.value); } }; - template <> struct registers_decoder { @@ -1891,7 +1890,7 @@ struct registers_decoder static std::string dump(const decoded_type& decoded) { - return fmt::format("NV409E semaphore: acquire: 0x%x", decoded.value); + return fmt::format("NV406E semaphore: acquire: 0x%x", decoded.value); } }; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 4f316c147d..06b7baf8ea 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -37,7 +37,7 @@ namespace rsx template void write_gcm_label(thread* rsx, u32 address, u32 data) { - const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->label_addr + 0x30)); + const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->device_addr + 0x30)); if (!is_flip_sema) { // First, queue the GPU work. If it flushes the queue for us, the following routines will be faster. 
@@ -108,18 +108,6 @@ namespace rsx rsx->flush_fifo(); } - if (addr == rsx->device_addr + 0x30) - { - if (g_cfg.video.frame_limit == frame_limit_type::_ps3 && rsx->requested_vsync) - { - // Enables PS3-compliant vblank behavior - rsx->flip_sema_wait_val = arg; - rsx->wait_for_flip_sema = (sema != arg); - } - - return; - } - u64 start = rsx::uclock(); u64 last_check_val = start; @@ -189,6 +177,12 @@ namespace rsx return; } + if (addr == rsx->device_addr + 0x30 && !arg) + { + // HW flip synchronization related, 1 is not written without display queue command (TODO: make it behave as real hw) + arg = 1; + } + write_gcm_label(rsx, addr, arg); } } diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp index be1d04c902..f87fc2a8e4 100644 --- a/rpcs3/Emu/system_config_types.cpp +++ b/rpcs3/Emu/system_config_types.cpp @@ -307,12 +307,12 @@ void fmt_class_string::format(std::string& out, u64 arg) switch (value) { case frame_limit_type::none: return "Off"; - case frame_limit_type::_59_94: return "59.94"; case frame_limit_type::_50: return "50"; case frame_limit_type::_60: return "60"; case frame_limit_type::_30: return "30"; case frame_limit_type::_auto: return "Auto"; case frame_limit_type::_ps3: return "PS3 Native"; + case frame_limit_type::infinite: return "Infinite"; } return unknown; diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h index 128f706485..802edea253 100644 --- a/rpcs3/Emu/system_config_types.h +++ b/rpcs3/Emu/system_config_types.h @@ -187,12 +187,12 @@ enum class video_aspect enum class frame_limit_type { none, - _59_94, _50, _60, _30, _auto, _ps3, + infinite, }; enum class msaa_level diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 0d6210afcf..25822004da 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -950,12 +950,12 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ switch (static_cast(index)) { case 
frame_limit_type::none: return tr("Off", "Frame limit"); - case frame_limit_type::_59_94: return tr("59.94", "Frame limit"); case frame_limit_type::_50: return tr("50", "Frame limit"); case frame_limit_type::_60: return tr("60", "Frame limit"); case frame_limit_type::_30: return tr("30", "Frame limit"); case frame_limit_type::_auto: return tr("Auto", "Frame limit"); case frame_limit_type::_ps3: return tr("PS3 Native", "Frame limit"); + case frame_limit_type::infinite: return tr("Infinite", "Frame limit"); } break; case emu_settings_type::MSAA: diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index df5b9d6198..179d21a3f2 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -158,7 +158,7 @@ public: const QString resolution = tr("This setting will be ignored if the Resolution Scale is set to anything other than 100%!\nLeave this on 1280x720. Every PS3 game is compatible with this resolution.\nOnly use 1920x1080 if the game supports it.\nRarely due to emulation bugs some games will only render at low resolutions like 480p."); const QString graphics_adapter = tr("On multi GPU systems select which GPU to use in RPCS3 when using Vulkan.\nThis is not needed when using OpenGL."); const QString aspect_ratio = tr("Leave this on 16:9 unless you have a 4:3 monitor."); - const QString frame_limit = tr("Off is the best option as it performs faster.\nUsing the frame limiter will add extra overhead and slow down the game.\nHowever, some games will crash if the frame rate is too high.\nPS3 native should only be used if Auto is not working correctly as it can introduce frame-pacing issues."); + const QString frame_limit = tr("Off is the fastest option.\nUsing the frame limiter will add extra overhead and slow down the game. 
However, some games will crash if the frame rate is too high.\nPS3 native should only be used if Auto is not working correctly as it can introduce frame-pacing issues.\nInfinite adds a positive feedback loop which adds another vblank signal per frame allowing more games to be fps limitless."); const QString anti_aliasing = tr("Emulate PS3 multisampling layout.\nCan fix some otherwise difficult to solve graphics glitches.\nLow to moderate performance hit depending on your GPU hardware."); const QString anisotropic_filter = tr("Higher values increase sharpness of textures on sloped surfaces at the cost of GPU resources.\nModern GPUs can handle this setting just fine, even at 16x.\nKeep this on Automatic if you want to use the original setting used by a real PS3."); const QString resolution_scale = tr("Scales the game's resolution by the given percentage.\nThe base resolution is always 1280x720.\nSet this value to 100% if you want to use the normal Resolution options.\nValues below 100% will usually not improve performance.");