rsx: PS3 Native frame limiter improvements, add Infinite frame limiter

* Do not wait on the DEVICE 0x30 semaphore; it appears to be related to queue command synchronization.
 - This also fixes cellGcmSetFlipWithWaitLabel, which is built specifically to enable accurate RSX flip timing; its wait command is confirmed to be placed **AFTER** the DEVICE 0x30 wait.
* Fix the default vsync state to be enabled (and set it to enabled in cellGcmSetVBlankFrequency as well).
* Add experimental "Infinite" frame limiter mode.
* Fix spurious enabling of second vblank.
This commit is contained in:
Eladash 2022-05-21 19:39:14 +03:00 committed by Megamouse
parent 9cf7a63c77
commit f66256cc13
11 changed files with 88 additions and 57 deletions

View file

@ -4,6 +4,7 @@
#include "Emu/Cell/PPUModule.h"
#include "Emu/IdManager.h"
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/RSXThread.h"
#include "cellVideoOut.h"
@ -211,6 +212,8 @@ error_code cellVideoOutConfigure(u32 videoOut, vm::ptr<CellVideoOutConfiguration
cellSysutil.notice("Selected video configuration: resolutionId=0x%x, aspect=0x%x=>0x%x, format=0x%x", config->resolutionId, config->aspect, conf.aspect, config->format);
// This function resets VSYNC to be enabled
rsx::get_current_renderer()->requested_vsync = true;
return CELL_OK;
}

View file

@ -501,6 +501,11 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
render->on_frame_end(static_cast<u32>(a4));
render->send_event(0, SYS_RSX_EVENT_QUEUE_BASE << a3, 0);
if (g_cfg.video.frame_limit == frame_limit_type::infinite)
{
render->post_vblank_event(get_system_time());
}
}
break;
@ -536,12 +541,24 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
case 0x106: // ? (Used by cellGcmInitPerfMon)
break;
case 0x108: // cellGcmSetSecondVFrequency
case 0x108: // cellGcmSetVBlankFrequency, cellGcmSetSecondVFrequency
// a4 == 3, CELL_GCM_DISPLAY_FREQUENCY_59_94HZ
// a4 == 2, CELL_GCM_DISPLAY_FREQUENCY_SCANOUT
// a4 == 4, CELL_GCM_DISPLAY_FREQUENCY_DISABLE
// Note: Scanout/59_94 is currently ignored because we report a refresh rate of 59.94hz as it is, so the difference doesn't matter
render->enable_second_vhandler.store(a4 != 4);
if (a5 == 1u)
{
// This function resets vsync state to enabled
render->requested_vsync = true;
// TODO: Set vblank frequency
}
else if (ensure(a5 == 2u))
{
// TODO: Implement its frequency as well
render->enable_second_vhandler.store(a4 != 4);
}
break;
case 0x10a: // ? Involved in managing flip status through cellGcmResetFlipStatus
@ -743,9 +760,9 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
case 0xFED: // hack: vblank command
{
if (get_current_cpu_thread())
if (cpu_thread::get_current<ppu_thread>())
{
// VBLANK thread only
// VBLANK/RSX thread only
return CELL_EINVAL;
}
@ -753,12 +770,15 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
ensure(a3 < 2);
// todo: this is wrong and should be 'second' vblank handler and freq, but since currently everything is reported as being 59.94, this should be fine
vm::_ref<u32>(render->device_addr + 0x30) = 1;
driverInfo.head[a3].lastSecondVTime.atomic_op([&](be_t<u64>& time)
{
a4 = std::max<u64>(a4, time + 1);
time = a4;
});
// Time point is supplied in argument 4 (todo: convert it to MFTB rate and use it)
const u64 current_time = rsxTimeStamp();
driverInfo.head[a3].lastSecondVTime = current_time;
// Note: not atomic
driverInfo.head[a3].lastVTimeLow = static_cast<u32>(current_time);

View file

@ -32,7 +32,7 @@ struct RsxDriverInfo
be_t<u32> lastQueuedBufferId; // 0x14 todo: this is definately not this variable but its 'unused' so im using it for queueId to pass to flip handler
be_t<u32> unk3; // 0x18
be_t<u32> lastVTimeLow; // 0x1C last time for first vhandler freq (low 32-bits)
be_t<u64> lastSecondVTime; // 0x20 last time for second vhandler freq
atomic_be_t<u64> lastSecondVTime; // 0x20 last time for second vhandler freq
be_t<u64> unk4; // 0x28
atomic_be_t<u64> vBlankCount; // 0x30
be_t<u32> unk; // 0x38 possible u32, 'flip field', top/bottom for interlaced

View file

@ -595,6 +595,31 @@ namespace rsx
}
}
// Counts one vblank and forwards it to the guest.
// post_event_time: timestamp of the event; it is only consumed on the non-HLE path,
// where it is converted with get_guest_system_time() before being passed on.
void thread::post_vblank_event(u64 post_event_time)
{
	vblank_count++;

	if (!isHLE)
	{
		// Non-HLE path: deliver the event through package 0xFED of sys_rsx_context_attribute
		const u64 guest_time = get_guest_system_time(post_event_time);
		sys_rsx_context_attribute(0x55555555, 0xFED, 1, guest_time, 0, 0);
		return;
	}

	// HLE path: only acts when a vblank handler is set
	if (auto handler = vblank_handler)
	{
		// Queue the handler call (single argument: 1) on the interrupt thread
		intr_thread->cmd_list
		({
			{ ppu_cmd::set_args, 1 }, u64{1},
			{ ppu_cmd::lle_call, handler },
			{ ppu_cmd::sleep, 0 }
		});

		// Signal the interrupt thread that new commands are pending
		intr_thread->cmd_notify++;
		intr_thread->cmd_notify.notify_one();
	}
}
void thread::on_task()
{
g_tls_log_prefix = []
@ -686,7 +711,6 @@ namespace rsx
{
{
local_vblank_count++;
vblank_count++;
if (local_vblank_count == vblank_rate)
{
@ -701,25 +725,7 @@ namespace rsx
vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000;
}
if (isHLE)
{
if (auto ptr = vblank_handler)
{
intr_thread->cmd_list
({
{ ppu_cmd::set_args, 1 }, u64{1},
{ ppu_cmd::lle_call, ptr },
{ ppu_cmd::sleep, 0 }
});
intr_thread->cmd_notify++;
intr_thread->cmd_notify.notify_one();
}
}
else
{
sys_rsx_context_attribute(0x55555555, 0xFED, 1, get_guest_system_time(post_event_time), 0, 0);
}
post_vblank_event(post_event_time);
}
}
else if (wait_sleep)
@ -3203,15 +3209,17 @@ namespace rsx
}
double limit = 0.;
switch (g_disable_frame_limit ? frame_limit_type::none : g_cfg.video.frame_limit)
const auto frame_limit = g_disable_frame_limit ? frame_limit_type::none : g_cfg.video.frame_limit;
switch (frame_limit)
{
case frame_limit_type::none: limit = 0.; break;
case frame_limit_type::_59_94: limit = 59.94; break;
case frame_limit_type::_50: limit = 50.; break;
case frame_limit_type::_60: limit = 60.; break;
case frame_limit_type::_30: limit = 30.; break;
case frame_limit_type::_auto: limit = static_cast<double>(g_cfg.video.vblank_rate); break;
case frame_limit_type::_ps3: limit = 0.; break;
case frame_limit_type::infinite: limit = 0.; break;
default:
break;
}
@ -3247,10 +3255,17 @@ namespace rsx
}
}
}
else if (wait_for_flip_sema)
else if (frame_limit == frame_limit_type::_ps3)
{
const auto& value = vm::_ref<RsxSemaphore>(device_addr + 0x30).val;
if (value != flip_sema_wait_val)
bool exit = false;
if (vblank_at_flip == umax)
{
vblank_at_flip = +vblank_count;
exit = true;
}
if (requested_vsync && (exit || vblank_at_flip == vblank_count))
{
// Not yet signaled, handle it later
async_flip_requested |= flip_request::emu_requested;
@ -3258,7 +3273,7 @@ namespace rsx
return;
}
wait_for_flip_sema = false;
vblank_at_flip = umax;
}
int_flip_index++;

View file

@ -564,7 +564,7 @@ namespace rsx
u32 flip_status;
int debug_level;
atomic_t<bool> requested_vsync{false};
atomic_t<bool> requested_vsync{true};
atomic_t<bool> enable_second_vhandler{false};
RsxDisplayInfo display_buffers[8];
@ -645,8 +645,8 @@ namespace rsx
atomic_t<u64> vblank_count{0};
bool capture_current_frame = false;
bool wait_for_flip_sema = false;
u32 flip_sema_wait_val = 0;
u64 vblank_at_flip = umax;
void post_vblank_event(u64 post_event_time);
public:
atomic_t<bool> sync_point_request = false;

View file

@ -1874,11 +1874,10 @@ struct registers_decoder<NV406E_SEMAPHORE_RELEASE>
static std::string dump(const decoded_type& decoded)
{
return fmt::format("NV409E semaphore: release: 0x%x", decoded.value);
return fmt::format("NV406E semaphore: release: 0x%x", decoded.value);
}
};
template <>
struct registers_decoder<NV406E_SEMAPHORE_ACQUIRE>
{
@ -1891,7 +1890,7 @@ struct registers_decoder<NV406E_SEMAPHORE_ACQUIRE>
static std::string dump(const decoded_type& decoded)
{
return fmt::format("NV409E semaphore: acquire: 0x%x", decoded.value);
return fmt::format("NV406E semaphore: acquire: 0x%x", decoded.value);
}
};

View file

@ -37,7 +37,7 @@ namespace rsx
template<bool FlushDMA, bool FlushPipe>
void write_gcm_label(thread* rsx, u32 address, u32 data)
{
const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->label_addr + 0x30));
const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->device_addr + 0x30));
if (!is_flip_sema)
{
// First, queue the GPU work. If it flushes the queue for us, the following routines will be faster.
@ -108,18 +108,6 @@ namespace rsx
rsx->flush_fifo();
}
if (addr == rsx->device_addr + 0x30)
{
if (g_cfg.video.frame_limit == frame_limit_type::_ps3 && rsx->requested_vsync)
{
// Enables PS3-compliant vblank behavior
rsx->flip_sema_wait_val = arg;
rsx->wait_for_flip_sema = (sema != arg);
}
return;
}
u64 start = rsx::uclock();
u64 last_check_val = start;
@ -189,6 +177,12 @@ namespace rsx
return;
}
if (addr == rsx->device_addr + 0x30 && !arg)
{
// HW flip synchronization related, 1 is not written without display queue command (TODO: make it behave as real hw)
arg = 1;
}
write_gcm_label<false, true>(rsx, addr, arg);
}
}

View file

@ -307,12 +307,12 @@ void fmt_class_string<frame_limit_type>::format(std::string& out, u64 arg)
switch (value)
{
case frame_limit_type::none: return "Off";
case frame_limit_type::_59_94: return "59.94";
case frame_limit_type::_50: return "50";
case frame_limit_type::_60: return "60";
case frame_limit_type::_30: return "30";
case frame_limit_type::_auto: return "Auto";
case frame_limit_type::_ps3: return "PS3 Native";
case frame_limit_type::infinite: return "Infinite";
}
return unknown;

View file

@ -187,12 +187,12 @@ enum class video_aspect
// Frame limiter modes (read from g_cfg.video.frame_limit).
// The quoted names are the strings produced by fmt_class_string<frame_limit_type>::format.
enum class frame_limit_type
{
none, // "Off"
_59_94, // "59.94"
_50, // "50"
_60, // "60"
_30, // "30"
_auto, // "Auto": the limit is taken from g_cfg.video.vblank_rate
_ps3, // "PS3 Native"
infinite, // "Infinite": sys_rsx_context_attribute posts an additional vblank event when this mode is selected
};
enum class msaa_level

View file

@ -950,12 +950,12 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
switch (static_cast<frame_limit_type>(index))
{
case frame_limit_type::none: return tr("Off", "Frame limit");
case frame_limit_type::_59_94: return tr("59.94", "Frame limit");
case frame_limit_type::_50: return tr("50", "Frame limit");
case frame_limit_type::_60: return tr("60", "Frame limit");
case frame_limit_type::_30: return tr("30", "Frame limit");
case frame_limit_type::_auto: return tr("Auto", "Frame limit");
case frame_limit_type::_ps3: return tr("PS3 Native", "Frame limit");
case frame_limit_type::infinite: return tr("Infinite", "Frame limit");
}
break;
case emu_settings_type::MSAA:

View file

@ -158,7 +158,7 @@ public:
const QString resolution = tr("This setting will be ignored if the Resolution Scale is set to anything other than 100%!\nLeave this on 1280x720. Every PS3 game is compatible with this resolution.\nOnly use 1920x1080 if the game supports it.\nRarely due to emulation bugs some games will only render at low resolutions like 480p.");
const QString graphics_adapter = tr("On multi GPU systems select which GPU to use in RPCS3 when using Vulkan.\nThis is not needed when using OpenGL.");
const QString aspect_ratio = tr("Leave this on 16:9 unless you have a 4:3 monitor.");
const QString frame_limit = tr("Off is the best option as it performs faster.\nUsing the frame limiter will add extra overhead and slow down the game.\nHowever, some games will crash if the frame rate is too high.\nPS3 native should only be used if Auto is not working correctly as it can introduce frame-pacing issues.");
const QString frame_limit = tr("Off is the fastest option.\nUsing the frame limiter will add extra overhead and slow down the game. However, some games will crash if the frame rate is too high.\nPS3 native should only be used if Auto is not working correctly as it can introduce frame-pacing issues.\nInfinite adds a positive feedback loop which adds another vblank signal per frame allowing more games to be fps limitless.");
const QString anti_aliasing = tr("Emulate PS3 multisampling layout.\nCan fix some otherwise difficult to solve graphics glitches.\nLow to moderate performance hit depending on your GPU hardware.");
const QString anisotropic_filter = tr("Higher values increase sharpness of textures on sloped surfaces at the cost of GPU resources.\nModern GPUs can handle this setting just fine, even at 16x.\nKeep this on Automatic if you want to use the original setting used by a real PS3.");
const QString resolution_scale = tr("Scales the game's resolution by the given percentage.\nThe base resolution is always 1280x720.\nSet this value to 100% if you want to use the normal Resolution options.\nValues below 100% will usually not improve performance.");