From 343ba8733b17deec84fc189a1f348aeeb275d7d1 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Sun, 23 Jul 2023 09:09:24 +0200 Subject: [PATCH] Merge xfloat options --- rpcs3/Emu/Cell/SPURecompiler.cpp | 64 +++++++++++++++---------------- rpcs3/Emu/system_config.h | 4 +- rpcs3/Emu/system_config_types.cpp | 17 ++++++++ rpcs3/Emu/system_config_types.h | 8 ++++ rpcs3/rpcs3qt/emu_settings.cpp | 8 ++++ rpcs3/rpcs3qt/emu_settings_type.h | 6 +-- rpcs3/rpcs3qt/settings_dialog.cpp | 25 +----------- 7 files changed, 70 insertions(+), 62 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index be20bcb8ef..8019a7f24e 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -5316,7 +5316,7 @@ public: if (src > 0x40000) { // Use the xfloat hint to create 256-bit (4x double) PHI - llvm::Type* type = g_cfg.core.spu_accurate_xfloat && bb.reg_maybe_xf[i] ? get_type() : get_reg_type(i); + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type() : get_reg_type(i); const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i)); m_block->phi[i] = _phi; @@ -8876,7 +8876,7 @@ public: void FREST(spu_opcode_t op) { // TODO - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { const auto a = get_vr(op.ra); const auto mask_ov = sext(bitcast(fabs(a)) > splat(0x7e7fffff)); @@ -8885,7 +8885,7 @@ public: return; } - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { register_intrinsic("spu_frest", [&](llvm::CallInst* ci) { @@ -8918,13 +8918,13 @@ public: void FRSQEST(spu_opcode_t op) { // TODO - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, fsplat(1.0) / fsqrt(fabs(get_vr(op.ra)))); return; } - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci) { @@ -8956,7 +8956,7 @@ public: void FCGT(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) > get_vr(op.rb)))); return; @@ -9003,7 +9003,7 @@ public: return eval(sext(bitcast(a) > bitcast(b))); } - if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed) { const auto ai = eval(bitcast(a)); const auto bi = eval(bitcast(b)); @@ -9034,7 +9034,7 @@ public: void FCMGT(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) > fabs(get_vr(op.rb))))); return; @@ -9080,7 +9080,7 @@ public: return eval(sext(mai > mbi)); } - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { return eval(sext(fcmp_uno(ma > mb) & (mai > mbi))); } @@ -9101,7 +9101,7 @@ public: void FA(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb)); return; @@ -9126,7 +9126,7 @@ public: void FS(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, get_vr(op.ra) - get_vr(op.rb)); return; @@ -9137,7 +9137,7 @@ public: const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { const auto bc = clamp_smax(b); // for #4478 return eval(a - bc); @@ -9159,7 +9159,7 @@ public: void FM(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb)); return; @@ -9170,7 +9170,7 @@ public: const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { if (a.value == b.value) { @@ -9206,7 +9206,7 @@ public: void FESD(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { const auto r = zshuffle(get_vr(op.ra), 1, 3); const auto d = bitcast(r); @@ -9236,7 +9236,7 @@ public: void FRDS(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { const auto r = get_vr(op.ra); const auto d = bitcast(r); @@ -9267,7 +9267,7 @@ public: void FCEQ(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) == get_vr(op.rb)))); return; @@ -9320,7 +9320,7 @@ public: return eval(sext(bitcast(a) == bitcast(b))); } - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { return eval(sext(fcmp_ord(a == b)) | sext(bitcast(a) == bitcast(b))); } @@ -9341,7 +9341,7 @@ public: void FCMEQ(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) == fabs(get_vr(op.rb))))); return; @@ -9397,7 +9397,7 @@ public: return eval(sext(bitcast(fa) == bitcast(fb))); } - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { return eval(sext(fcmp_ord(fa == fb)) | sext(bitcast(fa) == bitcast(fb))); } @@ -9490,7 +9490,7 @@ public: void FNMS(spu_opcode_t op) { // See FMA. - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); set_vr(op.rt4, fmuladd(-a, b, c)); @@ -9503,7 +9503,7 @@ public: const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed) { return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c); } @@ -9525,7 +9525,7 @@ public: void FMA(spu_opcode_t op) { // Hardware FMA produces the same result as multiple + add on the limited double range (xfloat). - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); set_vr(op.rt4, fmuladd(a, b, c)); @@ -9538,7 +9538,7 @@ public: const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); @@ -9599,7 +9599,7 @@ public: void FMS(spu_opcode_t op) { // See FMA. - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); set_vr(op.rt4, fmuladd(a, b, -c)); @@ -9612,7 +9612,7 @@ public: const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c)); } @@ -9646,7 +9646,7 @@ public: void FI(spu_opcode_t op) { // TODO - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { set_vr(op.rt, get_vr(op.rb)); // const auto [a, b] = get_vrs(op.ra, op.rb); @@ -9674,7 +9674,7 @@ public: return bitcast((b & 0xff800000u) | (bitcast(fpcast(bnew)) & ~0xff800000u)); // Inject old sign and exponent }); - if (g_cfg.core.spu_approx_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { register_intrinsic("spu_re", [&](llvm::CallInst* ci) { @@ -9733,7 +9733,7 @@ public: void CFLTS(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { value_t a = get_vr(op.ra); value_t s; @@ -9807,7 +9807,7 @@ public: void CFLTU(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { value_t a = get_vr(op.ra); value_t s; @@ -9890,7 +9890,7 @@ public: void CSFLT(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { value_t a = get_vr(op.ra); value_t r; @@ -9930,7 +9930,7 @@ public: void CUFLT(spu_opcode_t op) { - if (g_cfg.core.spu_accurate_xfloat) + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) { value_t a = get_vr(op.ra); value_t r; diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index a49a7b9c9c..bdce5d0db2 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -66,9 +66,7 @@ struct cfg_root : cfg::node cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true }; cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true }; cfg::_enum enable_TSX{ this, "Enable TSX", enable_tsx_by_default() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully - cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false }; - cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true }; - cfg::_bool spu_relaxed_xfloat{ this, "Relaxed xfloat", true }; // Approximate accuracy for only the "FCGT", "FNMS", "FREST" AND "FRSQEST" instructions + cfg::_enum spu_xfloat_accuracy{ this, "XFloat Accuracy", xfloat_accuracy::approximate, false }; cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip) cfg::_bool full_width_avx512{ this, "Full Width AVX-512", true }; diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp index 7324101a89..e785ef8814 100644 --- a/rpcs3/Emu/system_config_types.cpp +++ b/rpcs3/Emu/system_config_types.cpp @@ -664,3 +664,20 @@ void fmt_class_string::format(std::string& out, u64 arg) return unknown; }); } + +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + format_enum(out, arg, [](xfloat_accuracy value) + { + switch (value) + { + case xfloat_accuracy::accurate: return "Accurate"; + case xfloat_accuracy::approximate: return "Approximate"; + case xfloat_accuracy::relaxed: return "Relaxed"; + case xfloat_accuracy::inaccurate: return "Inaccurate"; + } + + return unknown; + }); +} diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h index cf1214a967..90862eee69 100644 --- a/rpcs3/Emu/system_config_types.h +++ b/rpcs3/Emu/system_config_types.h @@ -320,3 +320,11 @@ enum class stereo_render_mode_options side_by_side, over_under }; + +enum class xfloat_accuracy +{ + accurate, + approximate, + relaxed, // Approximate accuracy for only the "FCGT", "FNMS", "FREST" AND "FRSQEST" instructions + inaccurate +}; diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 37938a9738..03735f8662 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -1283,6 +1283,14 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ case midi_device_type::keyboard: return tr("Keyboard", "Midi Device Type"); } break; + case emu_settings_type::XFloatAccuracy: + switch (static_cast(index)) + { + case xfloat_accuracy::accurate: return tr("Accurate XFloat"); + case xfloat_accuracy::approximate: return tr("Approximate XFloat"); + case xfloat_accuracy::relaxed: return tr("Relaxed XFloat"); + case xfloat_accuracy::inaccurate: return tr("Inaccurate XFloat"); + } default: break; } diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index f4600d5b87..62d67e43fd 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -26,8 +26,7 @@ enum class emu_settings_type AccurateClineStores, AccurateRSXAccess, FIFOAccuracy, - AccurateXFloat, - ApproximateXFloat, + XFloatAccuracy, AccuratePPU128Loop, MFCCommandsShuffling, NumPPUThreads, @@ -212,8 +211,7 @@ inline static const QMap settings_location = { emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}}, { emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}}, { emu_settings_type::FIFOAccuracy, { "Core", "RSX FIFO Accuracy"}}, - { emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}}, - { emu_settings_type::ApproximateXFloat, { "Core", "Approximate xfloat"}}, + { emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}}, { emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}}, { emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}}, { emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 4952831e99..f153696617 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -265,30 +265,9 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std SubscribeTooltip(ui->spuLoopDetection, tooltips.settings.spu_loop_detection); // Comboboxes + m_emu_settings->EnhanceComboBox(ui->xfloatAccuracy, emu_settings_type::XFloatAccuracy); SubscribeTooltip(ui->gb_xfloat_accuracy, tooltips.settings.xfloat); - ui->xfloatAccuracy->addItem(tr("Accurate XFloat")); - ui->xfloatAccuracy->addItem(tr("Approximate XFloat")); - ui->xfloatAccuracy->addItem(tr("Relaxed XFloat")); - - connect(ui->xfloatAccuracy, QOverload::of(&QComboBox::currentIndexChanged), this, [this](int index) - { - if (index < 0) return; - - m_emu_settings->SetSetting(emu_settings_type::AccurateXFloat, index == 0 ? "true" : "false"); - m_emu_settings->SetSetting(emu_settings_type::ApproximateXFloat, index == 1 ? "true" : "false"); - }); - - connect(m_emu_settings.get(), &emu_settings::RestoreDefaultsSignal, this, [this]() - { - ui->xfloatAccuracy->setCurrentIndex(1); - }); - - if (m_emu_settings->GetSetting(emu_settings_type::AccurateXFloat) == "true") - ui->xfloatAccuracy->setCurrentIndex(0); - else if (m_emu_settings->GetSetting(emu_settings_type::ApproximateXFloat) == "true") - ui->xfloatAccuracy->setCurrentIndex(1); - else - ui->xfloatAccuracy->setCurrentIndex(2); + remove_item(ui->xfloatAccuracy, static_cast(xfloat_accuracy::inaccurate), static_cast(g_cfg.core.spu_xfloat_accuracy.def)); m_emu_settings->EnhanceComboBox(ui->spuBlockSize, emu_settings_type::SPUBlockSize); SubscribeTooltip(ui->gb_spuBlockSize, tooltips.settings.spu_block_size);