From 2ff407ac6a8d0994f070d0da8df021e0ff15d7a7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 24 Aug 2021 17:10:59 +0300 Subject: [PATCH] rsx/fp: Fix perspective correction handling - Perspective correction flag multiplies VP output by HPOS.w. NOTE: Not the same as division by w when it comes to NaN/Inf problems!! - Restructure indexed loads a bit to avoid re-initializing registers unnecessarily --- .../RSX/Program/FragmentProgramDecompiler.cpp | 81 ++++++++++++++----- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index 0c82856f55..39b36c36e7 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -5,6 +5,23 @@ #include +namespace rsx +{ + namespace fragment_program + { + static const std::string reg_table[] = + { + "wpos", + "diff_color", "spec_color", + "fogc", + "tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9", + "ssa" + }; + } +} + +using namespace rsx::fragment_program; + FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size) : m_size(size) , m_prog(prog) @@ -534,15 +551,6 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) case RSX_FP_REGISTER_TYPE_INPUT: { - static const std::string reg_table[] = - { - "wpos", - "diff_color", "spec_color", - "fogc", - "tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9", - "ssa" - }; - // NOTE: Hw testing showed the following: // 1. Reading from registers 1 and 2 (COL0 and COL1) is clamped to (0, 1) // 2. Reading from registers 4-12 (inclusive) is not clamped, but.. @@ -610,32 +618,58 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) ret += getFloatTypeName(4) + "(gl_PointCoord, 1., 0.)"; } } - else if (m_prog.texcoord_is_2d(texcoord)) + else if (src2.perspective_corr) { - ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)"; - properties.has_w_access = true; + // Perspective correct flag multiplies the result by 1/w + if (m_prog.texcoord_is_2d(texcoord)) + { + ret += getFloatTypeName(4) + "(" + reg_var + ".xy * gl_FragCoord.w, 0., 1.)"; + } + else + { + ret += "(" + reg_var + " * gl_FragCoord.w)"; + } } else { - ret += reg_var; + if (m_prog.texcoord_is_2d(texcoord)) + { + ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)"; + properties.has_w_access = true; + } + else + { + ret += reg_var; + } } break; } case 0xFF: { - for (int i = 0; i < 10; ++i) - { - m_parr.AddParam(PF_PARAM_IN, getFloatTypeName(4), reg_table[i + 4]); - } - if (m_loop_count > 1) { + // Afaik there is only one address/loop register on NV40 rsx_log.error("Nested loop with indexed load was detected. Report this to developers!"); } - ret = fmt::format("_indexed_load(i%u + %u)", m_loop_count - 1, src2.addr_reg); + if (m_prog.texcoord_control_mask) + { + // This would require more work if it exists. It cannot be determined at compile time and has to be part of _indexed_load() subroutine. + rsx_log.error("Indexed load with control override mask detected. Report this to developers!"); + } + + auto load_cmd = fmt::format("_indexed_load(i%u + %u)", m_loop_count - 1, src2.addr_reg); properties.has_dynamic_register_load = true; insert = false; + + if (src2.perspective_corr) + { + ret += "(" + load_cmd + " * gl_FragCoord.w)"; + } + else + { + ret += load_cmd; + } break; } default: @@ -757,6 +791,15 @@ std::string FragmentProgramDecompiler::BuildCode() } } + if (properties.has_dynamic_register_load) + { + // Since the registers will be loaded dynamically, declare all of them + for (int i = 0; i < 10; ++i) + { + m_parr.AddParam(PF_PARAM_IN, getFloatTypeName(4), reg_table[i + 4]); + } + } + std::stringstream OS; insertHeader(OS); OS << "\n";