rsx/fp: Fix perspective correction handling

- Perspective correction flag multiplies VP output by HPOS.w.
  NOTE: Not the same as division by w when it comes to NaN/Inf problems!!
- Restructure indexed loads a bit to avoid re-initializing registers unnecessarily
This commit is contained in:
kd-11 2021-08-24 17:10:59 +03:00 committed by kd-11
parent b0e5de4c9c
commit 2ff407ac6a

View file

@ -5,6 +5,23 @@
#include <algorithm>
namespace rsx
{
	namespace fragment_program
	{
		// Names of the fragment program input registers as they are spelled
		// in the generated shader source. The position of each name is
		// significant: entries 1 and 2 are the two color inputs (COL0/COL1)
		// and the ten texture coordinate inputs start at entry 4, which is
		// what lets callers address them as reg_table[i + 4].
		// NOTE(review): presumably indexed by the hardware input register
		// number - confirm against the lookup site before reordering.
		static const std::string reg_table[] =
		{
			"wpos",        //  0
			"diff_color",  //  1 (COL0)
			"spec_color",  //  2 (COL1)
			"fogc",        //  3
			"tc0",         //  4
			"tc1",         //  5
			"tc2",         //  6
			"tc3",         //  7
			"tc4",         //  8
			"tc5",         //  9
			"tc6",         // 10
			"tc7",         // 11
			"tc8",         // 12
			"tc9",         // 13
			"ssa"          // 14
		};
	}
}
using namespace rsx::fragment_program;
FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size)
: m_size(size)
, m_prog(prog)
@ -534,15 +551,6 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
case RSX_FP_REGISTER_TYPE_INPUT:
{
static const std::string reg_table[] =
{
"wpos",
"diff_color", "spec_color",
"fogc",
"tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9",
"ssa"
};
// NOTE: Hw testing showed the following:
// 1. Reading from registers 1 and 2 (COL0 and COL1) is clamped to (0, 1)
// 2. Reading from registers 4-12 (inclusive) is not clamped, but..
@ -610,32 +618,58 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
ret += getFloatTypeName(4) + "(gl_PointCoord, 1., 0.)";
}
}
else if (m_prog.texcoord_is_2d(texcoord))
else if (src2.perspective_corr)
{
ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)";
properties.has_w_access = true;
// Perspective correct flag multiplies the result by 1/w
if (m_prog.texcoord_is_2d(texcoord))
{
ret += getFloatTypeName(4) + "(" + reg_var + ".xy * gl_FragCoord.w, 0., 1.)";
}
else
{
ret += "(" + reg_var + " * gl_FragCoord.w)";
}
}
else
{
ret += reg_var;
if (m_prog.texcoord_is_2d(texcoord))
{
ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)";
properties.has_w_access = true;
}
else
{
ret += reg_var;
}
}
break;
}
case 0xFF:
{
for (int i = 0; i < 10; ++i)
{
m_parr.AddParam(PF_PARAM_IN, getFloatTypeName(4), reg_table[i + 4]);
}
if (m_loop_count > 1)
{
// Afaik there is only one address/loop register on NV40
rsx_log.error("Nested loop with indexed load was detected. Report this to developers!");
}
ret = fmt::format("_indexed_load(i%u + %u)", m_loop_count - 1, src2.addr_reg);
if (m_prog.texcoord_control_mask)
{
// This would require more work if it exists. It cannot be determined at compile time and has to be part of _indexed_load() subroutine.
rsx_log.error("Indexed load with control override mask detected. Report this to developers!");
}
auto load_cmd = fmt::format("_indexed_load(i%u + %u)", m_loop_count - 1, src2.addr_reg);
properties.has_dynamic_register_load = true;
insert = false;
if (src2.perspective_corr)
{
ret += "(" + load_cmd + " * gl_FragCoord.w)";
}
else
{
ret += load_cmd;
}
break;
}
default:
@ -757,6 +791,15 @@ std::string FragmentProgramDecompiler::BuildCode()
}
}
if (properties.has_dynamic_register_load)
{
// Since the registers will be loaded dynamically, declare all of them
for (int i = 0; i < 10; ++i)
{
m_parr.AddParam(PF_PARAM_IN, getFloatTypeName(4), reg_table[i + 4]);
}
}
std::stringstream OS;
insertHeader(OS);
OS << "\n";