RSX: Use two loops in analyse_fragment_program

- Use one loop to find the start of the program, and another loop to analyse the actual program
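- Very slight performance improvement: the main loop no longer checks `program_start_offset` on every instruction, and `program_ucode_length` is computed once after the loop instead of being incremented per instruction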
This commit is contained in:
Malcolm Jestadt 2025-03-04 17:15:52 -05:00
commit 74f1d357db

View file

@ -109,7 +109,6 @@ AVX512_ICL_FUNC usz get_vertex_program_ucode_hash_512(const RSXVertexProgram &pr
const __m512i result = _mm512_add_epi64(acc0, acc1); const __m512i result = _mm512_add_epi64(acc0, acc1);
return _mm512_reduce_add_epi64(result); return _mm512_reduce_add_epi64(result);
} }
#endif #endif
@ -607,6 +606,30 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
const auto instBuffer = ptr; const auto instBuffer = ptr;
s32 index = 0; s32 index = 0;
// Find the start of the program
while (true)
{
const auto inst = v128::loadu(instBuffer, index);
const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
if (opcode)
{
// We found the start of the program, don't advance the index
result.program_start_offset = index * 16;
break;
}
if ((inst._u32[0] >> 8) & 0x1)
{
result.program_start_offset = index * 16;
result.program_ucode_length = 16;
result.is_nop_shader = true;
return result;
}
index++;
}
while (true) while (true)
{ {
const auto inst = v128::loadu(instBuffer, index); const auto inst = v128::loadu(instBuffer, index);
@ -623,11 +646,6 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
const u32 opcode = (inst._u32[0] >> 16) & 0x3F; const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
if (opcode) if (opcode)
{ {
if (result.program_start_offset == umax)
{
result.program_start_offset = index * 16;
}
switch (opcode) switch (opcode)
{ {
case RSX_FP_OPCODE_TEX: case RSX_FP_OPCODE_TEX:
@ -665,31 +683,19 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
{ {
//Instruction references constant, skip one slot occupied by data //Instruction references constant, skip one slot occupied by data
index++; index++;
result.program_ucode_length += 16;
result.program_constants_buffer_length += 16; result.program_constants_buffer_length += 16;
} }
} }
if (result.program_start_offset != umax) index++;
{
result.program_ucode_length += 16;
}
if ((inst._u32[0] >> 8) & 0x1) if ((inst._u32[0] >> 8) & 0x1)
{ {
if (result.program_start_offset == umax)
{
result.program_start_offset = index * 16;
result.program_ucode_length = 16;
result.is_nop_shader = true;
}
break; break;
} }
index++;
} }
result.program_ucode_length = (index - (result.program_start_offset / 16)) * 16;
return result; return result;
} }
@ -697,26 +703,21 @@ usz fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragmentPro
{ {
// Checksum as hash with rotated data // Checksum as hash with rotated data
const void* instbuffer = program.get_data(); const void* instbuffer = program.get_data();
u32 instIndex = 0;
usz acc0 = 0; usz acc0 = 0;
usz acc1 = 0; usz acc1 = 0;
while (true) for (usz instIndex = 0; instIndex < (program.ucode_length / 16); instIndex++)
{ {
const auto inst = v128::loadu(instbuffer, instIndex); const auto inst = v128::loadu(instbuffer, instIndex);
const usz tmp0 = std::rotr(inst._u64[0], instIndex * 2); const usz tmp0 = std::rotr(inst._u64[0], instIndex * 2);
acc0 += tmp0; acc0 += tmp0;
const usz tmp1 = std::rotr(inst._u64[1], (instIndex * 2) + 1); const usz tmp1 = std::rotr(inst._u64[1], (instIndex * 2) + 1);
acc1 += tmp1; acc1 += tmp1;
instIndex++;
// Skip constants // Skip constants
if (fragment_program_utils::is_any_src_constant(inst)) if (fragment_program_utils::is_any_src_constant(inst))
instIndex++; instIndex++;
const bool end = (inst._u32[0] >> 8) & 0x1;
if (end)
return acc0 + acc1;
} }
return 0; return acc0 + acc1;
} }
usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program) const usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program) const
@ -751,8 +752,7 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
const void* instBuffer1 = binary1.get_data(); const void* instBuffer1 = binary1.get_data();
const void* instBuffer2 = binary2.get_data(); const void* instBuffer2 = binary2.get_data();
usz instIndex = 0; for (usz instIndex = 0; instIndex < (binary1.ucode_length / 16); instIndex++)
while (instIndex < (binary1.ucode_length / 16))
{ {
const auto inst1 = v128::loadu(instBuffer1, instIndex); const auto inst1 = v128::loadu(instBuffer1, instIndex);
const auto inst2 = v128::loadu(instBuffer2, instIndex); const auto inst2 = v128::loadu(instBuffer2, instIndex);
@ -762,7 +762,6 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
return false; return false;
} }
instIndex++;
// Skip constants // Skip constants
if (fragment_program_utils::is_any_src_constant(inst1)) if (fragment_program_utils::is_any_src_constant(inst1))
instIndex++; instIndex++;