mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-08-11 18:50:55 +00:00
RSX: Use two loops in analyse_fragment_program
- Use one loop to find the start of the program, and another loop to analyse the actual program
- Very slight performance improvement
This commit is contained in:
parent
b316d737d0
commit
74f1d357db
1 changed file with 29 additions and 30 deletions
|
@ -109,7 +109,6 @@ AVX512_ICL_FUNC usz get_vertex_program_ucode_hash_512(const RSXVertexProgram &pr
|
||||||
|
|
||||||
const __m512i result = _mm512_add_epi64(acc0, acc1);
|
const __m512i result = _mm512_add_epi64(acc0, acc1);
|
||||||
return _mm512_reduce_add_epi64(result);
|
return _mm512_reduce_add_epi64(result);
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -607,6 +606,30 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
|
||||||
const auto instBuffer = ptr;
|
const auto instBuffer = ptr;
|
||||||
s32 index = 0;
|
s32 index = 0;
|
||||||
|
|
||||||
|
// Find the start of the program
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
const auto inst = v128::loadu(instBuffer, index);
|
||||||
|
|
||||||
|
const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
|
||||||
|
if (opcode)
|
||||||
|
{
|
||||||
|
// We found the start of the program, don't advance the index
|
||||||
|
result.program_start_offset = index * 16;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((inst._u32[0] >> 8) & 0x1)
|
||||||
|
{
|
||||||
|
result.program_start_offset = index * 16;
|
||||||
|
result.program_ucode_length = 16;
|
||||||
|
result.is_nop_shader = true;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
const auto inst = v128::loadu(instBuffer, index);
|
const auto inst = v128::loadu(instBuffer, index);
|
||||||
|
@ -623,11 +646,6 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
|
||||||
const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
|
const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
|
||||||
if (opcode)
|
if (opcode)
|
||||||
{
|
{
|
||||||
if (result.program_start_offset == umax)
|
|
||||||
{
|
|
||||||
result.program_start_offset = index * 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (opcode)
|
switch (opcode)
|
||||||
{
|
{
|
||||||
case RSX_FP_OPCODE_TEX:
|
case RSX_FP_OPCODE_TEX:
|
||||||
|
@ -665,31 +683,19 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
|
||||||
{
|
{
|
||||||
//Instruction references constant, skip one slot occupied by data
|
//Instruction references constant, skip one slot occupied by data
|
||||||
index++;
|
index++;
|
||||||
result.program_ucode_length += 16;
|
|
||||||
result.program_constants_buffer_length += 16;
|
result.program_constants_buffer_length += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.program_start_offset != umax)
|
index++;
|
||||||
{
|
|
||||||
result.program_ucode_length += 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((inst._u32[0] >> 8) & 0x1)
|
if ((inst._u32[0] >> 8) & 0x1)
|
||||||
{
|
{
|
||||||
if (result.program_start_offset == umax)
|
|
||||||
{
|
|
||||||
result.program_start_offset = index * 16;
|
|
||||||
result.program_ucode_length = 16;
|
|
||||||
result.is_nop_shader = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
index++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
result.program_ucode_length = (index - (result.program_start_offset / 16)) * 16;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -697,26 +703,21 @@ usz fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragmentPro
|
||||||
{
|
{
|
||||||
// Checksum as hash with rotated data
|
// Checksum as hash with rotated data
|
||||||
const void* instbuffer = program.get_data();
|
const void* instbuffer = program.get_data();
|
||||||
u32 instIndex = 0;
|
|
||||||
usz acc0 = 0;
|
usz acc0 = 0;
|
||||||
usz acc1 = 0;
|
usz acc1 = 0;
|
||||||
while (true)
|
for (usz instIndex = 0; instIndex < (program.ucode_length / 16); instIndex++)
|
||||||
{
|
{
|
||||||
const auto inst = v128::loadu(instbuffer, instIndex);
|
const auto inst = v128::loadu(instbuffer, instIndex);
|
||||||
const usz tmp0 = std::rotr(inst._u64[0], instIndex * 2);
|
const usz tmp0 = std::rotr(inst._u64[0], instIndex * 2);
|
||||||
acc0 += tmp0;
|
acc0 += tmp0;
|
||||||
const usz tmp1 = std::rotr(inst._u64[1], (instIndex * 2) + 1);
|
const usz tmp1 = std::rotr(inst._u64[1], (instIndex * 2) + 1);
|
||||||
acc1 += tmp1;
|
acc1 += tmp1;
|
||||||
instIndex++;
|
|
||||||
// Skip constants
|
// Skip constants
|
||||||
if (fragment_program_utils::is_any_src_constant(inst))
|
if (fragment_program_utils::is_any_src_constant(inst))
|
||||||
instIndex++;
|
instIndex++;
|
||||||
|
|
||||||
const bool end = (inst._u32[0] >> 8) & 0x1;
|
|
||||||
if (end)
|
|
||||||
return acc0 + acc1;
|
|
||||||
}
|
}
|
||||||
return 0;
|
return acc0 + acc1;
|
||||||
}
|
}
|
||||||
|
|
||||||
usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program) const
|
usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program) const
|
||||||
|
@ -751,8 +752,7 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
|
||||||
|
|
||||||
const void* instBuffer1 = binary1.get_data();
|
const void* instBuffer1 = binary1.get_data();
|
||||||
const void* instBuffer2 = binary2.get_data();
|
const void* instBuffer2 = binary2.get_data();
|
||||||
usz instIndex = 0;
|
for (usz instIndex = 0; instIndex < (binary1.ucode_length / 16); instIndex++)
|
||||||
while (instIndex < (binary1.ucode_length / 16))
|
|
||||||
{
|
{
|
||||||
const auto inst1 = v128::loadu(instBuffer1, instIndex);
|
const auto inst1 = v128::loadu(instBuffer1, instIndex);
|
||||||
const auto inst2 = v128::loadu(instBuffer2, instIndex);
|
const auto inst2 = v128::loadu(instBuffer2, instIndex);
|
||||||
|
@ -762,7 +762,6 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
instIndex++;
|
|
||||||
// Skip constants
|
// Skip constants
|
||||||
if (fragment_program_utils::is_any_src_constant(inst1))
|
if (fragment_program_utils::is_any_src_constant(inst1))
|
||||||
instIndex++;
|
instIndex++;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue