RSX: Optimize vertex program ucode invalidation

This commit is contained in:
Elad 2025-03-04 06:24:11 +02:00
parent eb43fbbb2a
commit ae39c5b8cb
2 changed files with 73 additions and 2 deletions

View file

@ -155,9 +155,38 @@ namespace rsx
rcount = ::size32(fifo_span);
}
copy_data_swap_u32(&REGS(ctx)->transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount);
const auto out_ptr = &REGS(ctx)->transform_program[load_pos * 4 + index % 4];
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
pipeline_state to_set_dirty = rsx::pipeline_state::vertex_program_ucode_dirty;
if (rcount >= 4 && !RSX(ctx)->m_graphics_state.test(rsx::pipeline_state::vertex_program_ucode_dirty))
{
// Assume clean
to_set_dirty = {};
const usz first_index_off = 0;
const usz second_index_off = (((rcount / 4) - 1) / 2) * 4;
const u64 src_op1_2 = read_from_ptr<be_t<u64>>(fifo_span.data() + first_index_off);
const u64 src_op2_2 = read_from_ptr<be_t<u64>>(fifo_span.data() + second_index_off);
// Fast comparison
if (src_op1_2 != read_from_ptr<u64>(out_ptr + first_index_off) || src_op2_2 != read_from_ptr<u64>(out_ptr + second_index_off))
{
to_set_dirty = rsx::pipeline_state::vertex_program_ucode_dirty;
}
}
if (to_set_dirty)
{
copy_data_swap_u32(out_ptr, fifo_span.data(), rcount);
}
else if (copy_data_swap_u32_cmp(out_ptr, fifo_span.data(), rcount))
{
to_set_dirty = rsx::pipeline_state::vertex_program_ucode_dirty;
}
RSX(ctx)->m_graphics_state |= to_set_dirty;
REGS(ctx)->transform_program_load_set(load_pos + ((rcount + index % 4) / 4));
RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1);
}

View file

@ -172,16 +172,35 @@ protected:
bool recompile = false;
vertex_program_type* new_shader;
{
thread_local const std::pair<const RSXVertexProgram, vertex_program_type>* prev_vp = nullptr;
thread_local usz prev_count = umax;
static atomic_t<usz> invl_count = 0;
reader_lock lock(m_vertex_mutex);
if (prev_count == invl_count)
{
// prev_vp must be non-null here
if (prev_vp->first.data.size() == rsx_vp.data.size() && prev_vp->first.output_mask == rsx_vp.output_mask)
{
if (program_hash_util::vertex_program_compare()(prev_vp->first, rsx_vp))
{
return std::forward_as_tuple(prev_vp->second, true);
}
}
}
const auto& I = m_vertex_shader_cache.find(rsx_vp);
if (I != m_vertex_shader_cache.end())
{
prev_vp = &*I;
prev_count = invl_count;
return std::forward_as_tuple(I->second, true);
}
if (!force_load)
{
prev_count = umax;
return std::forward_as_tuple(__null_vertex_program, false);
}
@ -191,6 +210,8 @@ protected:
auto [it, inserted] = m_vertex_shader_cache.try_emplace(rsx_vp);
new_shader = &(it->second);
recompile = inserted;
prev_count = umax;
invl_count++;
}
if (recompile)
@ -209,16 +230,35 @@ protected:
fragment_program_type* new_shader;
{
thread_local const std::pair<const RSXFragmentProgram, fragment_program_type>* prev_fp = nullptr;
thread_local usz prev_count = umax;
static atomic_t<usz> invl_count = 0;
reader_lock lock(m_fragment_mutex);
if (prev_count == invl_count)
{
// prev_fp must be non-null here
if (prev_fp->first.ucode_length == rsx_fp.ucode_length && prev_fp->first.texcoord_control_mask == rsx_fp.texcoord_control_mask)
{
if (program_hash_util::fragment_program_compare()(prev_fp->first, rsx_fp))
{
return std::forward_as_tuple(prev_fp->second, true);
}
}
}
const auto& I = m_fragment_shader_cache.find(rsx_fp);
if (I != m_fragment_shader_cache.end())
{
prev_fp = &*I;
prev_count = invl_count;
return std::forward_as_tuple(I->second, true);
}
if (!force_load)
{
prev_count = umax;
return std::forward_as_tuple(__null_fragment_program, false);
}
@ -227,6 +267,8 @@ protected:
lock.upgrade();
std::tie(it, recompile) = m_fragment_shader_cache.try_emplace(rsx_fp);
new_shader = &(it->second);
prev_count = umax;
invl_count++;
}
if (recompile)