RSX: Do not invalidate FP analysis on same register writes

This commit is contained in:
Elad 2025-03-06 13:29:32 +02:00
parent 7995181235
commit 32df7315be
7 changed files with 141 additions and 20 deletions

View file

@ -62,8 +62,13 @@ namespace rsx
return vm::cast(get_address(offset, location));
}
void set_fragment_texture_dirty_bit(rsx::context* ctx, u32 index)
void set_fragment_texture_dirty_bit(rsx::context* ctx, u32 arg, u32 index, bool /*is_shader_config*/)
{
if (REGS(ctx)->latch == arg)
{
return;
}
RSX(ctx)->m_textures_dirty[index] = true;
if (RSX(ctx)->current_fp_metadata.referenced_textures_mask & (1 << index))
@ -72,6 +77,78 @@ namespace rsx
}
}
void set_texture_configuration_command(rsx::context* ctx, u32 reg)
{
const u32 reg_index = reg - NV4097_SET_TEXTURE_OFFSET;
ensure(reg_index % 8 == 0 && reg_index < 8 * 16); // Only NV4097_SET_TEXTURE_OFFSET is expected
const u32 texture_index = reg_index / 8;
// FIFO args count including this one
const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1;
// The range of methods this function resposible to
constexpr u32 method_range = 8;
// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
const u32 fifo_read_limit = static_cast<u32>(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4);
const u32 count = std::min<u32>({ fifo_args_cnt, fifo_read_limit, method_range });
// Clamp by the count of methods this function is responsible to
std::span<const u32> command_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();
if (command_span.size() > count)
{
command_span = command_span.subspan(0, count);
}
ensure(!command_span.empty());
u32* const dst_regs = &REGS(ctx)->registers[reg];
//u8 change_mask = 0;
u8 change_mask = 0xff;
if (dst_regs[0] != REGS(ctx)->latch)
{
// Fixup for the first method
change_mask |= 1;
}
for (usz i = 1; i < command_span.size(); i++)
{
const u32 command_data = std::bit_cast<be_t<u32>>(command_span[i]);
change_mask |= command_data != dst_regs[i] ? (1u << i) : 0;
dst_regs[i] = command_data;
}
// Disabled
// Bits set:
// NV4097_SET_TEXTURE_OFFSET
// NV4097_SET_TEXTURE_IMAGE_RECT
//constexpr u32 mask_of_texture_data_not_affecting_shader_config = 0x41;
constexpr u32 mask_of_texture_data_not_affecting_shader_config = 0;
if (change_mask)
{
RSX(ctx)->m_textures_dirty[texture_index] = true;
if (~mask_of_texture_data_not_affecting_shader_config & change_mask)
{
if (RSX(ctx)->current_fp_metadata.referenced_textures_mask & (1 << texture_index))
{
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty;
}
}
}
// Skip handled methods
RSX(ctx)->fifo_ctrl->skip_methods(static_cast<u32>(command_span.size()) - 1);
}
void set_vertex_texture_dirty_bit(rsx::context* ctx, u32 index)
{
RSX(ctx)->m_vertex_textures_dirty[index] = true;

View file

@ -17,7 +17,9 @@ namespace rsx
void push_draw_parameter_change(rsx::context* ctx, rsx::command_barrier_type type, u32 reg, u32 arg0, u32 arg1 = 0u, u32 index = 0u);
void set_fragment_texture_dirty_bit(rsx::context* ctx, u32 index);
void set_fragment_texture_dirty_bit(rsx::context* ctx, u32 arg, u32 index, bool is_shader_config);
void set_texture_configuration_command(rsx::context* ctx, u32 reg);
void set_vertex_texture_dirty_bit(rsx::context* ctx, u32 index);
}

View file

@ -221,12 +221,31 @@ namespace rsx
}
};
template<u32 index>
struct set_texture_dirty_bit
template <u32 index>
struct set_texture_dirty_bit_texture_config
{
static void impl(context* ctx, u32 /*reg*/, u32 /*arg*/)
static void impl(context* ctx, u32 /*reg*/, u32 arg)
{
util::set_fragment_texture_dirty_bit(ctx, index);
util::set_fragment_texture_dirty_bit(ctx, arg, index, true);
}
};
template <u32 index>
struct set_texture_offset
{
static void impl(context* ctx, u32 reg, u32 /*arg*/)
{
fmt::throw_exception("Unreacable!");
util::set_texture_configuration_command(ctx, reg);
}
};
template <u32 index>
struct set_texture_dirty_bit_location_and_area
{
static void impl(context* ctx, u32 /*reg*/, u32 arg)
{
util::set_fragment_texture_dirty_bit(ctx, arg, index, false);
}
};

View file

@ -770,6 +770,21 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
return true;
}
bool fragment_program_compare::config_only(const RSXFragmentProgram& binary1, const RSXFragmentProgram& binary2)
{
if (binary1.ucode_length != binary2.ucode_length ||
binary1.ctrl != binary2.ctrl ||
binary1.texture_state != binary2.texture_state ||
binary1.texcoord_control_mask != binary2.texcoord_control_mask ||
binary1.two_sided_lighting != binary2.two_sided_lighting ||
binary1.mrt_buffers_count != binary2.mrt_buffers_count)
{
return false;
}
return true;
}
namespace rsx
{
#if defined(ARCH_X64) || defined(ARCH_ARM64)

View file

@ -80,6 +80,7 @@ namespace program_hash_util
struct fragment_program_compare
{
bool operator()(const RSXFragmentProgram &binary1, const RSXFragmentProgram &binary2) const;
static bool config_only(const RSXFragmentProgram &binary1, const RSXFragmentProgram &binary2);
};
}
@ -245,10 +246,15 @@ protected:
{
if (rsx_fp_invalidation_count != umax && prev_rsx_count == rsx_fp_invalidation_count)
{
return std::forward_as_tuple(prev_fp->second, true);
// Shader UCODE must be the same.
// Shader config changes are not tracked at the moment
// Compare manually
if (program_hash_util::fragment_program_compare::config_only(prev_fp->first, rsx_fp))
{
return std::forward_as_tuple(prev_fp->second, true);
}
}
if (program_hash_util::fragment_program_compare()(prev_fp->first, rsx_fp))
else if (program_hash_util::fragment_program_compare()(prev_fp->first, rsx_fp))
{
prev_rsx_count = rsx_fp_invalidation_count;
return std::forward_as_tuple(prev_fp->second, true);

View file

@ -2047,7 +2047,9 @@ namespace rsx
ensure(!m_graphics_state.test(rsx::pipeline_state::fragment_program_ucode_dirty));
m_graphics_state.clear(rsx::pipeline_state::fragment_program_dirty);
fragment_program_invalidation_count++;
// FP config is always checked for now (see get_graphics_pipeline)
//fragment_program_invalidation_count++;
current_fragment_program.ctrl = m_ctx->register_state->shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
current_fragment_program.texcoord_control_mask = m_ctx->register_state->texcoord_control_mask();

View file

@ -1663,16 +1663,16 @@ namespace rsx
bind(NV4097_SET_SURFACE_PITCH_D, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_SURFACE_PITCH_Z, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_WINDOW_OFFSET, nv4097::set_surface_dirty_bit);
bind_range<NV4097_SET_TEXTURE_OFFSET, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_FORMAT, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_ADDRESS, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL0, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL1, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL2, 1, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL3, 1, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_BORDER_COLOR, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_OFFSET, 8, 16, nv4097::set_texture_dirty_bit_location_and_area>();
bind_range<NV4097_SET_TEXTURE_FORMAT, 8, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_ADDRESS, 8, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_CONTROL0, 8, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_CONTROL1, 8, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit_location_and_area>();
bind_range<NV4097_SET_TEXTURE_BORDER_COLOR, 8, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_CONTROL2, 1, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_TEXTURE_CONTROL3, 1, 16, nv4097::set_texture_dirty_bit_texture_config>();
bind_range<NV4097_SET_VERTEX_TEXTURE_OFFSET, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_FORMAT, 8, 4, nv4097::set_vertex_texture_dirty_bit>();
bind_range<NV4097_SET_VERTEX_TEXTURE_ADDRESS, 8, 4, nv4097::set_vertex_texture_dirty_bit>();