rsx: Optimizations for program management

This commit is contained in:
kd-11 2018-04-10 18:06:29 +03:00 committed by kd-11
parent a52ea7f870
commit 440a31ef18
5 changed files with 81 additions and 39 deletions

View file

@ -1049,37 +1049,37 @@ bool GLGSRender::check_program_state()
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
get_current_vertex_program();
auto &fragment_program = current_fragment_program;
auto &vertex_program = current_vertex_program;
vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
fragment_program.unnormalized_coords = 0; //unused
void* pipeline_properties = nullptr;
m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, pipeline_properties);
m_program->use();
if (m_prog_buffer.check_cache_missed())
if (m_fragment_program_dirty || m_vertex_program_dirty)
{
m_shaders_cache->store(pipeline_properties, vertex_program, fragment_program);
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
//Notify the user with HUD notification
if (g_cfg.misc.show_shader_compilation_hint)
get_current_vertex_program();
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
current_fragment_program.unnormalized_coords = 0; //unused
void* pipeline_properties = nullptr;
m_program = &m_prog_buffer.getGraphicPipelineState(current_vertex_program, current_fragment_program, pipeline_properties);
m_program->use();
if (m_prog_buffer.check_cache_missed())
{
if (!m_custom_ui)
m_shaders_cache->store(pipeline_properties, current_vertex_program, current_fragment_program);
//Notify the user with HUD notification
if (g_cfg.misc.show_shader_compilation_hint)
{
//Create notification but do not draw it at this time. No need to spam flip requests
m_custom_ui = std::make_unique<rsx::overlays::shader_compile_notification>();
}
else if (auto casted = dynamic_cast<rsx::overlays::shader_compile_notification*>(m_custom_ui.get()))
{
//Probe the notification
casted->touch();
if (!m_custom_ui)
{
//Create notification but do not draw it at this time. No need to spam flip requests
m_custom_ui = std::make_unique<rsx::overlays::shader_compile_notification>();
}
else if (auto casted = dynamic_cast<rsx::overlays::shader_compile_notification*>(m_custom_ui.get()))
{
//Probe the notification
casted->touch();
}
}
}
}
@ -1089,7 +1089,7 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
u32 vertex_constants_offset;
u32 fragment_constants_offset;
const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program);
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
if (manually_flush_ring_buffers)
@ -1128,11 +1128,11 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
if (fragment_constants_size)
{
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
fragment_program, gl::get_driver_caps().vendor_NVIDIA);
current_fragment_program, gl::get_driver_caps().vendor_NVIDIA);
}
// Fragment state
fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program);
fill_fragment_state_buffer(buf+fragment_constants_size, current_fragment_program);
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);

View file

@ -1329,6 +1329,10 @@ namespace rsx
void thread::get_current_vertex_program()
{
if (!m_vertex_program_dirty)
return;
m_vertex_program_dirty = false;
const u32 transform_program_start = rsx::method_registers.transform_program_start();
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.skip_vertex_input_check = false;
@ -1341,8 +1345,8 @@ namespace rsx
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
auto program_info = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
current_vertex_program.data.resize(program_info.ucode_size);
current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
current_vertex_program.data.resize(current_vp_metadata.ucode_size);
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
@ -1540,20 +1544,27 @@ namespace rsx
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{
if (!m_fragment_program_dirty)
return;
m_fragment_program_dirty = false;
auto &result = current_fragment_program = {};
const u32 shader_program = rsx::method_registers.shader_program_address();
if (shader_program == 0)
{
current_fp_metadata = {};
return;
}
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);
result.addr = vm::base(rsx::get_address(program_offset, program_location));
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
result.addr = ((u8*)result.addr + program_info.program_start_offset);
result.offset = program_offset + program_info.program_start_offset;
result.addr = ((u8*)result.addr + current_fp_metadata.program_start_offset);
result.offset = program_offset + current_fp_metadata.program_start_offset;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0;
@ -1574,7 +1585,7 @@ namespace rsx
result.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
result.texture_scale[i][2] = (f32)tex.remap(); //Debug value
if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i)))
if (tex.enabled() && (current_fp_metadata.referenced_textures_mask & (1 << i)))
{
u32 texture_control = 0;
result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1));

View file

@ -331,6 +331,8 @@ namespace rsx
bool m_textures_dirty[16];
bool m_vertex_textures_dirty[4];
bool m_framebuffer_state_contested = false;
bool m_fragment_program_dirty = false;
bool m_vertex_program_dirty = false;
protected:
std::array<u32, 4> get_color_surface_addresses() const;
@ -344,6 +346,9 @@ namespace rsx
RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {};
program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};
void get_current_vertex_program();
/**

View file

@ -2209,10 +2209,13 @@ bool VKGSRender::check_program_status()
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
if (m_fragment_program_dirty || m_vertex_program_dirty)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
get_current_vertex_program();
get_current_vertex_program();
}
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;

View file

@ -357,9 +357,21 @@ namespace rsx
static void impl(thread* rsx, u32 _reg, u32 arg)
{
method_registers.commit_4_transform_program_instructions(index);
rsx->m_vertex_program_dirty = true;
}
};
void set_transform_program_start(thread* rsx, u32, u32)
{
rsx->m_vertex_program_dirty = true;
}
void set_vertex_attribute_output_mask(thread* rsx, u32, u32)
{
rsx->m_vertex_program_dirty = true;
rsx->m_fragment_program_dirty = true;
}
void set_begin_end(thread* rsxthr, u32 _reg, u32 arg)
{
if (arg)
@ -521,6 +533,11 @@ namespace rsx
rsx->sync();
}
void invalidate_L2(thread* rsx, u32, u32)
{
rsx->m_fragment_program_dirty = true;
}
void set_surface_dirty_bit(thread* rsx, u32, u32)
{
rsx->m_rtts_dirty = true;
@ -539,6 +556,7 @@ namespace rsx
static void impl(thread* rsx, u32 _reg, u32 arg)
{
rsx->m_textures_dirty[index] = true;
rsx->m_fragment_program_dirty = true;
}
};
@ -565,6 +583,8 @@ namespace rsx
const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2);
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062());
vm::write32(address, arg);
rsx->m_fragment_program_dirty = true;
}
};
}
@ -1711,6 +1731,9 @@ namespace rsx
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
bind<NV4097_INVALIDATE_L2, nv4097::invalidate_L2>();
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();