diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 7846a5d5d3..2d63e67ab8 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -476,7 +476,6 @@ target_sources(rpcs3_emu PRIVATE RSX/Common/TextureUtils.cpp RSX/Common/texture_cache.cpp RSX/Core/RSXContext.cpp - RSX/Core/RSXDrawCommands.cpp RSX/gcm_enums.cpp RSX/gcm_printing.cpp RSX/GL/GLCommonDecompiler.cpp diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp deleted file mode 100644 index 095903832d..0000000000 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ /dev/null @@ -1,843 +0,0 @@ -#include "stdafx.h" -#include "RSXDrawCommands.h" - -#include "Emu/RSX/Common/BufferUtils.h" -#include "Emu/RSX/Common/buffer_stream.hpp" -#include "Emu/RSX/Common/io_buffer.h" -#include "Emu/RSX/Common/simple_array.hpp" -#include "Emu/RSX/NV47/HW/context_accessors.define.h" -#include "Emu/RSX/Program/GLSLCommon.h" -#include "Emu/RSX/rsx_methods.h" -#include "Emu/RSX/RSXThread.h" - -#include "Emu/Memory/vm.h" - -namespace rsx -{ - void draw_command_processor::analyse_inputs_interleaved(vertex_input_layout& result, const vertex_program_metadata_t& vp_metadata) - { - const rsx_state& state = *REGS(m_ctx); - const u32 input_mask = state.vertex_attrib_input_mask() & vp_metadata.referenced_inputs_mask; - - result.clear(); - result.attribute_mask = static_cast(input_mask); - - if (state.current_draw_clause.command == rsx::draw_command::inlined_array) - { - interleaved_range_info& info = *result.alloc_interleaved_block(); - info.interleaved = true; - - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) - { - auto& vinfo = state.vertex_arrays_info[index]; - result.attribute_placement[index] = attribute_buffer_placement::none; - - if (vinfo.size() > 0) - { - // Stride must be updated even if the stream is disabled - info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size()); - info.locations.push_back({ index, false, 1 }); - 
- if (input_mask & (1u << index)) - { - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - } - else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index)) - { - // Reads from register - result.referenced_registers.push_back(index); - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - } - - if (info.attribute_stride) - { - // At least one array feed must be enabled for vertex input - result.interleaved_blocks.push_back(&info); - } - - return; - } - - const u32 frequency_divider_mask = REGS(m_ctx)->frequency_divider_operation_mask(); - result.interleaved_blocks.reserve(16); - result.referenced_registers.reserve(16); - - for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1) - { - ensure(index < rsx::limits::vertex_count); - - if (!(ref_mask & 1u)) - { - // Nothing to do, uninitialized - continue; - } - - // Always reset attribute placement by default - result.attribute_placement[index] = attribute_buffer_placement::none; - - // Check for interleaving - if (REGS(m_ctx)->current_draw_clause.is_immediate_draw && - REGS(m_ctx)->current_draw_clause.command != rsx::draw_command::indexed) - { - // NOTE: In immediate rendering mode, all vertex setup is ignored - // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults - if (m_vertex_push_buffers[index].vertex_count > 1) - { - // Ensure consistent number of vertices per attribute. 
- m_vertex_push_buffers[index].pad_to(m_vertex_push_buffers[0].vertex_count, false); - - // Read temp buffer (register array) - std::pair volatile_range_info = std::make_pair(index, static_cast(m_vertex_push_buffers[index].data.size() * sizeof(u32))); - result.volatile_blocks.push_back(volatile_range_info); - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - else if (state.register_vertex_info[index].size > 0) - { - // Reads from register - result.referenced_registers.push_back(index); - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - - // Fall back to the default register value if no source is specified via register - continue; - } - - const auto& info = state.vertex_arrays_info[index]; - if (!info.size()) - { - if (state.register_vertex_info[index].size > 0) - { - // Reads from register - result.referenced_registers.push_back(index); - result.attribute_placement[index] = attribute_buffer_placement::transient; - continue; - } - } - else - { - result.attribute_placement[index] = attribute_buffer_placement::persistent; - const u32 base_address = info.offset() & 0x7fffffff; - bool alloc_new_block = true; - bool modulo = !!(frequency_divider_mask & (1 << index)); - - for (auto& block : result.interleaved_blocks) - { - if (block->single_vertex) - { - // Single vertex definition, continue - continue; - } - - if (block->attribute_stride != info.stride()) - { - // Stride does not match, continue - continue; - } - - if (base_address > block->base_offset) - { - const u32 diff = base_address - block->base_offset; - if (diff > info.stride()) - { - // Not interleaved, continue - continue; - } - } - else - { - const u32 diff = block->base_offset - base_address; - if (diff > info.stride()) - { - // Not interleaved, continue - continue; - } - - // Matches, and this address is lower than existing - block->base_offset = base_address; - } - - alloc_new_block = false; - block->locations.push_back({ index, modulo, 
info.frequency() }); - block->interleaved = true; - break; - } - - if (alloc_new_block) - { - interleaved_range_info& block = *result.alloc_interleaved_block(); - block.base_offset = base_address; - block.attribute_stride = info.stride(); - block.memory_location = info.offset() >> 31; - block.locations.reserve(16); - block.locations.push_back({ index, modulo, info.frequency() }); - - if (block.attribute_stride == 0) - { - block.single_vertex = true; - block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - - result.interleaved_blocks.push_back(&block); - } - } - } - - for (auto& info : result.interleaved_blocks) - { - // Calculate real data address to be used during upload - info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location); - } - } - - std::span draw_command_processor::get_raw_index_array(const draw_clause& draw_indexed_clause) const - { - if (!m_element_push_buffer.empty()) [[ unlikely ]] - { - // Indices provided via immediate mode - return { reinterpret_cast(m_element_push_buffer.data()), ::narrow(m_element_push_buffer.size() * sizeof(u32)) }; - } - - const rsx::index_array_type type = REGS(m_ctx)->index_type(); - const u32 type_size = get_index_type_size(type); - - // Force aligned indices as realhw - const u32 address = (0 - type_size) & get_address(REGS(m_ctx)->index_array_address(), REGS(m_ctx)->index_array_location()); - - const u32 first = draw_indexed_clause.min_index(); - const u32 count = draw_indexed_clause.get_elements_count(); - - const auto ptr = vm::_ptr(address); - return { ptr + first * type_size, count * type_size }; - } - - std::variant - draw_command_processor::get_draw_command(const rsx::rsx_state& state) const - { - if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]] - { - return draw_indexed_array_command - { - get_raw_index_array(state.current_draw_clause) - }; - } 
- - if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::array) - { - return draw_array_command{}; - } - - if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array) - { - return draw_inlined_array{}; - } - - fmt::throw_exception("ill-formed draw command"); - } - - void draw_command_processor::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) - { - if (!(REGS(m_ctx)->vertex_attrib_input_mask() & (1 << attribute))) - { - return; - } - - // Enforce ATTR0 as vertex attribute for push buffers. - // This whole thing becomes a mess if we don't have a provoking attribute. - const auto vertex_id = m_vertex_push_buffers[0].get_vertex_id(); - m_vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value); - RSX(m_ctx)->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty; - } - - u32 draw_command_processor::get_push_buffer_vertex_count() const - { - // Enforce ATTR0 as vertex attribute for push buffers. - // This whole thing becomes a mess if we don't have a provoking attribute. 
- return m_vertex_push_buffers[0].vertex_count; - } - - void draw_command_processor::append_array_element(u32 index) - { - // Endianness is swapped because common upload code expects input in BE - // TODO: Implement fast upload path for LE inputs and do away with this - m_element_push_buffer.push_back(std::bit_cast>(index)); - } - - u32 draw_command_processor::get_push_buffer_index_count() const - { - return ::size32(m_element_push_buffer); - } - - void draw_command_processor::clear_push_buffers() - { - auto& graphics_state = RSX(m_ctx)->m_graphics_state; - if (graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty) - { - for (auto& push_buf : m_vertex_push_buffers) - { - //Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 - //REGS(m_ctx)->register_vertex_info[index].size = 0; - - push_buf.clear(); - } - - graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty); - } - - m_element_push_buffer.clear(); - } - - void draw_command_processor::fill_vertex_layout_state( - const vertex_input_layout& layout, - const vertex_program_metadata_t& vp_metadata, - u32 first_vertex, - u32 vertex_count, - s32* buffer, - u32 persistent_offset_base, - u32 volatile_offset_base) const - { - std::array offset_in_block = {}; - u32 volatile_offset = volatile_offset_base; - u32 persistent_offset = persistent_offset_base; - - // NOTE: Order is important! 
Transient ayout is always push_buffers followed by register data - if (REGS(m_ctx)->current_draw_clause.is_immediate_draw) - { - for (const auto& info : layout.volatile_blocks) - { - offset_in_block[info.first] = volatile_offset; - volatile_offset += info.second; - } - } - - for (u8 index : layout.referenced_registers) - { - offset_in_block[index] = volatile_offset; - volatile_offset += 16; - } - - if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array) - { - const auto& block = layout.interleaved_blocks[0]; - u32 inline_data_offset = volatile_offset; - for (const auto& attrib : block->locations) - { - auto& info = REGS(m_ctx)->vertex_arrays_info[attrib.index]; - - offset_in_block[attrib.index] = inline_data_offset; - inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - } - else - { - for (const auto& block : layout.interleaved_blocks) - { - for (const auto& attrib : block->locations) - { - const u32 local_address = (REGS(m_ctx)->vertex_arrays_info[attrib.index].offset() & 0x7fffffff); - offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset); - } - - const auto range = block->calculate_required_range(first_vertex, vertex_count); - persistent_offset += block->attribute_stride * range.second; - } - } - - // Fill the data - // Each descriptor field is 64 bits wide - // [0-8] attribute stride - // [8-24] attribute divisor - // [24-27] attribute type - // [27-30] attribute size - // [30-31] reserved - // [31-60] starting offset - // [60-21] swap bytes flag - // [61-22] volatile flag - // [62-63] modulo enable flag - - const s32 default_frequency_mask = (1 << 8); - const s32 swap_storage_mask = (1 << 29); - const s32 volatile_storage_mask = (1 << 30); - const s32 modulo_op_frequency_mask = smin; - - const u32 modulo_mask = REGS(m_ctx)->frequency_divider_operation_mask(); - const auto max_index = (first_vertex + vertex_count) - 1; - - for (u16 ref_mask = 
vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1) - { - if (!(ref_mask & 1u)) - { - // Unused input, ignore this - continue; - } - - if (layout.attribute_placement[index] == attribute_buffer_placement::none) - { - static constexpr u64 zero = 0; - std::memcpy(buffer + index * 2, &zero, sizeof(zero)); - continue; - } - - rsx::vertex_base_type type = {}; - s32 size = 0; - s32 attrib0 = 0; - s32 attrib1 = 0; - - if (layout.attribute_placement[index] == attribute_buffer_placement::transient) - { - if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array) - { - const auto& info = REGS(m_ctx)->vertex_arrays_info[index]; - - if (!info.size()) - { - // Register - const auto& reginfo = REGS(m_ctx)->register_vertex_info[index]; - type = reginfo.type; - size = reginfo.size; - - attrib0 = rsx::get_vertex_type_size_on_host(type, size); - } - else - { - // Array - type = info.type(); - size = info.size(); - - attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask; - } - } - else - { - // Data is either from an immediate render or register input - // Immediate data overrides register input - - if (REGS(m_ctx)->current_draw_clause.is_immediate_draw && - m_vertex_push_buffers[index].vertex_count > 1) - { - // Push buffer - const auto& info = m_vertex_push_buffers[index]; - type = info.type; - size = info.size; - - attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask; - } - else - { - // Register - const auto& info = REGS(m_ctx)->register_vertex_info[index]; - type = info.type; - size = info.size; - - attrib0 = rsx::get_vertex_type_size_on_host(type, size); - } - } - - attrib1 |= volatile_storage_mask; - } - else - { - auto& info = REGS(m_ctx)->vertex_arrays_info[index]; - type = info.type(); - size = info.size(); - - auto stride = info.stride(); - attrib0 = stride; - - if (stride > 0) // when stride is 0, input is not an array but a single element - { - const u32 frequency 
= info.frequency(); - switch (frequency) - { - case 0: - case 1: - { - attrib0 |= default_frequency_mask; - break; - } - default: - { - if (modulo_mask & (1 << index)) - { - if (max_index >= frequency) - { - // Only set modulo mask if a modulo op is actually necessary! - // This requires that the uploaded range for this attr = [0, freq-1] - // Ignoring modulo op if the rendered range does not wrap allows for range optimization - attrib0 |= (frequency << 8); - attrib1 |= modulo_op_frequency_mask; - } - else - { - attrib0 |= default_frequency_mask; - } - } - else - { - // Division - attrib0 |= (frequency << 8); - } - break; - } - } - } - } // end attribute placement check - - // Special compressed 4 components into one 4-byte value. Decoded as one value. - if (type == rsx::vertex_base_type::cmp) - { - size = 1; - } - - // All data is passed in in PS3-native order (BE) so swap flag should be set - attrib1 |= swap_storage_mask; - attrib0 |= (static_cast(type) << 24); - attrib0 |= (size << 27); - attrib1 |= offset_in_block[index]; - - buffer[index * 2 + 0] = attrib0; - buffer[index * 2 + 1] = attrib1; - } - } - - void draw_command_processor::write_vertex_data_to_memory( - const vertex_input_layout& layout, - u32 first_vertex, - u32 vertex_count, - void* persistent_data, - void* volatile_data) const - { - auto transient = static_cast(volatile_data); - auto persistent = static_cast(persistent_data); - - auto& draw_call = REGS(m_ctx)->current_draw_clause; - - if (transient != nullptr) - { - if (draw_call.command == rsx::draw_command::inlined_array) - { - for (const u8 index : layout.referenced_registers) - { - memcpy(transient, REGS(m_ctx)->register_vertex_info[index].data.data(), 16); - transient += 16; - } - - memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32)); - // Is it possible to reference data outside of the inlined array? - return; - } - - // NOTE: Order is important! 
Transient layout is always push_buffers followed by register data - if (draw_call.is_immediate_draw) - { - // NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory - for (const auto& info : layout.volatile_blocks) - { - memcpy(transient, m_vertex_push_buffers[info.first].data.data(), info.second); - transient += info.second; - } - } - - for (const u8 index : layout.referenced_registers) - { - memcpy(transient, REGS(m_ctx)->register_vertex_info[index].data.data(), 16); - transient += 16; - } - } - - if (persistent != nullptr) - { - for (interleaved_range_info* block : layout.interleaved_blocks) - { - auto range = block->calculate_required_range(first_vertex, vertex_count); - - const u32 data_size = range.second * block->attribute_stride; - const u32 vertex_base = range.first * block->attribute_stride; - - g_fxo->get().copy(persistent, vm::_ptr(block->real_offset_address) + vertex_base, data_size); - persistent += data_size; - } - } - } - - void draw_command_processor::fill_scale_offset_data(void* buffer, bool flip_y) const - { - const int clip_w = REGS(m_ctx)->surface_clip_width(); - const int clip_h = REGS(m_ctx)->surface_clip_height(); - - const float scale_x = REGS(m_ctx)->viewport_scale_x() / (clip_w / 2.f); - float offset_x = REGS(m_ctx)->viewport_offset_x() - (clip_w / 2.f); - offset_x /= clip_w / 2.f; - - float scale_y = REGS(m_ctx)->viewport_scale_y() / (clip_h / 2.f); - float offset_y = (REGS(m_ctx)->viewport_offset_y() - (clip_h / 2.f)); - offset_y /= clip_h / 2.f; - if (flip_y) scale_y *= -1; - if (flip_y) offset_y *= -1; - - const float scale_z = REGS(m_ctx)->viewport_scale_z(); - const float offset_z = REGS(m_ctx)->viewport_offset_z(); - const float one = 1.f; - - utils::stream_vector(buffer, std::bit_cast(scale_x), 0, 0, std::bit_cast(offset_x)); - utils::stream_vector(static_cast(buffer) + 16, 0, std::bit_cast(scale_y), 0, std::bit_cast(offset_y)); - utils::stream_vector(static_cast(buffer) + 
32, 0, 0, std::bit_cast(scale_z), std::bit_cast(offset_z)); - utils::stream_vector(static_cast(buffer) + 48, 0, 0, 0, std::bit_cast(one)); - } - - void draw_command_processor::fill_user_clip_data(void* buffer) const - { - const rsx::user_clip_plane_op clip_plane_control[6] = - { - REGS(m_ctx)->clip_plane_0_enabled(), - REGS(m_ctx)->clip_plane_1_enabled(), - REGS(m_ctx)->clip_plane_2_enabled(), - REGS(m_ctx)->clip_plane_3_enabled(), - REGS(m_ctx)->clip_plane_4_enabled(), - REGS(m_ctx)->clip_plane_5_enabled(), - }; - - u8 data_block[64]; - s32* clip_enabled_flags = reinterpret_cast(data_block); - f32* clip_distance_factors = reinterpret_cast(data_block + 32); - - for (int index = 0; index < 6; ++index) - { - switch (clip_plane_control[index]) - { - default: - rsx_log.error("bad clip plane control (0x%x)", static_cast(clip_plane_control[index])); - [[fallthrough]]; - - case rsx::user_clip_plane_op::disable: - clip_enabled_flags[index] = 0; - clip_distance_factors[index] = 0.f; - break; - - case rsx::user_clip_plane_op::greater_or_equal: - clip_enabled_flags[index] = 1; - clip_distance_factors[index] = 1.f; - break; - - case rsx::user_clip_plane_op::less_than: - clip_enabled_flags[index] = 1; - clip_distance_factors[index] = -1.f; - break; - } - } - - memcpy(buffer, data_block, 2 * 8 * sizeof(u32)); - } - - /** - * Fill buffer with vertex program constants. - * Buffer must be at least 512 float4 wide. 
- */ - void draw_command_processor::fill_vertex_program_constants_data(void* buffer, const std::span& reloc_table) const - { - if (!reloc_table.empty()) [[ likely ]] - { - char* dst = reinterpret_cast(buffer); - for (const auto& index : reloc_table) - { - utils::stream_vector_from_memory(dst, ®S(m_ctx)->transform_constants[index]); - dst += 16; - } - } - else - { - memcpy(buffer, REGS(m_ctx)->transform_constants.data(), 468 * 4 * sizeof(float)); - } - } - - void draw_command_processor::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) const - { - ROP_control_t rop_control{}; - - if (REGS(m_ctx)->alpha_test_enabled()) - { - const u32 alpha_func = static_cast(REGS(m_ctx)->alpha_func()); - rop_control.set_alpha_test_func(alpha_func); - rop_control.enable_alpha_test(); - } - - if (REGS(m_ctx)->polygon_stipple_enabled()) - { - rop_control.enable_polygon_stipple(); - } - - if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !RSX(m_ctx)->get_backend_config().supports_hw_a2c) - { - // TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders - // Alpha values generate a coverage mask for order independent blending - // Requires hardware AA to work properly (or just fragment sample stage in fragment shaders) - // Simulated using combined alpha blend and alpha test - rop_control.enable_alpha_to_coverage(); - if (REGS(m_ctx)->msaa_sample_mask()) - { - rop_control.enable_MSAA_writes(); - } - - // Sample configuration bits - switch (REGS(m_ctx)->surface_antialias()) - { - case rsx::surface_antialiasing::center_1_sample: - break; - case rsx::surface_antialiasing::diagonal_centered_2_samples: - rop_control.set_msaa_control(1u); - break; - default: - rop_control.set_msaa_control(3u); - break; - } - } - - const f32 fog0 = REGS(m_ctx)->fog_params_0(); - const f32 fog1 = REGS(m_ctx)->fog_params_1(); - const u32 fog_mode = static_cast(REGS(m_ctx)->fog_equation()); - - // Check if framebuffer is actually an XRGB format and not 
a WZYX format - switch (REGS(m_ctx)->surface_color()) - { - case rsx::surface_color_format::w16z16y16x16: - case rsx::surface_color_format::w32z32y32x32: - case rsx::surface_color_format::x32: - // These behave very differently from "normal" formats. - break; - default: - // Integer framebuffer formats. - rop_control.enable_framebuffer_INT(); - - // Check if we want sRGB conversion. - if (REGS(m_ctx)->framebuffer_srgb_enabled()) - { - rop_control.enable_framebuffer_sRGB(); - } - break; - } - - // Generate wpos coefficients - // wpos equation is now as follows: - // wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0) - // wpos.x = (frag_coord / resolution_scale) - // wpos.zw = frag_coord.zw - - const auto window_origin = REGS(m_ctx)->shader_window_origin(); - const u32 window_height = REGS(m_ctx)->shader_window_height(); - const f32 resolution_scale = (window_height <= static_cast(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale(); - const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); - const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height; - const f32 alpha_ref = REGS(m_ctx)->alpha_ref(); - - u32* dst = static_cast(buffer); - utils::stream_vector(dst, std::bit_cast(fog0), std::bit_cast(fog1), rop_control.value, std::bit_cast(alpha_ref)); - utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast(wpos_scale), std::bit_cast(wpos_bias)); - } - - void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const - { - auto& draw_call = REGS(m_ctx)->current_draw_clause; - - // Only call this for instanced draws! - ensure(draw_call.is_trivial_instanced_draw); - - // Temp indirection table. Used to track "running" updates. 
- rsx::simple_array instancing_indirection_table; - // indirection table size - const auto reloc_table = prog.has_indexed_constants ? decltype(prog.constant_ids){} : prog.constant_ids; - const auto redirection_table_size = prog.has_indexed_constants ? 468u : ::size32(prog.constant_ids); - instancing_indirection_table.resize(redirection_table_size); - - // Temp constants data - rsx::simple_array constants_data; - constants_data.reserve(redirection_table_size * draw_call.pass_count()); - - // Allocate indirection buffer on GPU stream - indirection_table_buf.reserve(instancing_indirection_table.size_bytes() * draw_call.pass_count()); - auto indirection_out = indirection_table_buf.data(); - - rsx::instanced_draw_config_t instance_config; - u32 indirection_table_offset = 0; - - // We now replay the draw call here to pack the data. - draw_call.begin(); - - // Write initial draw data. - std::iota(instancing_indirection_table.begin(), instancing_indirection_table.end(), 0); - - constants_data.resize(redirection_table_size); - fill_vertex_program_constants_data(constants_data.data(), reloc_table); - - // Next draw. We're guaranteed more than one draw call by the caller. - draw_call.next(); - - do - { - // Write previous state - std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size_bytes()); - indirection_table_offset += redirection_table_size; - - // Decode next draw state - instance_config = {}; - draw_call.execute_pipeline_dependencies(m_ctx, &instance_config); - - if (!instance_config.transform_constants_data_changed) - { - continue; - } - - const int translated_offset = prog.has_indexed_constants - ? 
instance_config.patch_load_offset - : prog.TranslateConstantsRange(instance_config.patch_load_offset, instance_config.patch_load_count); - - if (translated_offset >= 0) - { - // Trivially patchable in bulk - const u32 redirection_loc = ::size32(constants_data); - constants_data.resize(::size32(constants_data) + instance_config.patch_load_count); - std::memcpy(constants_data.data() + redirection_loc, ®S(m_ctx)->transform_constants[instance_config.patch_load_offset], instance_config.patch_load_count * sizeof(u128)); - - // Update indirection table - for (auto i = translated_offset, count = 0; - static_cast(count) < instance_config.patch_load_count; - ++i, ++count) - { - instancing_indirection_table[i] = redirection_loc + count; - } - - continue; - } - - ensure(!prog.has_indexed_constants); - - // Sparse update. Update records individually instead of bulk - // FIXME: Range batching optimization - const auto load_end = instance_config.patch_load_offset + instance_config.patch_load_count; - for (u32 i = 0; i < redirection_table_size; ++i) - { - const auto read_index = prog.constant_ids[i]; - if (read_index < instance_config.patch_load_offset || read_index >= load_end) - { - // Reading outside "hot" range. 
- continue; - } - - const u32 redirection_loc = ::size32(constants_data); - constants_data.resize(::size32(constants_data) + 1); - std::memcpy(constants_data.data() + redirection_loc, ®S(m_ctx)->transform_constants[read_index], sizeof(u128)); - - instancing_indirection_table[i] = redirection_loc; - } - - } while (draw_call.next()); - - // Tail - ensure(indirection_table_offset < (instancing_indirection_table.size() * draw_call.pass_count())); - std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size_bytes()); - - // Now write the constants to the GPU buffer - constants_data_array_buffer.reserve(constants_data.size_bytes()); - std::memcpy(constants_data_array_buffer.data(), constants_data.data(), constants_data.size_bytes()); - } -} diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.h b/rpcs3/Emu/RSX/Core/RSXDrawCommands.h deleted file mode 100644 index b69a918401..0000000000 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.h +++ /dev/null @@ -1,110 +0,0 @@ -#pragma once - -#include - -#include "Emu/RSX/Core/RSXVertexTypes.h" -#include "Emu/RSX/NV47/FW/draw_call.hpp" -#include "Emu/RSX/Program/ProgramStateCache.h" -#include "Emu/RSX/rsx_vertex_data.h" - -#include -#include - -namespace rsx -{ - struct rsx_state; - struct context; - class io_buffer; - - class draw_command_processor - { - using vertex_program_metadata_t = program_hash_util::vertex_program_utils::vertex_program_metadata; - - context* m_ctx = nullptr; - - protected: - friend class thread; - - std::array m_vertex_push_buffers; - rsx::simple_array m_element_push_buffer; - - public: - draw_command_processor() = default; - - void init(context* ctx) - { - m_ctx = ctx; - } - - // Analyze vertex inputs and group all interleaved blocks - void analyse_inputs_interleaved(vertex_input_layout& layout, const vertex_program_metadata_t& vp_metadata); - - // Retrieve raw bytes for the index array (untyped) - std::span get_raw_index_array(const draw_clause& 
draw_indexed_clause) const; - - // Get compiled draw command for backend rendering - std::variant - get_draw_command(const rsx::rsx_state& state) const; - - // Push-buffers for immediate rendering (begin-end scopes) - void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value); - - u32 get_push_buffer_vertex_count() const; - - void append_array_element(u32 index); - - u32 get_push_buffer_index_count() const; - - void clear_push_buffers(); - - const std::span element_push_buffer() const - { - return m_element_push_buffer; - } - - // Host driver helpers - void fill_vertex_layout_state( - const vertex_input_layout& layout, - const vertex_program_metadata_t& vp_metadata, - u32 first_vertex, - u32 vertex_count, - s32* buffer, - u32 persistent_offset_base, - u32 volatile_offset_base) const; - - void write_vertex_data_to_memory( - const vertex_input_layout& layout, - u32 first_vertex, - u32 vertex_count, - void* persistent_data, - void* volatile_data) const; - - /** - * Fill buffer with 4x4 scale offset matrix. - * Vertex shader's position is to be multiplied by this matrix. - * if flip_y is set, the matrix is modified to use d3d convention. - */ - void fill_scale_offset_data(void* buffer, bool flip_y) const; - - /** - * Fill buffer with user clip information - */ - void fill_user_clip_data(void* buffer) const; - - /** - * Fill buffer with vertex program constants. - * Relocation table allows to do a partial fill with only selected registers. - */ - void fill_vertex_program_constants_data(void* buffer, const std::span& reloc_table) const; - - /** - * Fill buffer with fragment rasterization state. - * Fills current fog values, alpha test parameters and texture scaling parameters - */ - void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program) const; - - // Fill instancing buffers. A single iobuf is used for both. 
256byte alignment enforced to allow global bind - // Returns offsets to the index redirection lookup table and constants field array - void fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const; - }; -} diff --git a/rpcs3/Emu/RSX/Core/RSXDriverState.h b/rpcs3/Emu/RSX/Core/RSXDriverState.h deleted file mode 100644 index e915cd67e2..0000000000 --- a/rpcs3/Emu/RSX/Core/RSXDriverState.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include - -namespace rsx -{ - enum pipeline_state : u32 - { - fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed - vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed - fragment_program_state_dirty = (1 << 2), // Fragment program state changed - vertex_program_state_dirty = (1 << 3), // Vertex program state changed - fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc) - vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc) - transform_constants_dirty = (1 << 6), // Transform constants changed - fragment_constants_dirty = (1 << 7), // Fragment constants changed - framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed - fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed - vertex_texture_state_dirty = (1 << 10), // Fragment texture parameters changed - scissor_config_state_dirty = (1 << 11), // Scissor region changed - zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed - - scissor_setup_invalid = (1 << 13), // Scissor configuration is broken - scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint - - polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed - line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed - - push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode 
vertex buffers) - - polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed - depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed - - pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint. - - rtt_config_dirty = (1 << 21), // Render target configuration changed - rtt_config_contested = (1 << 22), // Render target configuration is indeterminate - rtt_config_valid = (1 << 23), // Render target configuration is valid - rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate - - xform_instancing_state_dirty = (1 << 25), // Transform instancing state has changed - - fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty, - vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty, - invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty | xform_instancing_state_dirty, - invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty, - memory_barrier_bits = framebuffer_reads_dirty, - - // Vulkan-specific signals - invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty, - - all_dirty = ~0u - }; -} diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index a3cdd06986..b25b0afde5 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -513,7 +513,7 @@ void GLGSRender::emit_geometry(u32 sub_index) if (vertex_state & rsx::vertex_arrays_changed) { - m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata); + analyse_inputs_interleaved(m_vertex_layout); } else if (vertex_state & rsx::vertex_base_changed) { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3e60af9f68..c785ddc879 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -840,8 +840,8 @@ void GLGSRender::load_program_env() // 
Vertex state auto mapping = m_vertex_env_buffer->alloc_from_heap(144, m_uniform_buffer_offset_align); auto buf = static_cast(mapping.first); - m_draw_processor.fill_scale_offset_data(buf, false); - m_draw_processor.fill_user_clip_data(buf + 64); + fill_scale_offset_data(buf, false); + fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); *(reinterpret_cast(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale(); *(reinterpret_cast(buf + 136)) = rsx::method_registers.clip_min(); @@ -887,7 +887,7 @@ void GLGSRender::load_program_env() // Fragment state auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align); auto buf = static_cast(mapping.first); - m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program); + fill_fragment_state_buffer(buf, current_fragment_program); m_fragment_env_buffer->bind_range(GL_FRAGMENT_STATE_BIND_SLOT, mapping.second, 32); } @@ -988,7 +988,7 @@ void GLGSRender::upload_transform_constants(const rsx::io_buffer& buffer) : std::span(m_vertex_prog->constant_ids); buffer.reserve(transform_constants_size); - m_draw_processor.fill_vertex_program_constants_data(buffer.data(), constant_ids); + fill_vertex_program_constants_data(buffer.data(), constant_ids); } } @@ -1007,14 +1007,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info) buf[1] = upload_info.vertex_index_offset; buf += 4; - m_draw_processor.fill_vertex_layout_state( - m_vertex_layout, - current_vp_metadata, - upload_info.first_vertex, - upload_info.allocated_vertex_count, - reinterpret_cast(buf), - upload_info.persistent_mapping_offset, - upload_info.volatile_mapping_offset); + fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, reinterpret_cast(buf), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); 
m_vertex_layout_buffer->bind_range(GL_VERTEX_LAYOUT_BIND_SLOT, mapping.second, 128 + 16); diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 6d39b7c6b8..f4e4405407 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -153,7 +153,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() m_profiler.start(); //Write index buffers and count verts - auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), m_draw_processor.get_draw_command(rsx::method_registers)); + auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); const u32 vertex_count = (result.max_index - result.min_index) + 1; u32 vertex_base = result.min_index; @@ -250,7 +250,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() } //Write all the data - m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); m_frame_stats.vertex_upload_time += m_profiler.duration(); return upload_info; diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp index 640db9c762..39c8aa01e7 100644 --- a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp @@ -89,52 +89,6 @@ namespace rsx } } - bool draw_clause::check_trivially_instanced() const - { - if (pass_count() <= 1) - { - // Cannot instance one draw call or less - return false; - } - - // For instancing all draw calls must be identical - const auto& ref = draw_command_ranges.front(); - for (const auto& range : draw_command_ranges) - { - if (range.first != ref.first || range.count != ref.count) - { - return false; - } - } - - if (draw_command_barriers.empty()) - { - // Raise alarm here for investigation, we may be missing 
a corner case. - rsx_log.error("Instanced draw detected, but no command barriers found!"); - return false; - } - - // Barriers must exist, but can only involve updating transform constants (for now) - for (const auto& barrier : draw_command_barriers) - { - if (barrier.type != rsx::transform_constant_load_modifier_barrier && - barrier.type != rsx::transform_constant_update_barrier) - { - ensure(barrier.draw_id < ::size32(draw_command_ranges)); - if (draw_command_ranges[barrier.draw_id].count == 0) - { - // Dangling command barriers are ignored. We're also at the end of the command, so abort. - break; - } - - // Fail. Only transform constant instancing is supported at the moment. - return false; - } - } - - return true; - } - void draw_clause::reset(primitive_type type) { current_range_index = ~0u; @@ -143,7 +97,6 @@ namespace rsx command = draw_command::none; primitive = type; primitive_barrier_enable = false; - is_trivial_instanced_draw = false; draw_command_ranges.clear(); draw_command_barriers.clear(); @@ -152,7 +105,7 @@ namespace rsx is_disjoint_primitive = is_primitive_disjointed(primitive); } - u32 draw_clause::execute_pipeline_dependencies(context* ctx, instanced_draw_config_t* instance_config) const + u32 draw_clause::execute_pipeline_dependencies(context* ctx) const { u32 result = 0u; for (; @@ -198,20 +151,7 @@ namespace rsx // Update transform constants auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0); auto buffer = std::span(static_cast(vm::base(ptr)), barrier.arg1); - auto notify = [&](rsx::context*, u32 load, u32 count) - { - if (!instance_config) - { - return false; - } - - instance_config->transform_constants_data_changed = true; - instance_config->patch_load_offset = load; - instance_config->patch_load_count = count; - return true; - }; - - nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer, notify); + nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT 
+ barrier.index, buffer); result |= transform_constants_changed; break; } diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp index 4e350e985d..323504ac91 100644 --- a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp @@ -7,14 +7,6 @@ namespace rsx { - struct instanced_draw_config_t - { - bool transform_constants_data_changed; - - u32 patch_load_offset; - u32 patch_load_count; - }; - class draw_clause { // Stores the first and count argument from draw/draw indexed parameters between begin/end clauses. @@ -59,8 +51,6 @@ namespace rsx } } - bool check_trivially_instanced() const; - public: primitive_type primitive{}; draw_command command{}; @@ -69,7 +59,6 @@ namespace rsx bool is_disjoint_primitive{}; // Set if primitive type does not rely on adjacency information bool primitive_barrier_enable{}; // Set once to signal that a primitive restart barrier can be inserted bool is_rendering{}; // Set while we're actually pushing the draw calls to host GPU - bool is_trivial_instanced_draw{}; // Set if the draw call can be executed on the host GPU as a single instanced draw. simple_array inline_vertex_array{}; @@ -84,8 +73,8 @@ namespace rsx { // End draw call append mode current_range_index = ~0u; - // Check if we can instance on host - is_trivial_instanced_draw = check_trivially_instanced(); + + // TODO } /** @@ -280,7 +269,7 @@ namespace rsx /** * Executes commands reqiured to make the current draw state valid */ - u32 execute_pipeline_dependencies(struct context* ctx, instanced_draw_config_t* instance_config = nullptr) const; + u32 execute_pipeline_dependencies(struct context* ctx) const; const draw_range_t& get_range() const { diff --git a/rpcs3/Emu/RSX/NV47/HW/common.cpp b/rpcs3/Emu/RSX/NV47/HW/common.cpp index f60ac94f5f..ee901bcd72 100644 --- a/rpcs3/Emu/RSX/NV47/HW/common.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/common.cpp @@ -18,7 +18,7 @@ namespace rsx // NOTE: Push buffers still behave like register writes. 
// You do not need to specify each attribute for each vertex, the register is referenced instead. // This is classic OpenGL 1.x behavior as I remember. - RSX(ctx)->GRAPH_frontend().append_to_push_buffer(attrib_index, count, channel_select, vtype, value); + RSX(ctx)->append_to_push_buffer(attrib_index, count, channel_select, vtype, value); } auto& info = REGS(ctx)->register_vertex_info[attrib_index]; diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 8d9aecfaee..448ab31afe 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -30,7 +30,7 @@ namespace rsx REGS(ctx)->transform_constants[load + constant_id][subreg] = arg; } - void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span& args, const std::function& notify) + void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span& args) { const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT; const u32 constant_id = index / 4; @@ -40,15 +40,8 @@ namespace rsx auto dst = ®S(ctx)->transform_constants[load + constant_id][subreg]; copy_data_swap_u32(dst, args.data(), ::size32(args)); - // Notify const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div - const u32 load_index = load + constant_id; - const u32 load_count = last_constant_id - constant_id; - - if (!notify || !notify(ctx, load_index, load_count)) - { - RSX(ctx)->patch_transform_constants(ctx, load_index, load_count); - } + RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id); } void set_transform_constant::impl(context* ctx, u32 reg, [[maybe_unused]] u32 arg) @@ -263,15 +256,15 @@ namespace rsx { if (RSX(ctx)->in_begin_end) { - RSX(ctx)->GRAPH_frontend().append_array_element(arg & 0xFFFF); - RSX(ctx)->GRAPH_frontend().append_array_element(arg >> 16); + RSX(ctx)->append_array_element(arg & 0xFFFF); + RSX(ctx)->append_array_element(arg >> 16); } } void 
set_array_element32(context* ctx, u32, u32 arg) { if (RSX(ctx)->in_begin_end) - RSX(ctx)->GRAPH_frontend().append_array_element(arg); + RSX(ctx)->append_array_element(arg); } void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg) @@ -360,8 +353,8 @@ namespace rsx // Check if we have immediate mode vertex data in a driver-local buffer if (REGS(ctx)->current_draw_clause.command == rsx::draw_command::none) { - const u32 push_buffer_vertices_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_vertex_count(); - const u32 push_buffer_index_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_index_count(); + const u32 push_buffer_vertices_count = RSX(ctx)->get_push_buffer_vertex_count(); + const u32 push_buffer_index_count = RSX(ctx)->get_push_buffer_index_count(); // Need to set this flag since it overrides some register contents REGS(ctx)->current_draw_clause.is_immediate_draw = true; @@ -393,12 +386,6 @@ namespace rsx return; } - // Notify the backend if the drawing style changes (instanced vs non-instanced) - if (REGS(ctx)->current_draw_clause.is_trivial_instanced_draw != RSX(ctx)->is_current_vertex_program_instanced()) - { - RSX(ctx)->m_graphics_state |= rsx::pipeline_state::xform_instancing_state_dirty; - } - RSX(ctx)->end(); } else diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.h b/rpcs3/Emu/RSX/NV47/HW/nv4097.h index 6d526d799a..68a2ddb9df 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.h +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.h @@ -204,7 +204,7 @@ namespace rsx static void decode_one(context* ctx, u32 reg, u32 arg); - static void batch_decode(context* ctx, u32 reg, const std::span& args, const std::function& notify = {}); + static void batch_decode(context* ctx, u32 reg, const std::span& args); }; struct set_transform_program diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index df3fbe5e16..15e450dab7 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ 
b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -269,7 +269,6 @@ public: struct { - // Configuration properties (in) u16 in_register_mask = 0; u16 common_access_sampler_mask = 0; @@ -277,7 +276,6 @@ public: u16 redirected_sampler_mask = 0; u16 multisampled_sampler_mask = 0; - // Decoded properties (out) bool has_lit_op = false; bool has_gather_op = false; bool has_no_output = false; diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 3f72a3b4a7..5bd2278b77 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -262,11 +262,6 @@ namespace glsl } } - if (props.require_instanced_render) - { - enabled_options.push_back("_ENABLE_INSTANCED_CONSTANTS"); - } - // Import vertex header program_common::define_glsl_switches(OS, enabled_options); diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl index dfe7252ca8..fd8c9f596d 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexPrologue.glsl @@ -55,23 +55,4 @@ vec4 apply_zclip_xform( } #endif -#if defined(_ENABLE_INSTANCED_CONSTANTS) -// Workaround for GL vs VK builtin variable naming -#ifdef VULKAN -#define _gl_InstanceID gl_InstanceIndex -#else -#define _gl_InstanceID gl_InstanceID -#endif - -vec4 _fetch_constant(const in int base_offset) -{ - // Get virtual draw/instance id. 
Normally will be 1:1 based on instance index - const int indirection_offset = (_gl_InstanceID * CONSTANTS_ARRAY_LENGTH) + base_offset; - const int corrected_offset = constants_addressing_lookup[indirection_offset]; - return instanced_constants_array[corrected_offset]; -} -#else -#define _fetch_constant(x) vc[x] -#endif - )" diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index 8967dd7598..11f657ddf5 100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -22,7 +22,6 @@ namespace glsl // Applicable in vertex stage bool require_lit_emulation : 1; bool require_explicit_invariance : 1; - bool require_instanced_render : 1; bool emulate_zclip_transform : 1; bool emulate_depth_clip_only : 1; diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp index 2f7330f8fa..8c1318ad28 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp @@ -341,7 +341,6 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert usz vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const { usz hash = vertex_program_utils::get_vertex_program_ucode_hash(program); - hash ^= program.ctrl; hash ^= program.output_mask; hash ^= program.texture_state.texture_dimensions; hash ^= program.texture_state.multisampled_textures; @@ -352,8 +351,6 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R { if (binary1.output_mask != binary2.output_mask) return false; - if (binary1.ctrl != binary2.ctrl) - return false; if (binary1.texture_state != binary2.texture_state) return false; if (binary1.data.size() != binary2.data.size()) diff --git a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp index 1511902300..7cb7d578a4 100644 --- a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp +++ 
b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp @@ -131,7 +131,7 @@ std::string VertexProgramDecompiler::GetSRC(const u32 n) m_parr.AddParam(PF_PARAM_UNIFORM, float4, std::string("vc[468]")); properties.has_indexed_constants |= !!d3.index_const; m_constant_ids.insert(static_cast(d1.const_src)); - fmt::append(ret, "_fetch_constant(%u%s)", d1.const_src, (d3.index_const ? " + " + AddAddrReg() : "")); + ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]"; break; default: @@ -362,13 +362,14 @@ std::string VertexProgramDecompiler::NotZeroPositive(const std::string& code) std::string VertexProgramDecompiler::BuildCode() { std::string main_body; - for (int i = 0, lvl = 1; i < static_cast(m_instr_count); i++) + for (uint i = 0, lvl = 1; i < m_instr_count; i++) { - lvl = std::max(lvl - m_instructions[i].close_scopes, 0); - + lvl -= m_instructions[i].close_scopes; + if (lvl < 1) lvl = 1; for (int j = 0; j < m_instructions[i].put_close_scopes; ++j) { - if (lvl > 1) --lvl; + --lvl; + if (lvl < 1) lvl = 1; main_body.append(lvl, '\t') += "}\n"; } @@ -379,8 +380,6 @@ std::string VertexProgramDecompiler::BuildCode() lvl++; } - ensure(lvl >= 0); // Underflow of indent level will cause crashes!! 
- for (const auto& instruction_body : m_instructions[i].body) { main_body.append(lvl, '\t') += instruction_body + "\n"; @@ -410,7 +409,7 @@ std::string VertexProgramDecompiler::BuildCode() { const auto i = offset++; if (i == index) continue; // Replace with self - reloc_table.emplace_back(fmt::format("_fetch_constant(%d)", index), fmt::format("_fetch_constant(%d)", i)); + reloc_table.emplace_back(fmt::format("vc[%d]", index), fmt::format("vc[%d]", i)); } // One-time patch diff --git a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h index 94b25f8076..b1147c7016 100644 --- a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h @@ -132,10 +132,6 @@ protected: public: struct { - // Configuration properties (in) - // None - - // Decoded properties (out) bool has_lit_op = false; bool has_indexed_constants = false; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index ca10f7fdea..5e10e67e59 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -409,13 +409,12 @@ namespace rsx } }; - const auto element_push_buffer = render->draw_processor()->element_push_buffer(); if (index_size == 4) { - if (!element_push_buffer.empty()) [[unlikely]] + if (!render->element_push_buffer.empty()) [[unlikely]] { // Indices provided via immediate mode - re_evaluate(reinterpret_cast(element_push_buffer.data()), u32{}); + re_evaluate(reinterpret_cast(render->element_push_buffer.data()), u32{}); } else { @@ -425,10 +424,10 @@ namespace rsx } else { - if (!element_push_buffer.empty()) [[unlikely]] + if (!render->element_push_buffer.empty()) [[unlikely]] { // Indices provided via immediate mode - re_evaluate(reinterpret_cast(element_push_buffer.data()), u16{}); + re_evaluate(reinterpret_cast(render->element_push_buffer.data()), u16{}); } else { @@ -620,12 +619,12 @@ namespace rsx ar(rsx::method_registers); - for (auto& v : 
m_draw_processor.m_vertex_push_buffers) + for (auto& v : vertex_push_buffers) { ar(v.attr, v.size, v.type, v.vertex_count, v.dword_count, v.data); } - ar(m_draw_processor.m_element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout); + ar(element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout); ar(dma_address, iomap_table, restore_point, tiles, zculls, display_buffers, display_buffers_count, current_display_buffer); ar(enable_second_vhandler, requested_vsync); ar(device_addr, label_addr, main_mem_size, local_mem_size, rsx_event_port, driver_info); @@ -697,8 +696,6 @@ namespace rsx s_ctx.rsxthr = this; m_ctx = &s_ctx; - m_draw_processor.init(m_ctx); - if (g_cfg.misc.use_native_interface && (g_cfg.video.renderer == video_renderer::opengl || g_cfg.video.renderer == video_renderer::vulkan)) { m_overlay_manager = g_fxo->init(0); @@ -804,6 +801,39 @@ namespace rsx in_begin_end = true; } + void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) + { + if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute))) + { + return; + } + + // Enforce ATTR0 as vertex attribute for push buffers. + // This whole thing becomes a mess if we don't have a provoking attribute. + const auto vertex_id = vertex_push_buffers[0].get_vertex_id(); + vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value); + m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty; + } + + u32 thread::get_push_buffer_vertex_count() const + { + // Enforce ATTR0 as vertex attribute for push buffers. + // This whole thing becomes a mess if we don't have a provoking attribute. 
+ return vertex_push_buffers[0].vertex_count; + } + + void thread::append_array_element(u32 index) + { + // Endianness is swapped because common upload code expects input in BE + // TODO: Implement fast upload path for LE inputs and do away with this + element_push_buffer.push_back(std::bit_cast>(index)); + } + + u32 thread::get_push_buffer_index_count() const + { + return ::size32(element_push_buffer); + } + void thread::end() { if (capture_current_frame) @@ -820,7 +850,20 @@ namespace rsx m_eng_interrupt_mask |= rsx::backend_interrupt; ROP_sync_timestamp = rsx::get_shared_tag(); - m_draw_processor.clear_push_buffers(); + if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty) + { + for (auto& push_buf : vertex_push_buffers) + { + //Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 + //rsx::method_registers.register_vertex_info[index].size = 0; + + push_buf.clear(); + } + + m_graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty); + } + + element_push_buffer.clear(); zcull_ctrl->on_draw(); @@ -1154,6 +1197,180 @@ namespace rsx state += cpu_flag::exit; } + void thread::fill_scale_offset_data(void *buffer, bool flip_y) const + { + int clip_w = rsx::method_registers.surface_clip_width(); + int clip_h = rsx::method_registers.surface_clip_height(); + + float scale_x = rsx::method_registers.viewport_scale_x() / (clip_w / 2.f); + float offset_x = rsx::method_registers.viewport_offset_x() - (clip_w / 2.f); + offset_x /= clip_w / 2.f; + + float scale_y = rsx::method_registers.viewport_scale_y() / (clip_h / 2.f); + float offset_y = (rsx::method_registers.viewport_offset_y() - (clip_h / 2.f)); + offset_y /= clip_h / 2.f; + if (flip_y) scale_y *= -1; + if (flip_y) offset_y *= -1; + + float scale_z = rsx::method_registers.viewport_scale_z(); + float offset_z = rsx::method_registers.viewport_offset_z(); + float one = 1.f; + + utils::stream_vector(buffer, std::bit_cast(scale_x), 0, 0, std::bit_cast(offset_x)); + 
utils::stream_vector(static_cast(buffer) + 16, 0, std::bit_cast(scale_y), 0, std::bit_cast(offset_y)); + utils::stream_vector(static_cast(buffer) + 32, 0, 0, std::bit_cast(scale_z), std::bit_cast(offset_z)); + utils::stream_vector(static_cast(buffer) + 48, 0, 0, 0, std::bit_cast(one)); + } + + void thread::fill_user_clip_data(void *buffer) const + { + const rsx::user_clip_plane_op clip_plane_control[6] = + { + rsx::method_registers.clip_plane_0_enabled(), + rsx::method_registers.clip_plane_1_enabled(), + rsx::method_registers.clip_plane_2_enabled(), + rsx::method_registers.clip_plane_3_enabled(), + rsx::method_registers.clip_plane_4_enabled(), + rsx::method_registers.clip_plane_5_enabled(), + }; + + u8 data_block[64]; + s32* clip_enabled_flags = reinterpret_cast(data_block); + f32* clip_distance_factors = reinterpret_cast(data_block + 32); + + for (int index = 0; index < 6; ++index) + { + switch (clip_plane_control[index]) + { + default: + rsx_log.error("bad clip plane control (0x%x)", static_cast(clip_plane_control[index])); + [[fallthrough]]; + + case rsx::user_clip_plane_op::disable: + clip_enabled_flags[index] = 0; + clip_distance_factors[index] = 0.f; + break; + + case rsx::user_clip_plane_op::greater_or_equal: + clip_enabled_flags[index] = 1; + clip_distance_factors[index] = 1.f; + break; + + case rsx::user_clip_plane_op::less_than: + clip_enabled_flags[index] = 1; + clip_distance_factors[index] = -1.f; + break; + } + } + + memcpy(buffer, data_block, 2 * 8 * sizeof(u32)); + } + + /** + * Fill buffer with vertex program constants. + * Buffer must be at least 512 float4 wide. 
+ */ + void thread::fill_vertex_program_constants_data(void* buffer, const std::span& reloc_table) + { + if (!reloc_table.empty()) [[ likely ]] + { + char* dst = reinterpret_cast(buffer); + for (const auto& index : reloc_table) + { + utils::stream_vector_from_memory(dst, &rsx::method_registers.transform_constants[index]); + dst += 16; + } + } + else + { + memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float)); + } + } + + void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) + { + ROP_control_t rop_control{}; + + if (rsx::method_registers.alpha_test_enabled()) + { + const u32 alpha_func = static_cast(rsx::method_registers.alpha_func()); + rop_control.set_alpha_test_func(alpha_func); + rop_control.enable_alpha_test(); + } + + if (rsx::method_registers.polygon_stipple_enabled()) + { + rop_control.enable_polygon_stipple(); + } + + if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c) + { + // TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders + // Alpha values generate a coverage mask for order independent blending + // Requires hardware AA to work properly (or just fragment sample stage in fragment shaders) + // Simulated using combined alpha blend and alpha test + rop_control.enable_alpha_to_coverage(); + if (rsx::method_registers.msaa_sample_mask()) + { + rop_control.enable_MSAA_writes(); + } + + // Sample configuration bits + switch (rsx::method_registers.surface_antialias()) + { + case rsx::surface_antialiasing::center_1_sample: + break; + case rsx::surface_antialiasing::diagonal_centered_2_samples: + rop_control.set_msaa_control(1u); + break; + default: + rop_control.set_msaa_control(3u); + break; + } + } + + const f32 fog0 = rsx::method_registers.fog_params_0(); + const f32 fog1 = rsx::method_registers.fog_params_1(); + const u32 fog_mode = static_cast(rsx::method_registers.fog_equation()); + + // Check if 
framebuffer is actually an XRGB format and not a WZYX format + switch (rsx::method_registers.surface_color()) + { + case rsx::surface_color_format::w16z16y16x16: + case rsx::surface_color_format::w32z32y32x32: + case rsx::surface_color_format::x32: + // These behave very differently from "normal" formats. + break; + default: + // Integer framebuffer formats. + rop_control.enable_framebuffer_INT(); + + // Check if we want sRGB conversion. + if (rsx::method_registers.framebuffer_srgb_enabled()) + { + rop_control.enable_framebuffer_sRGB(); + } + break; + } + + // Generate wpos coefficients + // wpos equation is now as follows: + // wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0) + // wpos.x = (frag_coord / resolution_scale) + // wpos.zw = frag_coord.zw + + const auto window_origin = rsx::method_registers.shader_window_origin(); + const u32 window_height = rsx::method_registers.shader_window_height(); + const f32 resolution_scale = (window_height <= static_cast(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale(); + const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); + const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 
0.f : window_height; + const f32 alpha_ref = rsx::method_registers.alpha_ref(); + + u32 *dst = static_cast(buffer); + utils::stream_vector(dst, std::bit_cast(fog0), std::bit_cast(fog1), rop_control.value, std::bit_cast(alpha_ref)); + utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast(wpos_scale), std::bit_cast(wpos_bias)); + } + u64 thread::timestamp() { const u64 freq = sys_time_get_timebase_frequency(); @@ -1192,6 +1409,51 @@ namespace rsx return t + timestamp_subvalue; } + std::span thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const + { + if (!element_push_buffer.empty()) [[ unlikely ]] + { + // Indices provided via immediate mode + return {reinterpret_cast(element_push_buffer.data()), ::narrow(element_push_buffer.size() * sizeof(u32))}; + } + + const rsx::index_array_type type = rsx::method_registers.index_type(); + const u32 type_size = get_index_type_size(type); + + // Force aligned indices as realhw + const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location()); + + const u32 first = draw_indexed_clause.min_index(); + const u32 count = draw_indexed_clause.get_elements_count(); + + const auto ptr = vm::_ptr(address); + return { ptr + first * type_size, count * type_size }; + } + + std::variant + thread::get_draw_command(const rsx::rsx_state& state) const + { + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]] + { + return draw_indexed_array_command + { + get_raw_index_array(state.current_draw_clause) + }; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) + { + return draw_array_command{}; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + return draw_inlined_array{}; + } + + fmt::throw_exception("ill-formed draw command"); + } + void thread::do_local_task(FIFO::state state) { 
m_eng_interrupt_mask.clear(rsx::backend_interrupt); @@ -1987,17 +2249,6 @@ namespace rsx void thread::get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors) { - if (m_graphics_state.test(rsx::pipeline_state::xform_instancing_state_dirty)) - { - current_vertex_program.ctrl = 0; - if (rsx::method_registers.current_draw_clause.is_trivial_instanced_draw) - { - current_vertex_program.ctrl |= RSX_SHADER_CONTROL_INSTANCED_CONSTANTS; - } - - m_graphics_state.clear(rsx::pipeline_state::xform_instancing_state_dirty); - } - if (!m_graphics_state.test(rsx::pipeline_state::vertex_program_dirty)) { return; @@ -2005,6 +2256,7 @@ namespace rsx ensure(!m_graphics_state.test(rsx::pipeline_state::vertex_program_ucode_dirty)); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); + current_vertex_program.ctrl = 0; // Reserved for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { @@ -2027,6 +2279,183 @@ namespace rsx current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask); } + void thread::analyse_inputs_interleaved(vertex_input_layout& result) + { + const rsx_state& state = rsx::method_registers; + const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask; + + result.clear(); + result.attribute_mask = static_cast(input_mask); + + if (state.current_draw_clause.command == rsx::draw_command::inlined_array) + { + interleaved_range_info& info = *result.alloc_interleaved_block(); + info.interleaved = true; + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + auto &vinfo = state.vertex_arrays_info[index]; + result.attribute_placement[index] = attribute_buffer_placement::none; + + if (vinfo.size() > 0) + { + // Stride must be updated even if the stream is disabled + info.attribute_stride += 
rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size()); + info.locations.push_back({ index, false, 1 }); + + if (input_mask & (1u << index)) + { + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + } + else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index)) + { + // Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + } + + if (info.attribute_stride) + { + // At least one array feed must be enabled for vertex input + result.interleaved_blocks.push_back(&info); + } + + return; + } + + const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask(); + result.interleaved_blocks.reserve(16); + result.referenced_registers.reserve(16); + + for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1) + { + ensure(index < rsx::limits::vertex_count); + + if (!(ref_mask & 1u)) + { + // Nothing to do, uninitialized + continue; + } + + // Always reset attribute placement by default + result.attribute_placement[index] = attribute_buffer_placement::none; + + // Check for interleaving + if (rsx::method_registers.current_draw_clause.is_immediate_draw && + rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed) + { + // NOTE: In immediate rendering mode, all vertex setup is ignored + // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults + if (vertex_push_buffers[index].vertex_count > 1) + { + // Ensure consistent number of vertices per attribute. 
+ vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false); + + // Read temp buffer (register array) + std::pair volatile_range_info = std::make_pair(index, static_cast(vertex_push_buffers[index].data.size() * sizeof(u32))); + result.volatile_blocks.push_back(volatile_range_info); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + else if (state.register_vertex_info[index].size > 0) + { + // Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + + // Fall back to the default register value if no source is specified via register + continue; + } + + const auto& info = state.vertex_arrays_info[index]; + if (!info.size()) + { + if (state.register_vertex_info[index].size > 0) + { + //Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + continue; + } + } + else + { + result.attribute_placement[index] = attribute_buffer_placement::persistent; + const u32 base_address = info.offset() & 0x7fffffff; + bool alloc_new_block = true; + bool modulo = !!(frequency_divider_mask & (1 << index)); + + for (auto &block : result.interleaved_blocks) + { + if (block->single_vertex) + { + //Single vertex definition, continue + continue; + } + + if (block->attribute_stride != info.stride()) + { + //Stride does not match, continue + continue; + } + + if (base_address > block->base_offset) + { + const u32 diff = base_address - block->base_offset; + if (diff > info.stride()) + { + //Not interleaved, continue + continue; + } + } + else + { + const u32 diff = block->base_offset - base_address; + if (diff > info.stride()) + { + //Not interleaved, continue + continue; + } + + //Matches, and this address is lower than existing + block->base_offset = base_address; + } + + alloc_new_block = false; + block->locations.push_back({ index, modulo, info.frequency() 
}); + block->interleaved = true; + break; + } + + if (alloc_new_block) + { + interleaved_range_info& block = *result.alloc_interleaved_block(); + block.base_offset = base_address; + block.attribute_stride = info.stride(); + block.memory_location = info.offset() >> 31; + block.locations.reserve(16); + block.locations.push_back({ index, modulo, info.frequency() }); + + if (block.attribute_stride == 0) + { + block.single_vertex = true; + block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + + result.interleaved_blocks.push_back(&block); + } + } + } + + for (auto &info : result.interleaved_blocks) + { + //Calculate real data address to be used during upload + info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location); + } + } + void thread::get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors) { if (!m_graphics_state.test(rsx::pipeline_state::fragment_program_dirty)) @@ -2347,6 +2776,267 @@ namespace rsx return std::make_pair(persistent_memory_size, volatile_memory_size); } + void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base) + { + std::array offset_in_block = {}; + u32 volatile_offset = volatile_offset_base; + u32 persistent_offset = persistent_offset_base; + + //NOTE: Order is important! 
Transient layout is always push_buffers followed by register data + if (rsx::method_registers.current_draw_clause.is_immediate_draw) + { + for (const auto &info : layout.volatile_blocks) + { + offset_in_block[info.first] = volatile_offset; + volatile_offset += info.second; + } + } + + for (u8 index : layout.referenced_registers) + { + offset_in_block[index] = volatile_offset; + volatile_offset += 16; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + const auto &block = layout.interleaved_blocks[0]; + u32 inline_data_offset = volatile_offset; + for (const auto& attrib : block->locations) + { + auto &info = rsx::method_registers.vertex_arrays_info[attrib.index]; + + offset_in_block[attrib.index] = inline_data_offset; + inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + } + else + { + for (const auto &block : layout.interleaved_blocks) + { + for (const auto& attrib : block->locations) + { + const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff); + offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset); + } + + const auto range = block->calculate_required_range(first_vertex, vertex_count); + persistent_offset += block->attribute_stride * range.second; + } + } + + // Fill the data + // Each descriptor field is 64 bits wide + // [0-8] attribute stride + // [8-24] attribute divisor + // [24-27] attribute type + // [27-30] attribute size + // [30-32] reserved + // [32-61] starting offset + // [61-62] swap bytes flag + // [62-63] volatile flag + // [63-64] modulo enable flag + + const s32 default_frequency_mask = (1 << 8); + const s32 swap_storage_mask = (1 << 29); + const s32 volatile_storage_mask = (1 << 30); + const s32 modulo_op_frequency_mask = smin; + + const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); + const auto max_index = (first_vertex + vertex_count) - 1; + 
+ for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1) + { + if (!(ref_mask & 1u)) + { + // Unused input, ignore this + continue; + } + + if (layout.attribute_placement[index] == attribute_buffer_placement::none) + { + static constexpr u64 zero = 0; + std::memcpy(buffer + index * 2, &zero, sizeof(zero)); + continue; + } + + rsx::vertex_base_type type = {}; + s32 size = 0; + s32 attrib0 = 0; + s32 attrib1 = 0; + + if (layout.attribute_placement[index] == attribute_buffer_placement::transient) + { + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + const auto &info = rsx::method_registers.vertex_arrays_info[index]; + + if (!info.size()) + { + // Register + const auto& reginfo = rsx::method_registers.register_vertex_info[index]; + type = reginfo.type; + size = reginfo.size; + + attrib0 = rsx::get_vertex_type_size_on_host(type, size); + } + else + { + // Array + type = info.type(); + size = info.size(); + + attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask; + } + } + else + { + // Data is either from an immediate render or register input + // Immediate data overrides register input + + if (rsx::method_registers.current_draw_clause.is_immediate_draw && + vertex_push_buffers[index].vertex_count > 1) + { + // Push buffer + const auto &info = vertex_push_buffers[index]; + type = info.type; + size = info.size; + + attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask; + } + else + { + // Register + const auto& info = rsx::method_registers.register_vertex_info[index]; + type = info.type; + size = info.size; + + attrib0 = rsx::get_vertex_type_size_on_host(type, size); + } + } + + attrib1 |= volatile_storage_mask; + } + else + { + auto &info = rsx::method_registers.vertex_arrays_info[index]; + type = info.type(); + size = info.size(); + + auto stride = info.stride(); + attrib0 = stride; + + if (stride > 0) //when 
stride is 0, input is not an array but a single element + { + const u32 frequency = info.frequency(); + switch (frequency) + { + case 0: + case 1: + { + attrib0 |= default_frequency_mask; + break; + } + default: + { + if (modulo_mask & (1 << index)) + { + if (max_index >= frequency) + { + // Only set modulo mask if a modulo op is actually necessary! + // This requires that the uploaded range for this attr = [0, freq-1] + // Ignoring modulo op if the rendered range does not wrap allows for range optimization + attrib0 |= (frequency << 8); + attrib1 |= modulo_op_frequency_mask; + } + else + { + attrib0 |= default_frequency_mask; + } + } + else + { + // Division + attrib0 |= (frequency << 8); + } + break; + } + } + } + } //end attribute placement check + + // Special compressed 4 components into one 4-byte value. Decoded as one value. + if (type == rsx::vertex_base_type::cmp) + { + size = 1; + } + + // All data is passed in in PS3-native order (BE) so swap flag should be set + attrib1 |= swap_storage_mask; + attrib0 |= (static_cast(type) << 24); + attrib0 |= (size << 27); + attrib1 |= offset_in_block[index]; + + buffer[index * 2 + 0] = attrib0; + buffer[index * 2 + 1] = attrib1; + } + } + + void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data) + { + auto transient = static_cast(volatile_data); + auto persistent = static_cast(persistent_data); + + auto &draw_call = rsx::method_registers.current_draw_clause; + + if (transient != nullptr) + { + if (draw_call.command == rsx::draw_command::inlined_array) + { + for (const u8 index : layout.referenced_registers) + { + memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); + transient += 16; + } + + memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32)); + //Is it possible to reference data outside of the inlined array? 
+ return; + } + + //NOTE: Order is important! Transient layout is always push_buffers followed by register data + if (draw_call.is_immediate_draw) + { + //NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory + for (const auto &info : layout.volatile_blocks) + { + memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second); + transient += info.second; + } + } + + for (const u8 index : layout.referenced_registers) + { + memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); + transient += 16; + } + } + + if (persistent != nullptr) + { + for (interleaved_range_info* block : layout.interleaved_blocks) + { + auto range = block->calculate_required_range(first_vertex, vertex_count); + + const u32 data_size = range.second * block->attribute_stride; + const u32 vertex_base = range.first * block->attribute_stride; + + g_fxo->get().copy(persistent, vm::_ptr(block->real_offset_address) + vertex_base, data_size); + persistent += data_size; + } + } + } + void thread::flip(const display_flip_info_t& info) { m_eng_interrupt_mask.clear(rsx::display_interrupt); @@ -3006,7 +3696,7 @@ namespace rsx u32 thread::get_load() { - // Average load over around 30 frames + //Average load over around 30 frames if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30) { const auto timestamp = get_system_time(); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 4bcd08c9f4..e861a96e25 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -28,8 +28,6 @@ #include "Emu/IdManager.h" #include "Core/RSXDisplay.h" -#include "Core/RSXDrawCommands.h" -#include "Core/RSXDriverState.h" #include "Core/RSXFrameBuffer.h" #include "Core/RSXContext.h" #include "Core/RSXIOMap.hpp" @@ -61,6 +59,52 @@ namespace rsx context_clear_all = context_clear_color | context_clear_depth }; + enum pipeline_state : u32 + { + 
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed + vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed + fragment_program_state_dirty = (1 << 2), // Fragment program state changed + vertex_program_state_dirty = (1 << 3), // Vertex program state changed + fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc) + vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc) + transform_constants_dirty = (1 << 6), // Transform constants changed + fragment_constants_dirty = (1 << 7), // Fragment constants changed + framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed + fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed + vertex_texture_state_dirty = (1 << 10), // Vertex texture parameters changed + scissor_config_state_dirty = (1 << 11), // Scissor region changed + zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed + + scissor_setup_invalid = (1 << 13), // Scissor configuration is broken + scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint + + polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed + line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed + + push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers) + + polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed + depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed + + pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint. 
+ + rtt_config_dirty = (1 << 21), // Render target configuration changed + rtt_config_contested = (1 << 22), // Render target configuration is indeterminate + rtt_config_valid = (1 << 23), // Render target configuration is valid + rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate + + fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty, + vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty, + invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, + invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty, + memory_barrier_bits = framebuffer_reads_dirty, + + // Vulkan-specific signals + invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty, + + all_dirty = ~0u + }; + enum eng_interrupt_reason : u32 { backend_interrupt = 0x0001, // Backend-related interrupt @@ -117,6 +161,8 @@ namespace rsx void cpu_task() override; protected: + std::array vertex_push_buffers; + s32 m_skip_frame_ctr = 0; bool skip_current_frame = false; @@ -171,9 +217,6 @@ namespace rsx // Host DMA std::unique_ptr m_host_dma_ctrl; - // Draw call management - draw_command_processor m_draw_processor; - public: atomic_t new_get_put = u64{umax}; u32 restore_point = 0; @@ -182,7 +225,7 @@ namespace rsx atomic_t external_interrupt_lock{ 0 }; atomic_t external_interrupt_ack{ false }; atomic_t is_initialized{0}; - + rsx::simple_array element_push_buffer; bool is_fifo_idle() const; void flush_fifo(); @@ -225,8 +268,6 @@ namespace rsx void capture_frame(const std::string& name); const backend_configuration& get_backend_config() const { return backend_config; } - const draw_command_processor* draw_processor() const { return &m_draw_processor; } - public: shared_ptr> intr_thread; @@ -260,6 +301,11 @@ namespace rsx void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout); 
bool get_scissor(areau& region, bool clip_viewport); + /** + * Analyze vertex inputs and group all interleaved blocks + */ + void analyse_inputs_interleaved(vertex_input_layout&); + RSXVertexProgram current_vertex_program = {}; RSXFragmentProgram current_fragment_program = {}; @@ -378,6 +424,21 @@ namespace rsx virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload); virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; } + std::span get_raw_index_array(const draw_clause& draw_indexed_clause) const; + + std::variant + get_draw_command(const rsx::rsx_state& state) const; + + /** + * Immediate mode rendering requires a temp push buffer to hold attrib values + * Appends a value to the push buffer (currently only supports 32-wide types) + */ + void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value); + u32 get_push_buffer_vertex_count() const; + + void append_array_element(u32 index); + u32 get_push_buffer_index_count() const; + protected: /** @@ -387,6 +448,17 @@ namespace rsx */ std::pair calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count); + /** + * Generates vertex input descriptors as an array of 16x2 s32s (one 64-bit descriptor per attribute) + */ + void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0); + + /** + * Uploads vertex data described in the layout descriptor + * Copies from local memory to the write-only output buffers provided in a sequential manner + */ + void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data); + void evaluate_cpu_usage_reduction_limits(); private: @@ -396,8 +468,29 @@ namespace rsx void handle_invalidated_memory_range(); public: + /** + * Fill buffer with 4x4 scale offset matrix. 
+ * Vertex shader's position is to be multiplied by this matrix. + * if flip_y is set, the matrix is modified to use d3d convention. + */ + void fill_scale_offset_data(void *buffer, bool flip_y) const; - draw_command_processor& GRAPH_frontend() { return m_draw_processor; } + /** + * Fill buffer with user clip information + */ + void fill_user_clip_data(void *buffer) const; + + /** + * Fill buffer with vertex program constants. + * Relocation table allows to do a partial fill with only selected registers. + */ + void fill_vertex_program_constants_data(void* buffer, const std::span& reloc_table); + + /** + * Fill buffer with fragment rasterization state. + * Fills current fog values, alpha test parameters and texture scaling parameters + */ + void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program); /** * Notify that a section of memory has been mapped @@ -424,17 +517,9 @@ namespace rsx */ virtual void on_semaphore_acquire_wait() {} - /** - * Load an image from memory with optional scaling and rotation. - * Returns false to tell the HW decoder to perform the operation on the CPU as a fallback when the operation cannot be safely accelerated. 
- */ - virtual bool scaled_image_from_memory(const blit_src_info& /*src_info*/, const blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; } - - - // Program public "get" handlers virtual std::pair get_programs() const { return std::make_pair("", ""); } - bool is_current_vertex_program_instanced() const { return !!(current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS); } + virtual bool scaled_image_from_memory(const blit_src_info& /*src_info*/, const blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; } public: void reset(); diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 7c40cc12ad..ef268cfda3 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -730,7 +730,7 @@ void VKGSRender::emit_geometry(u32 sub_index) if (state_flags & rsx::vertex_arrays_changed) { - m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata); + analyse_inputs_interleaved(m_vertex_layout); } else if (state_flags & rsx::vertex_base_changed) { @@ -929,11 +929,7 @@ void VKGSRender::emit_geometry(u32 sub_index) if (!upload_info.index_info) { - if (draw_call.is_trivial_instanced_draw) - { - vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0); - } - else if (draw_call.is_single_draw()) + if (draw_call.is_single_draw()) { vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0); } @@ -955,13 +951,10 @@ void VKGSRender::emit_geometry(u32 sub_index) vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); - if (draw_call.is_trivial_instanced_draw) + if (rsx::method_registers.current_draw_clause.is_single_draw()) { - vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0, 0); - } - else if (rsx::method_registers.current_draw_clause.is_single_draw()) - { - vkCmdDrawIndexed(*m_current_command_buffer, 
upload_info.vertex_draw_count, 1, 0, 0, 0); + const u32 index_count = upload_info.vertex_draw_count; + vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); } else { @@ -1059,10 +1052,7 @@ void VKGSRender::end() m_frame_stats.setup_time += m_profiler.duration(); // Apply write memory barriers - if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) - { - ds->write_barrier(*m_current_command_buffer); - } + if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) ds->write_barrier(*m_current_command_buffer); for (auto &rtt : m_rtts.m_bound_render_targets) { @@ -1121,19 +1111,12 @@ void VKGSRender::end() m_current_command_buffer->flags |= vk::command_buffer::cb_reload_dynamic_state; } - auto& draw_call = rsx::method_registers.current_draw_clause; - draw_call.begin(); + rsx::method_registers.current_draw_clause.begin(); do { emit_geometry(sub_index++); - - if (draw_call.is_trivial_instanced_draw) - { - // We already completed. End the draw. - draw_call.end(); - } } - while (draw_call.next()); + while (rsx::method_registers.current_draw_clause.next()); if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 51d0df3580..5de8f62d92 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -477,22 +477,6 @@ namespace idx++; - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.instancing_constants_buffer_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - for (auto 
binding = binding_table.textures_first_bind_slot; binding < binding_table.vertex_textures_first_bind_slot; binding++) @@ -659,7 +643,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) }, // Conditional rendering predicate slot; refactor to allow skipping this when not needed - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 } + { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 } }; m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls); @@ -677,7 +661,6 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); m_raster_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer"); - m_instancing_buffer_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "instancing data buffer"); const auto shadermode = g_cfg.video.shadermode.get(); @@ -966,7 +949,6 @@ VKGSRender::~VKGSRender() m_vertex_instructions_buffer.destroy(); m_fragment_instructions_buffer.destroy(); m_raster_env_ring_info.destroy(); - m_instancing_buffer_ring_info.destroy(); // Fallback bindables null_buffer.reset(); @@ -1304,8 +1286,7 @@ void VKGSRender::check_heap_status(u32 flags) m_fragment_constants_ring_info.is_critical() || m_transform_constants_ring_info.is_critical() || m_index_buffer_ring_info.is_critical() || - m_raster_env_ring_info.is_critical() || - m_instancing_buffer_ring_info.is_critical(); + m_raster_env_ring_info.is_critical(); } else { @@ -1337,9 +1318,7 @@ void VKGSRender::check_heap_status(u32 flags) heap_critical = m_vertex_layout_ring_info.is_critical(); break; case 
VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE: - heap_critical = (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) - ? m_instancing_buffer_ring_info.is_critical() - : m_transform_constants_ring_info.is_critical(); + heap_critical = m_transform_constants_ring_info.is_critical(); break; case VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE: heap_critical = m_fragment_constants_ring_info.is_critical(); @@ -1382,7 +1361,6 @@ void VKGSRender::check_heap_status(u32 flags) m_attrib_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats(); m_raster_env_ring_info.reset_allocation_stats(); - m_instancing_buffer_ring_info.reset_allocation_stats(); m_current_frame->reset_heap_ptrs(); m_last_heap_sync_time = rsx::get_shared_tag(); } @@ -2152,7 +2130,6 @@ void VKGSRender::load_program_env() const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program)); const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty)); - const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw; if (update_vertex_env) { @@ -2162,8 +2139,8 @@ void VKGSRender::load_program_env() const auto mem = m_vertex_env_ring_info.alloc<256>(256); auto buf = static_cast(m_vertex_env_ring_info.map(mem, 148)); - m_draw_processor.fill_scale_offset_data(buf, false); - m_draw_processor.fill_user_clip_data(buf + 64); + fill_scale_offset_data(buf, false); + fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); *(reinterpret_cast(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale(); *(reinterpret_cast(buf + 136)) = rsx::method_registers.clip_min(); @@ -2173,32 +2150,7 @@ void 
VKGSRender::load_program_env() m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 }; } - if (update_instancing_data) - { - // Combines transform load + instancing lookup table - const auto alignment = m_device->gpu().get_limits().minStorageBufferOffsetAlignment; - usz indirection_table_offset = 0; - usz constants_data_table_offset = 0; - - rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair - { - indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment)); - return std::make_pair(m_instancing_buffer_ring_info.map(indirection_table_offset, size), size); - }); - - rsx::io_buffer constants_array_buf([&](usz size) -> std::pair - { - constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment)); - return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size); - }); - - m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, *m_vertex_prog); - m_instancing_buffer_ring_info.unmap(); - - m_instancing_indirection_buffer_info = { m_instancing_buffer_ring_info.heap->value, indirection_table_offset, indirection_table_buf.size() }; - m_instancing_constants_array_buffer_info = { m_instancing_buffer_ring_info.heap->value, constants_data_table_offset, constants_array_buf.size() }; - } - else if (update_transform_constants) + if (update_transform_constants) { // Transform constants usz mem_offset = 0; @@ -2248,7 +2200,7 @@ void VKGSRender::load_program_env() auto mem = m_fragment_env_ring_info.alloc<256>(256); auto buf = m_fragment_env_ring_info.map(mem, 32); - m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program); + fill_fragment_state_buffer(buf, current_fragment_program); m_fragment_env_ring_info.unmap(); m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 }; } @@ -2343,24 +2295,13 @@ void VKGSRender::load_program_env() m_program->bind_buffer({ 
predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); } - if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) - { - m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - } - // Clear flags - u32 handled_flags = rsx::pipeline_state::fragment_state_dirty | + m_graphics_state.clear( + rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | + rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | - rsx::pipeline_state::fragment_texture_state_dirty; - - if (!update_instancing_data) - { - handled_flags |= rsx::pipeline_state::transform_constants_dirty; - } - - m_graphics_state.clear(handled_flags); + rsx::pipeline_state::fragment_texture_state_dirty); } void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer) @@ -2376,7 +2317,7 @@ void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer) const auto constant_ids = (transform_constants_size == 8192) ? 
std::span{} : std::span(m_vertex_prog->constant_ids); - m_draw_processor.fill_vertex_program_constants_data(buf, constant_ids); + fill_vertex_program_constants_data(buf, constant_ids); } } @@ -2419,14 +2360,8 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; auto dst = m_vertex_layout_ring_info.map(data_offset, 128); - m_draw_processor.fill_vertex_layout_state( - m_vertex_layout, - current_vp_metadata, - vertex_info.first_vertex, - vertex_info.allocated_vertex_count, - static_cast(dst), - vertex_info.persistent_window_offset, - vertex_info.volatile_window_offset); + fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, static_cast(dst), + vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); m_vertex_layout_ring_info.unmap(); } @@ -2547,8 +2482,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore m_index_buffer_ring_info.is_dirty() || m_transform_constants_ring_info.is_dirty() || m_texture_upload_buffer_ring_info.is_dirty() || - m_raster_env_ring_info.is_dirty() || - m_instancing_buffer_ring_info.is_dirty()) + m_raster_env_ring_info.is_dirty()) { auto secondary_command_buffer = m_secondary_cb_list.next(); secondary_command_buffer->begin(); @@ -2563,7 +2497,6 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore m_transform_constants_ring_info.sync(*secondary_command_buffer); m_texture_upload_buffer_ring_info.sync(*secondary_command_buffer); m_raster_env_ring_info.sync(*secondary_command_buffer); - m_instancing_buffer_ring_info.sync(*secondary_command_buffer); secondary_command_buffer->end(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 55c4b029bb..f99886c9dd 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -149,7 +149,6 @@ private: vk::data_heap 
m_index_buffer_ring_info; // Index data vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap vk::data_heap m_raster_env_ring_info; // Raster control such as polygon and line stipple - vk::data_heap m_instancing_buffer_ring_info; // Instanced rendering data (constants indirection table + instanced constants) vk::data_heap m_fragment_instructions_buffer; vk::data_heap m_vertex_instructions_buffer; @@ -161,8 +160,6 @@ private: VkDescriptorBufferInfo m_fragment_constants_buffer_info {}; VkDescriptorBufferInfo m_fragment_texture_params_buffer_info {}; VkDescriptorBufferInfo m_raster_env_buffer_info {}; - VkDescriptorBufferInfo m_instancing_indirection_buffer_info {}; - VkDescriptorBufferInfo m_instancing_constants_array_buffer_info{}; VkDescriptorBufferInfo m_vertex_instructions_buffer_info {}; VkDescriptorBufferInfo m_fragment_instructions_buffer_info {}; diff --git a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp index 21997508e1..27113bd25e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp +++ b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp @@ -197,7 +197,6 @@ namespace vk s64 index_heap_ptr = 0; s64 texture_upload_heap_ptr = 0; s64 rasterizer_env_heap_ptr = 0; - s64 instancing_heap_ptr = 0; u64 last_frame_sync_time = 0; @@ -219,7 +218,6 @@ namespace vk index_heap_ptr = other.index_heap_ptr; texture_upload_heap_ptr = other.texture_upload_heap_ptr; rasterizer_env_heap_ptr = other.rasterizer_env_heap_ptr; - instancing_heap_ptr = other.instancing_heap_ptr; } // Exchange storage (non-copyable) @@ -231,7 +229,7 @@ namespace vk void tag_frame_end( s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc, s64 vtxconst_loc, s64 index_loc, - s64 texture_loc, s64 rasterizer_loc, s64 instancing_loc) + s64 texture_loc, s64 rasterizer_loc) { attrib_heap_ptr = attrib_loc; vtx_env_heap_ptr = vtxenv_loc; @@ -243,7 +241,6 @@ namespace vk index_heap_ptr = index_loc; texture_upload_heap_ptr = 
texture_loc; rasterizer_env_heap_ptr = rasterizer_loc; - instancing_heap_ptr = instancing_loc; last_frame_sync_time = rsx::get_shared_tag(); } diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 0c32d9034d..5fb4813d2c 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -163,8 +163,7 @@ void VKGSRender::advance_queued_frames() m_transform_constants_ring_info.get_current_put_pos_minus_one(), m_index_buffer_ring_info.get_current_put_pos_minus_one(), m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(), - m_raster_env_ring_info.get_current_put_pos_minus_one(), - m_instancing_buffer_ring_info.get_current_put_pos_minus_one()); + m_raster_env_ring_info.get_current_put_pos_minus_one()); m_queued_frames.push_back(m_current_frame); ensure(m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES); @@ -267,8 +266,6 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx) m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr; m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; - m_raster_env_ring_info.m_get_pos = ctx->rasterizer_env_heap_ptr; - m_instancing_buffer_ring_info.m_get_pos = ctx->instancing_heap_ptr; m_attrib_ring_info.notify(); m_vertex_env_ring_info.notify(); @@ -279,8 +276,6 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx) m_fragment_texture_params_ring_info.notify(); m_index_buffer_ring_info.notify(); m_texture_upload_buffer_ring_info.notify(); - m_raster_env_ring_info.notify(); - m_instancing_buffer_ring_info.notify(); } } diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 526f592eb6..e690139653 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -217,7 +217,7 @@ namespace vk::vertex_upload_info VKGSRender::upload_vertex_data() { draw_command_visitor 
visitor(m_index_buffer_ring_info, m_vertex_layout); - auto result = std::visit(visitor, m_draw_processor.get_draw_command(rsx::method_registers)); + auto result = std::visit(visitor, get_draw_command(rsx::method_registers)); const u32 vertex_count = (result.max_index - result.min_index) + 1; u32 vertex_base = result.min_index; @@ -294,7 +294,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() const usz volatile_offset_in_block = volatile_offset - persistent_offset; void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size); - m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast(block_mapping) + volatile_offset_in_block); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast(block_mapping) + volatile_offset_in_block); m_attrib_ring_info.unmap(); } else @@ -302,14 +302,14 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() if (required.first > 0 && persistent_offset != umax) { void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first); - m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr); m_attrib_ring_info.unmap(); } if (required.second > 0) { void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second); - m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping); m_attrib_ring_info.unmap(); } } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 0e88dab75d..fae195b1dc 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -32,34 +32,31 @@ void 
VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << "#version 450\n\n"; OS << "#extension GL_ARB_separate_shader_objects : enable\n\n"; - OS << - "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n" - "{\n" - " mat4 scale_offset_mat;\n" - " ivec4 user_clip_enabled[2];\n" - " vec4 user_clip_factor[2];\n" - " uint transform_branch_bits;\n" - " float point_size;\n" - " float z_near;\n" - " float z_far;\n" - "};\n\n"; + OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"; + OS << "{\n"; + OS << " mat4 scale_offset_mat;\n"; + OS << " ivec4 user_clip_enabled[2];\n"; + OS << " vec4 user_clip_factor[2];\n"; + OS << " uint transform_branch_bits;\n"; + OS << " float point_size;\n"; + OS << " float z_near;\n"; + OS << " float z_far;\n"; + OS << "};\n\n"; if (m_device_props.emulate_conditional_rendering) { - OS << - "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n" - "{\n" - " uint conditional_rendering_predicate;\n" - "};\n\n"; + OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n"; + OS << "{\n"; + OS << " uint conditional_rendering_predicate;\n"; + OS << "};\n\n"; } - OS << - "layout(push_constant) uniform VertexLayoutBuffer\n" - "{\n" - " uint vertex_base_index;\n" - " uint vertex_index_offset;\n" - " uint draw_id;\n" - " uint layout_ptr_offset;\n"; + OS << "layout(push_constant) uniform VertexLayoutBuffer\n"; + OS << "{\n"; + OS << " uint vertex_base_index;\n"; + OS << " uint vertex_index_offset;\n"; + OS << " uint draw_id;\n"; + OS << " uint layout_ptr_offset;\n"; if (m_device_props.emulate_conditional_rendering) { @@ -113,50 +110,18 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std { if (PI.name.starts_with("vc[")) { - if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) - { - OS << "layout(std140, set=0, binding=" << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform 
VertexConstantsBuffer\n"; - OS << "{\n"; - OS << " vec4 " << PI.name << ";\n"; - OS << "};\n\n"; + OS << "layout(std140, set=0, binding = " << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n"; + OS << "{\n"; + OS << " vec4 " << PI.name << ";\n"; + OS << "};\n\n"; - in.location = m_binding_table.vertex_constant_buffers_bind_slot; - in.domain = glsl::glsl_vertex_program; - in.name = "VertexConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; + in.location = m_binding_table.vertex_constant_buffers_bind_slot; + in.domain = glsl::glsl_vertex_program; + in.name = "VertexConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; - inputs.push_back(in); - continue; - } - else - { - // 1. Bind indirection lookup buffer - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n"; - OS << "{\n"; - OS << " int constants_addressing_lookup[];\n"; - OS << "};\n\n"; - - in.location = m_binding_table.instancing_lookup_table_bind_slot; - in.domain = glsl::glsl_vertex_program; - in.name = "InstancingData"; - in.type = vk::glsl::input_type_storage_buffer; - inputs.push_back(in); - - // 2. Bind actual constants buffer - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n"; - OS << "{\n"; - OS << " vec4 instanced_constants_array[];\n"; - OS << "};\n\n"; - - OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 
468 : ::size32(m_constant_ids)) << "\n\n"; - - in.location = m_binding_table.instancing_constants_buffer_slot; - in.domain = glsl::glsl_vertex_program; - in.name = "VertexConstantsBuffer"; - in.type = vk::glsl::input_type_storage_buffer; - inputs.push_back(in); - continue; - } + inputs.push_back(in); + continue; } if (PT.type == "sampler2D" || @@ -244,7 +209,6 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64; properties2.low_precision_tests = vk::is_NVIDIA(vk::get_driver_vendor()); properties2.require_explicit_invariance = (vk::is_NVIDIA(vk::get_driver_vendor()) && g_cfg.video.shader_precision != gpu_preset_level::low); - properties2.require_instanced_render = !!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS); glsl::insert_glsl_legacy_function(OS, properties2); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_vulkan); diff --git a/rpcs3/Emu/RSX/VK/vkutils/pipeline_binding_table.h b/rpcs3/Emu/RSX/VK/vkutils/pipeline_binding_table.h index fdf0ddd2b0..e2682a503e 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/pipeline_binding_table.h +++ b/rpcs3/Emu/RSX/VK/vkutils/pipeline_binding_table.h @@ -14,10 +14,8 @@ namespace vk u8 vertex_buffers_first_bind_slot = 5; u8 conditional_render_predicate_slot = 8; u8 rasterizer_env_bind_slot = 9; - u8 instancing_lookup_table_bind_slot = 10; - u8 instancing_constants_buffer_slot = 11; - u8 textures_first_bind_slot = 12; - u8 vertex_textures_first_bind_slot = 12; // Invalid, has to be initialized properly + u8 textures_first_bind_slot = 10; + u8 vertex_textures_first_bind_slot = 10; // Invalid, has to be initialized properly u8 total_descriptor_bindings = vertex_textures_first_bind_slot; // Invalid, has to be initialized properly }; } diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index dcf8b51b28..816aec77c0 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -455,8 
+455,7 @@ namespace gcm RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used?? // Custom - RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000, // Rasterizing triangles and not lines or points - RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x20000, // Support instance ID offsets when loading constants + RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000 // Rasterizing triangles and not lines or points }; // GCM Reports diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 728fe97bed..062d481b8a 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -104,7 +104,6 @@ - @@ -620,8 +619,6 @@ - - diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 5ba02118b5..3471fdfcbb 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1315,9 +1315,6 @@ Emu\GPU\RSX\Host Mini-Driver - - Emu\GPU\RSX\Core - @@ -2653,12 +2650,6 @@ Emu\GPU\RSX\Host Mini-Driver - - Emu\GPU\RSX\Core - - - Emu\GPU\RSX\Core - Emu\NP