diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 3381ac0578..a09d598acb 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -428,39 +428,41 @@ void GLGSRender::emit_geometry(u32 sub_index) } }; - if (!sub_index) + auto& draw_call = rsx::method_registers.current_draw_clause; + const rsx::flags32_t vertex_state_mask = rsx::vertex_base_changed | rsx::vertex_arrays_changed; + const rsx::flags32_t vertex_state = (sub_index == 0) ? rsx::vertex_arrays_changed : draw_call.execute_pipeline_dependencies() & vertex_state_mask; + + if (vertex_state & rsx::vertex_arrays_changed) { analyse_inputs_interleaved(m_vertex_layout); - if (!m_vertex_layout.validate()) + } + else if (vertex_state & rsx::vertex_base_changed) + { + // Rebase vertex bases instead of + for (auto& info : m_vertex_layout.interleaved_blocks) { - // Execute remainining pipeline barriers with NOP draw - do - { - rsx::method_registers.current_draw_clause.execute_pipeline_dependencies(); - } - while (rsx::method_registers.current_draw_clause.next()); - - rsx::method_registers.current_draw_clause.end(); - return; + const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); + info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); } } - else + + if (vertex_state && !m_vertex_layout.validate()) { - if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed) + // No vertex inputs enabled + // Execute remainining pipeline barriers with NOP draw + do { - // Rebase vertex bases instead of - for (auto &info : m_vertex_layout.interleaved_blocks) - { - const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); - info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); - } - } + draw_call.execute_pipeline_dependencies(); + } while (draw_call.next()); + + draw_call.end(); + return; } if (manually_flush_ring_buffers) { //Use approximations to reserve space. This path is mostly for debug purposes anyway - u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); + u32 approx_vertex_count = draw_call.get_elements_count(); u32 approx_working_buffer_size = approx_vertex_count * 256; //Allocate 256K heap if we have no approximation at this time (inlined array) @@ -478,18 +480,18 @@ void GLGSRender::emit_geometry(u32 sub_index) return; } - const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive); + const GLenum draw_mode = gl::draw_mode(draw_call.primitive); update_vertex_env(upload_info); if (!upload_info.index_info) { - if (rsx::method_registers.current_draw_clause.is_single_draw()) + if (draw_call.is_single_draw()) { glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count); } else { - const auto subranges = rsx::method_registers.current_draw_clause.get_subranges(); + const auto subranges = draw_call.get_subranges(); const auto draw_count = subranges.size(); const auto driver_caps = gl::get_driver_caps(); bool use_draw_arrays_fallback = false; @@ -542,7 +544,7 @@ void GLGSRender::emit_geometry(u32 sub_index) { const GLenum index_type = std::get<0>(*upload_info.index_info); const u32 index_offset = std::get<1>(*upload_info.index_info); - const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive; + const bool restarts_valid = gl::is_primitive_native(draw_call.primitive) && !draw_call.is_disjoint_primitive; if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) { @@ -551,13 +553,13 @@ void GLGSRender::emit_geometry(u32 sub_index) m_index_ring_buffer->bind(); - if (rsx::method_registers.current_draw_clause.is_single_draw()) + if (draw_call.is_single_draw()) { glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast(u64{index_offset})); } else { - const auto subranges = rsx::method_registers.current_draw_clause.get_subranges(); + const auto subranges = draw_call.get_subranges(); const auto draw_count = subranges.size(); const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2; uptr index_ptr = index_offset; @@ -569,7 +571,7 @@ void GLGSRender::emit_geometry(u32 sub_index) for (const auto &range : subranges) { - const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count); + const auto index_size = get_index_count(draw_call.primitive, range.count); counts[dst_index] = index_size; offsets[dst_index++] = reinterpret_cast(index_ptr); diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index f52dff0cde..f5b4578686 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -703,34 +703,37 @@ void VKGSRender::emit_geometry(u32 sub_index) auto &draw_call = rsx::method_registers.current_draw_clause; m_profiler.start(); - if (sub_index == 0) + const flags32_t vertex_state_mask = rsx::vertex_base_changed | rsx::vertex_arrays_changed; + const flags32_t vertex_state = (sub_index == 0) ? rsx::vertex_arrays_changed : draw_call.execute_pipeline_dependencies() & vertex_state_mask; + + if (vertex_state & rsx::vertex_arrays_changed) { analyse_inputs_interleaved(m_vertex_layout); - - if (!m_vertex_layout.validate()) - { - // No vertex inputs enabled - // Execute remainining pipeline barriers with NOP draw - do - { - draw_call.execute_pipeline_dependencies(); - } - while (draw_call.next()); - - draw_call.end(); - return; - } } - else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed) + else if (vertex_state & rsx::vertex_base_changed) { // Rebase vertex bases instead of - for (auto &info : m_vertex_layout.interleaved_blocks) + for (auto& info : m_vertex_layout.interleaved_blocks) { const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); } } + if (vertex_state && !m_vertex_layout.validate()) + { + // No vertex inputs enabled + // Execute remainining pipeline barriers with NOP draw + do + { + draw_call.execute_pipeline_dependencies(); + } + while (draw_call.next()); + + draw_call.end(); + return; + } + const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 1be9b80021..c7d71ccab4 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -779,6 +779,24 @@ namespace rsx } } + template + struct set_vertex_array_offset + { + static void impl(thread* rsx, u32 reg, u32 arg) + { + if (rsx->in_begin_end && + !rsx::method_registers.current_draw_clause.empty() && + reg != method_registers.register_previous_value) + { + // Revert change to queue later + method_registers.decode(reg, method_registers.register_previous_value); + + // Insert offset mofifier barrier + method_registers.current_draw_clause.insert_command_barrier(vertex_array_offset_modifier_barrier, arg, index); + } + } + }; + void check_index_array_dma(thread* rsx, u32 reg, u32 arg) { // Check if either location or index type are invalid @@ -2637,6 +2655,56 @@ namespace rsx return registers[reg] == value; } + void draw_clause::insert_command_barrier(command_barrier_type type, u32 arg, u32 index) + { + ensure(!draw_command_ranges.empty()); + + auto _do_barrier_insert = [this](barrier_t&& val) + { + if (draw_command_barriers.empty() || draw_command_barriers.back() < val) + { + draw_command_barriers.push_back(val); + return; + } + + for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++) + { + if (*it < val) + { + continue; + } + + draw_command_barriers.insert(it, val); + break; + } + }; + + if (type == primitive_restart_barrier) + { + // Rasterization flow barrier + const auto& last = draw_command_ranges[current_range_index]; + const auto address = last.first + last.count; + + _do_barrier_insert({ current_range_index, 0, address, index, arg, 0, type }); + } + else + { + // Execution dependency barrier. Requires breaking the current draw call sequence and start another. + if (draw_command_ranges.back().count > 0) + { + append_draw_command({}); + } + else + { + // In case of back-to-back modifiers, do not add duplicates + current_range_index = draw_command_ranges.size() - 1; + } + + _do_barrier_insert({ current_range_index, get_system_time(), ~0u, index, arg, 0, type }); + last_execution_barrier_index = current_range_index; + } + } + void draw_clause::reset(primitive_type type) { current_range_index = ~0u; @@ -2676,6 +2744,11 @@ namespace rsx method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg); result |= vertex_base_changed; break; + case vertex_array_offset_modifier_barrier: + // Change vertex array offset + method_registers.decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg); + result |= vertex_arrays_changed; + break; default: fmt::throw_exception("Unreachable"); } @@ -3187,6 +3260,7 @@ namespace rsx bind(); bind(); bind(); + bind_range(); bind>(); bind>(); bind>(); diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index e1e9ff9be5..0e22d2cba2 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -24,13 +24,15 @@ namespace rsx { primitive_restart_barrier, vertex_base_modifier_barrier, - index_base_modifier_barrier + index_base_modifier_barrier, + vertex_array_offset_modifier_barrier }; enum command_execution_flags : u32 { vertex_base_changed = (1 << 0), - index_base_changed = (1 << 1) + index_base_changed = (1 << 1), + vertex_arrays_changed = (1 << 2), }; struct barrier_t @@ -39,6 +41,7 @@ namespace rsx u64 timestamp; u32 address; + u32 index; u32 arg; u32 flags; command_barrier_type type; @@ -112,47 +115,7 @@ namespace rsx simple_array inline_vertex_array{}; - void insert_command_barrier(command_barrier_type type, u32 arg) - { - ensure(!draw_command_ranges.empty()); - - auto _do_barrier_insert = [this](barrier_t&& val) - { - if (draw_command_barriers.empty() || draw_command_barriers.back() < val) - { - draw_command_barriers.push_back(val); - return; - } - - for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++) - { - if (*it < val) - { - continue; - } - - draw_command_barriers.insert(it, val); - break; - } - }; - - if (type == primitive_restart_barrier) - { - // Rasterization flow barrier - const auto& last = draw_command_ranges[current_range_index]; - const auto address = last.first + last.count; - - _do_barrier_insert({ current_range_index, 0, address, arg, 0, type }); - } - else - { - // Execution dependency barrier - append_draw_command({}); - - _do_barrier_insert({ current_range_index, get_system_time(), ~0u, arg, 0, type }); - last_execution_barrier_index = current_range_index; - } - } + void insert_command_barrier(command_barrier_type type, u32 arg, u32 register_index = 0); /** * Optimize commands for rendering